Working with Pandas DataFrames
===========================

In [None]:
import pandas as pd

import os

In [None]:
# Build the path with os.path.join so the separator is right on any OS.
# The path is relative, so it resolves against the current working directory.
file_path = os.path.join("data", "PubChemElements_all.csv")

# Read the CSV into a DataFrame. No index column is requested, so the rows
# get pandas' default integer index (0, 1, 2, ...).
df = pd.read_csv(file_path)

## Examining Data

In [None]:
# Preview the first rows (5 by default) to check the file parsed as expected.
df.head()

In [None]:
# Print a concise summary: column names, dtypes, non-null counts and memory use.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

## Accessing Data

### Accessing with row and column numbers

In [None]:
# .iloc selects by integer position (zero-based), so this is the 36th row.
# A single row comes back as a Series whose index is the column names.
df.iloc[35]

### Accessing with names

In [None]:
# List the column labels that can be used for name-based access.
df.columns

In [None]:
# Single brackets with one column name return that column as a Series;
# .head() limits the display to the first rows.
df['Electronegativity'].head()

In [None]:
# Passing a list of names (note the double brackets) returns a DataFrame
# containing only those columns.
df[['Name','Electronegativity']]

In [None]:
# .loc selects by label: the row labelled 35 and the 'YearDiscovered' column.
# This relies on the default integer row labels from read_csv, so it must run
# before the index is changed to 'Symbol' in the next cell.
df.loc[35, 'YearDiscovered']

In [None]:
# Use the element symbol as the row index so rows can be looked up by label
# with .loc. Reassigning the result (instead of inplace=True) keeps the step
# explicit and chainable; pandas discourages inplace.
# NOTE: set_index removes 'Symbol' from the columns, so re-running this cell
# without reloading df raises KeyError, and integer labels such as
# df.loc[35] no longer work afterwards.
df = df.set_index('Symbol')

## Performing Operations on Columns

In [None]:
# Arithmetic on a column is vectorized: 273.15 is subtracted from every
# MeltingPoint value at once and the result is stored as a new column.
# NOTE(review): 273.15 is the kelvin-to-Celsius offset, so MeltingPoint is
# presumably in kelvin; confirm against the dataset.
df['MeltingPointC'] = df['MeltingPoint'] - 273.15

### The `.apply` method

In [None]:
# Show the first rows again to see the current state of the frame
# (Symbol index and the derived MeltingPointC column).
df.head()

In [None]:
# Number of letters in each element name.
# The point: .apply accepts built-in functions such as len and calls them
# once per value in the column.
df["Name"].apply(len)

In [None]:
# Apply with custom functions

def calculate_num_valence(electron_configuration):
    """Sum the electron counts written at the end of each term of a configuration.

    The string is split on whitespace. For every term, the digits among its
    final two characters are joined and read as one integer, so ``2p2``
    contributes 2 and ``3d10`` contributes 10. Terms with no digit in that
    position (for example a bare ``[He]``) contribute nothing.

    Parameters
    ----------
    electron_configuration : str
        Whitespace-separated configuration such as ``'[He]2s2 2p2'``.

    Returns
    -------
    int
        Total of the per-term counts; 0 for an empty string.
    """
    total = 0
    for term in electron_configuration.split():
        # Only the last two characters can hold the occupancy (1 or 2 digits).
        digits = "".join(ch for ch in term[-2:] if ch.isdigit())
        if not digits:
            continue
        total += int(digits)
    return total

In [None]:
# Try the function on one configuration before applying it to the whole
# column: '2s2' and '2p2' each contribute 2, so this returns 4.
calculate_num_valence('[He]2s2 2p2')

In [None]:
# Run the custom function on every ElectronConfiguration string and store the
# results in a new column. The function must be passed by the name it was
# defined under (calculate_num_valence); any other name raises NameError.
df["Number of Valence"] = df["ElectronConfiguration"].apply(calculate_num_valence)

In [None]:
# Display the first rows to inspect the 'Number of Valence' column assigned
# in the previous cell.
df.head()

## Built-in plotting

In [None]:
# With no arguments, DataFrame.plot draws a line plot of every numeric column
# against the index. Rarely useful as-is, but it shows the default behaviour.
df.plot()

In [None]:
# Scatter plot of one column against another: Electronegativity on the x-axis,
# AtomicRadius on the y-axis.
df.plot(x="Electronegativity", y="AtomicRadius", kind="scatter")