In [147]:
people = {
    "first": ["Corey", 'Jane', 'John'], 
    "last": ["Schafer", 'Doe', 'Doe'], 
    "email": ["CoreyMSchafer@gmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com']
}

In [148]:
people['email']

['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com']

In [149]:
import pandas as pd

In [150]:
# create DataFrame object
# keys are columns 
# lists of something are rows
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [151]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [152]:
df[['first', 'last']] # returns pandas.core.frame.DataFrame

Unnamed: 0,first,last
0,Corey,Schafer
1,Jane,Doe
2,John,Doe


In [153]:
df.iloc[0] # access the single row

df.iloc[[0, 1]] # access multiple rows
 
df.iloc[[0, 1], [2, 1]] # first param - rows
                        # second param  - cols

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [154]:
df.loc[[0, 1], ['email', 'last']] # same as iloc but takes string args for second param

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [155]:
# PART 4 indexes and conditionals

# set index column 
df.set_index('email') # change once
df.set_index('email', inplace=True) # change forever
df.index # nice unique identifier for the row

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [156]:
df.loc['CoreyMSchafer@gmail.com'] # we replaced default indexes with strings
                                  # and now the 1st arg is a string cant do df.loc[0]

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [157]:
df.reset_index(inplace=True) # reset index assignment

In [158]:
filt = (df['last'] == 'Doe') & (df['first'] == 'John')
df[filt]

Unnamed: 0,email,first,last
2,JohnDoe@email.com,John,Doe


In [159]:
df.loc[filt, 'email'] # 1st rows that we want, 2nd columns that we want

2    JohnDoe@email.com
Name: email, dtype: object

In [160]:
# PART 5 Updating rows and columns - modifying data withing the data frame

# replace columns in order
df.columns = ['first_name', 'last_name', 'email']

In [161]:
# use list comprehensions
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [162]:
# replace something in columns
df.columns = df.columns.str.replace('_', ' ') # replace all underscores with spaces vice versa
df.columns = df.columns.str.replace(' ', '_') 

In [163]:
# rename columns 
df.rename(columns={'first_name' : 'first', 'last_name' : 'last'}, inplace=True)
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [164]:
df.loc[2] = ['John', 'Smith', 'John@email.com']

In [165]:
# change only particular values if we do not want to modify all of them 
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com'] # we can change as list or string for the 2nd param
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,John,Doe,JohnDoe@email.com


In [166]:
# we can not assign it like this because the copy is returned
# filt = (df['email'] == 'JohnDoe@email.com')
# df[filt]['last'] = 'Smith'

# instead we should use loc or iloc 
filt = (df['email'] == 'JohnDoe@email.com')
df.loc[filt, 'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,John,Smith,JohnDoe@email.com


In [167]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,schafer
1,JaneDoe@email.com,Jane,doe
2,John,Smith,johndoe@email.com


In [168]:
df['email'].apply(len)

0     7
1     3
2    17
Name: email, dtype: int64

In [169]:
def update_email(email):
    return email.upper()

df['email'].apply(str.lower)
df['email'].apply(str.upper)

# or pass custom functions ... 
# to actually modify values, we can just assign them
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,SCHAFER
1,JaneDoe@email.com,Jane,DOE
2,John,Smith,JOHNDOE@EMAIL.COM


In [170]:
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,schafer
1,JaneDoe@email.com,Jane,doe
2,John,Smith,johndoe@email.com


In [173]:
df.applymap(len)
# df.applymap(str.lower)

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,schafer
1,JaneDoe@email.com,Jane,doe
2,John,Smith,johndoe@email.com


In [175]:
df = pd.DataFrame(people)
df['first'].map({'Corey' : 'Chris', 'Ja ne' : 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [177]:
df['first'].replace({'Corey' : 'Chris', 'Ja ne' : 'Mary'})

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
