In [231]:
people = {
    "first": ["Corey", 'Jane', 'John', 'Adam'], 
    "last": ["Schafer", 'Doe', 'Doe', 'Adamovich'], 
    "email": ["CoreyMSchafer@gmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com', 'Adam@kek.com']
}

In [232]:
people['email']

['CoreyMSchafer@gmail.com',
 'JaneDoe@email.com',
 'JohnDoe@email.com',
 'Adam@kek.com']

In [233]:
import pandas as pd

In [234]:
# create DataFrame object
# keys are columns 
# lists of something are rows
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com
3,Adam,Adamovich,Adam@kek.com


In [235]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [236]:
df[['first', 'last']] # returns pandas.core.frame.DataFrame

Unnamed: 0,first,last
0,Corey,Schafer
1,Jane,Doe
2,John,Doe
3,Adam,Adamovich


In [237]:
df.iloc[0] # access the single row

df.iloc[[0, 1]] # access multiple rows
 
df.iloc[[0, 1], [2, 1]] # first param - rows
                        # second param  - cols

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [238]:
df.loc[[0, 1], ['email', 'last']] # same as iloc but takes string args for second param

Unnamed: 0,email,last
0,CoreyMSchafer@gmail.com,Schafer
1,JaneDoe@email.com,Doe


In [239]:
# PART 4 indexes and conditionals

# set index column 
df.set_index('email') # change once
df.set_index('email', inplace=True) # change forever
df.index # nice unique identifier for the row

Index(['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com',
       'Adam@kek.com'],
      dtype='object', name='email')

In [240]:
df.loc['CoreyMSchafer@gmail.com'] # we replaced default indexes with strings
                                  # and now the 1st arg is a string cant do df.loc[0]

first      Corey
last     Schafer
Name: CoreyMSchafer@gmail.com, dtype: object

In [241]:
df.reset_index(inplace=True) # reset index assignment

In [242]:
filt = (df['last'] == 'Doe') & (df['first'] == 'John')
df[filt]

Unnamed: 0,email,first,last
2,JohnDoe@email.com,John,Doe


In [243]:
df.loc[filt, 'email'] # 1st rows that we want, 2nd columns that we want

2    JohnDoe@email.com
Name: email, dtype: object

In [244]:
# PART 5 Updating rows and columns - modifying data withing the data frame

# replace columns in order
df.columns = ['first_name', 'last_name', 'email']

In [245]:
# use list comprehensions
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe
3,Adam@kek.com,Adam,Adamovich


In [246]:
# replace something in columns
df.columns = df.columns.str.replace('_', ' ') # replace all underscores with spaces vice versa
df.columns = df.columns.str.replace(' ', '_') 

In [247]:
# rename columns 
df.rename(columns={'first_name' : 'first', 'last_name' : 'last'}, inplace=True)
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe
3,Adam@kek.com,Adam,Adamovich


In [248]:
df.loc[2] = ['John', 'Smith', 'John@email.com']

In [249]:
# change only particular values if we do not want to modify all of them 
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com'] # we can change as list or string for the 2nd param
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,John,Doe,JohnDoe@email.com
3,Adam@kek.com,Adam,Adamovich


In [250]:
# we can not assign it like this because the copy is returned
# filt = (df['email'] == 'JohnDoe@email.com')
# df[filt]['last'] = 'Smith'

# instead we should use loc or iloc 
filt = (df['email'] == 'JohnDoe@email.com')
df.loc[filt, 'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,Schafer
1,JaneDoe@email.com,Jane,Doe
2,John,Smith,JohnDoe@email.com
3,Adam@kek.com,Adam,Adamovich


In [251]:
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,schafer
1,JaneDoe@email.com,Jane,doe
2,John,Smith,johndoe@email.com
3,Adam@kek.com,Adam,adamovich


In [252]:
df['email'].apply(len)

0     7
1     3
2    17
3     9
Name: email, dtype: int64

In [253]:
def update_email(email):
    return email.upper()

df['email'].apply(str.lower)
df['email'].apply(str.upper)

# or pass custom functions ... 
# to actually modify values, we can just assign them
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,SCHAFER
1,JaneDoe@email.com,Jane,DOE
2,John,Smith,JOHNDOE@EMAIL.COM
3,Adam@kek.com,Adam,ADAMOVICH


In [254]:
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,CoreyMSchafer@gmail.com,Corey,schafer
1,JaneDoe@email.com,Jane,doe
2,John,Smith,johndoe@email.com
3,Adam@kek.com,Adam,adamovich


In [255]:
df.applymap(len)
# df.applymap(str.lower)

Unnamed: 0,first,last,email
0,23,5,7
1,17,4,3
2,4,5,17
3,12,4,9


In [256]:
df = pd.DataFrame(people)
df['first'].map({'Corey' : 'Chris', 'Ja ne' : 'Mary'})

0    Chris
1      NaN
2      NaN
3      NaN
Name: first, dtype: object

In [257]:
df['first'].replace({'Corey' : 'Chris', 'Ja ne' : 'Mary'}, )

0    Chris
1     Jane
2     John
3     Adam
Name: first, dtype: object

In [258]:
# Part 6 Add/remove columns

# adding new columns
df['full_name'] = df['first']  + ' ' + df['last'] 
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
3,Adam,Adamovich,Adam@kek.com,Adam Adamovich


In [259]:
# remove columns
df.drop(columns=['first', 'last'])
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
3,Adam,Adamovich,Adam@kek.com,Adam Adamovich


In [260]:
df['full_name'].str.split(' ', expand=True) # expand - return dataframe instead of returned lst

Unnamed: 0,0,1
0,Corey,Schafer
1,Jane,Doe
2,John,Doe
3,Adam,Adamovich


In [261]:
# add rows and deleting rows
df.append({'first' : 'Tony'}, ignore_index=True)

  df.append({'first' : 'Tony'}, ignore_index=True)


Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
3,Adam,Adamovich,Adam@kek.com,Adam Adamovich
4,Tony,,,


In [262]:
people = {
    "first": ["Tony", 'Steve'], 
    "last": ["Stark", 'Rogers'], 
    "email": ["IronMan@avenge.com", 'Cap@avenge.com']
}
df2 = pd.DataFrame(people)
df2

Unnamed: 0,first,last,email
0,Tony,Stark,IronMan@avenge.com
1,Steve,Rogers,Cap@avenge.com


In [263]:
# append another table to the current data frame

# df.append(df2, ignore_index=True, sort=False)

# make changes permanent
df = df.append(df2, ignore_index=True, sort=False) 
df

  df = df.append(df2, ignore_index=True, sort=False)


Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
3,Adam,Adamovich,Adam@kek.com,Adam Adamovich
4,Tony,Stark,IronMan@avenge.com,
5,Steve,Rogers,Cap@avenge.com,


In [264]:
df.drop_duplicates(inplace=True) # remove all duplicates
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
3,Adam,Adamovich,Adam@kek.com,Adam Adamovich
4,Tony,Stark,IronMan@avenge.com,
5,Steve,Rogers,Cap@avenge.com,


In [265]:
# drop with conditional
filt = df['last'] == 'Doe'
df.drop(index=df[filt].index)

# update rows
df.loc[[3, 4], 'full_name'] = 'Tony Stark'
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
3,Adam,Adamovich,Adam@kek.com,Tony Stark
4,Tony,Stark,IronMan@avenge.com,Tony Stark
5,Steve,Rogers,Cap@avenge.com,


In [266]:
# Partd 7 Sorting Data

# sort by key
# df.sort_values(by='last') # we pass ascending=False 


In [270]:
# pass list of bool values 
df.sort_values(by=['first', 'last'], ascending=[True, True], inplace=False)

Unnamed: 0,first,last,email,full_name
3,Adam,Adamovich,Adam@kek.com,Tony Stark
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
5,Steve,Rogers,Cap@avenge.com,
4,Tony,Stark,IronMan@avenge.com,Tony Stark


In [272]:
# undo previous sorting
df.sort_index()
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreyMSchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@email.com,Jane Doe
2,John,Doe,JohnDoe@email.com,John Doe
3,Adam,Adamovich,Adam@kek.com,Tony Stark
4,Tony,Stark,IronMan@avenge.com,Tony Stark
5,Steve,Rogers,Cap@avenge.com,
