In [2]:
import pandas as pd
import numpy as np

In [3]:
# Demo frame: one row per car, labelled Car1..Car3.
# Built column-by-column; 'Year' is kept as text, exactly as entered.
df = pd.DataFrame(
    {
        'Make': ['Ferrari', 'Jeep', 'Dodge'],
        'Model': ['458', 'Grand Cherokee', 'Challenger'],
        'Year': ['2012', '2015', '2016'],
    },
    index=['Car1', 'Car2', 'Car3'],
)
df

Unnamed: 0,Make,Model,Year
Car1,Ferrari,458,2012
Car2,Jeep,Grand Cherokee,2015
Car3,Dodge,Challenger,2016


In [4]:
# Add a column from a plain list: one value per row, matched by position.
# The column name contains a space, so it is passed via a dict rather than
# as a keyword argument.
df = df.assign(**{'Body Style': ['Sports Car', 'SUV', 'Muscle']})
df

Unnamed: 0,Make,Model,Year,Body Style
Car1,Ferrari,458,2012,Sports Car
Car2,Jeep,Grand Cherokee,2015,SUV
Car3,Dodge,Challenger,2016,Muscle


In [5]:
# A scalar is broadcast to every row of the new column.
df = df.assign(Delivered=True)
df

Unnamed: 0,Make,Model,Year,Body Style,Delivered
Car1,Ferrari,458,2012,Sports Car,True
Car2,Jeep,Grand Cherokee,2015,SUV,True
Car3,Dodge,Challenger,2016,Muscle,True


In [6]:
# Adding a column from a list needs exactly one entry per row;
# None stands in for a row that has no value.
df = df.assign(Feedback=['Positive', None, 'Negative'])
df

Unnamed: 0,Make,Model,Year,Body Style,Delivered,Feedback
Car1,Ferrari,458,2012,Sports Car,True,Positive
Car2,Jeep,Grand Cherokee,2015,SUV,True,
Car3,Dodge,Challenger,2016,Muscle,True,Negative


In [7]:
# Alternative: assign a Series instead of a list. A Series is aligned to the
# frame by index label, so it may cover only some of the rows; rows without a
# matching label (label 1 here) are left missing.
# reset_index() moves Car1..Car3 into an 'index' column and gives the frame the
# 0..2 labels that the Series keys refer to.
df_1 = df.reset_index().assign(Date=pd.Series({0: 'Jan', 2: 'December'}))
df_1

Unnamed: 0,index,Make,Model,Year,Body Style,Delivered,Feedback,Date
0,Car1,Ferrari,458,2012,Sports Car,True,Positive,Jan
1,Car2,Jeep,Grand Cherokee,2015,SUV,True,,
2,Car3,Dodge,Challenger,2016,Muscle,True,Negative,December


## Merging DataFrames

In [10]:
# Two small frames indexed by 'Name'. Their names only partly overlap,
# which makes the different join types easy to tell apart.
students = pd.DataFrame({
    'Name': ['Leo', 'Victoria', 'Will'],
    'Major': ['Mechanical Engineering', 'Marketing', 'Chemistry'],
}).set_index('Name')

customers = pd.DataFrame({
    'Name': ['Michael', 'Victoria', 'Finn', 'Leo'],
    'Tier': ['Gold', 'Diamond', 'Silver', 'Diamond'],
}).set_index('Name')

# Plain-text dump of both frames, separated by one blank line.
print(students, customers, sep='\n\n')

                           Major
Name                            
Leo       Mechanical Engineering
Victoria               Marketing
Will                   Chemistry

             Tier
Name             
Michael      Gold
Victoria  Diamond
Finn       Silver
Leo       Diamond


### Full Outer Join (Union)

In [11]:
# Outer join on the index: keeps every name found in either frame.
students.merge(customers, how='outer', left_index=True, right_index=True)

Unnamed: 0_level_0,Major,Tier
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Finn,,Silver
Leo,Mechanical Engineering,Diamond
Michael,,Gold
Victoria,Marketing,Diamond
Will,Chemistry,


### Inner Join (Intersection)

In [12]:
# Inner join on the index: keeps only names present in both frames.
students.merge(customers, how='inner', left_index=True, right_index=True)

Unnamed: 0_level_0,Major,Tier
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Leo,Mechanical Engineering,Diamond
Victoria,Marketing,Diamond


### Left Join (all rows from the left frame)

In [15]:
# Left join on the index: keeps every student and adds their tier where one
# exists; students who are not customers get a missing Tier.
students.merge(customers, how='left', left_index=True, right_index=True)

Unnamed: 0_level_0,Major,Tier
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Leo,Mechanical Engineering,Diamond
Victoria,Marketing,Diamond
Will,Chemistry,


### Right Join (all rows from the right frame)

In [16]:
# Right join on the index: keeps every customer and adds their major where one
# exists; customers who are not students get a missing Major.
students.merge(customers, how='right', left_index=True, right_index=True)

Unnamed: 0_level_0,Major,Tier
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Michael,,Gold
Victoria,Marketing,Diamond
Finn,,Silver
Leo,Mechanical Engineering,Diamond


In [17]:
# Merge can also be done on a regular column instead of the index.
# 'Name' is moved out of the index only while it still is the index, so
# re-running this cell does not call reset_index() a second time and add a
# stray 'index' column to each frame (which would then show up in the merge
# result as 'index_x' / 'index_y').
if students.index.name == 'Name':
    students = students.reset_index()
if customers.index.name == 'Name':
    customers = customers.reset_index()

# Both frames use the same key column name, so a single `on=` replaces the
# left_on / right_on pair.
pd.merge(students, customers, how='left', on='Name')


Unnamed: 0,Name,Major,Tier
0,Leo,Mechanical Engineering,Diamond
1,Victoria,Marketing,Diamond
2,Will,Chemistry,
