# Combining datasets

In [1]:
import pandas as pd

In [6]:
# Two small demo frames that share the key column `col1`.
# Only the key values 3, 4 and 5 occur in both of them.
left = pd.DataFrame(
    data={
        'col1': [1, 2, 3, 4, 5],
        'col2': ['a', 'b', 'c', 'd', 'e'],
    }
)
right = pd.DataFrame(
    data={
        'col1': [3, 4, 5, 6, 7],
        'col3': ['f', 'g', 'h', 'i', 'j'],
    }
)

# Plain-text dump of both frames, one after the other.
print(left, right, sep='\n')

   col1 col2
0     1    a
1     2    b
2     3    c
3     4    d
4     5    e
   col1 col3
0     3    f
1     4    g
2     5    h
3     6    i
4     7    j


# Merge the two DataFrames on `col1`

In [7]:
# Inner join (the default `how`): keep only the rows whose `col1` value
# is present in both frames.
left.merge(right, how='inner', on='col1')

Unnamed: 0,col1,col2,col3
0,3,c,f
1,4,d,g
2,5,e,h


In [8]:
# Left join: keep every row of `left`; keys with no match in `right`
# get a missing value in `col3`.
left.merge(right, how='left', on='col1')

Unnamed: 0,col1,col2,col3
0,1,a,
1,2,b,
2,3,c,f
3,4,d,g
4,5,e,h


In [9]:
# Right join: keep every row of `right`; keys with no match in `left`
# get a missing value in `col2`.
left.merge(right, how='right', on='col1')

Unnamed: 0,col1,col2,col3
0,3,c,f
1,4,d,g
2,5,e,h
3,6,,i
4,7,,j


In [11]:
# Build the first accounts table from a dict: each key becomes a column
# name and each list supplies that column's values, one entry per row.
df1 = pd.DataFrame(
    data={
        'account_id': [1, 2, 3, 11382],
        'gender': ['female', 'male', 'female', 'male'],
        'age': [55, 25, 29, 39],
    }
)
df1

Unnamed: 0,account_id,gender,age
0,1,female,55
1,2,male,25
2,3,female,29
3,11382,male,39


In [12]:
# Second accounts table with the same three columns as df1 but different
# account ids, so the two can be stacked row-wise.
df2 = pd.DataFrame(
    data={
        'account_id': [4, 5, 6, 7],
        'gender': ['female', 'male', 'female', 'male'],
        'age': [19, 28, 14, 15],
    }
)
df2

Unnamed: 0,account_id,gender,age
0,4,female,19
1,5,male,28
2,6,female,14
3,7,male,15


In [14]:
# Stack df2 underneath df1; ignore_index=True renumbers the rows 0..n-1
# instead of repeating each frame's own 0..3 labels.
pd.concat(
    objs=[df1, df2],
    ignore_index=True,
)

Unnamed: 0,account_id,gender,age
0,1,female,55
1,2,male,25
2,3,female,29
3,11382,male,39
4,4,female,19
5,5,male,28
6,6,female,14
7,7,male,15


In [15]:
# Load the three pickled hero tables from the current working directory.
# NOTE(review): unpickling can execute arbitrary code - only load .pkl files
# that come from a trusted source.
hero_powers, hero_dc, hero_marvel = (
    pd.read_pickle('hero_powers.pkl'),
    pd.read_pickle('hero_dc.pkl'),
    pd.read_pickle('hero_marvel.pkl'),
)

In [16]:
# Peek at the first row to see the column layout of the Marvel table.
hero_marvel.head(n=1)

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight
0,A-Bomb,Male,yellow,Human,No Hair,203,Marvel Comics,good,441


In [17]:
# Peek at the first row to see the column layout of the DC table.
hero_dc.head(n=1)

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight
0,Abin Sur,Male,blue,Ungaran,No Hair,185,DC Comics,good,90.0


In [18]:
# Dimensions of the DC table as a (rows, columns) tuple.
hero_dc.shape

(215, 9)

In [19]:
# Dimensions of the Marvel table as a (rows, columns) tuple.
hero_marvel.shape

(388, 9)

In [20]:
# Expected number of rows once the DC and Marvel tables are stacked.
# Derived from the frames themselves (shape[0] is the row count) so the
# figure stays correct if the input files change, instead of retyping
# the numbers by hand.
hero_dc.shape[0] + hero_marvel.shape[0]

603

In [26]:
# Stack the two publisher tables (DC rows first, then Marvel) into one frame
# with a fresh 0..n-1 row index.
hero_info = pd.concat(
    objs=[hero_dc, hero_marvel],
    ignore_index=True,
)

In [27]:
# Peek at the first row of the combined DC + Marvel table.
hero_info.head(n=1)

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight
0,Abin Sur,Male,blue,Ungaran,No Hair,185,DC Comics,good,90.0


In [28]:
# Peek at the first row of the powers table; its hero identifier column
# is `hero_names`.
hero_powers.head(n=1)

Unnamed: 0,hero_names,Agility,Accelerated Healing,Lantern Power Ring,Dimensional Awareness,Cold Resistance,Durability,Stealth,Energy Absorption,Flight,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,3-D Man,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [30]:
# Attach the power flags to each hero. The key column is called `name` in
# hero_info and `hero_names` in hero_powers, so both are named explicitly.
# The inner join keeps only heroes that appear in both tables.
hero = hero_info.merge(
    hero_powers,
    how='inner',
    left_on='name',
    right_on='hero_names',
)

In [32]:
# Peek at the first merged row: hero attributes followed by the power flags.
hero.head(n=1)

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight,hero_names,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,Abin Sur,Male,blue,Ungaran,No Hair,185,DC Comics,good,90.0,Abin Sur,...,False,False,False,False,False,False,False,False,False,False


In [33]:
# Dimensions of the merged table as a (rows, columns) tuple.
hero.shape

(537, 177)

# Renaming columns

In [38]:
# List every column with its dtype. verbose=True requests the full
# per-column listing; pandas may otherwise abbreviate it for a frame
# this wide.
hero.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 537 entries, 0 to 536
Data columns (total 177 columns):
 #    Column                        Dtype  
---   ------                        -----  
 0    name                          object 
 1    Gender                        object 
 2    Eye color                     object 
 3    Race                          object 
 4    Hair color                    object 
 5    Height                        int64  
 6    Publisher                     object 
 7    Alignment                     object 
 8    Weight                        float64
 9    hero_names                    object 
 10   Agility                       bool   
 11   Accelerated Healing           bool   
 12   Lantern Power Ring            bool   
 13   Dimensional Awareness         bool   
 14   Cold Resistance               bool   
 15   Durability                    bool   
 16   Stealth                       bool   
 17   Energy Absorption             bool   
 18   Flight  