# Data Wrangling: Join, Combine, and Reshape

In [1]:
import numpy as np
import pandas as pd

# Hierarchical Indexing

In [2]:
# data = pd.Series(np.random.randn(9), index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'], 
#                                             [1, 2, 3, 1, 3, 1, 2, 2, 3]])
data = pd.Series(np.random.randn(8), index=[['a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'], 
                                            [1, 2, 1, 3, 1, 2, 2, 3]])
print(f"Data structure dimenson: {data.ndim}")
data

Data structure dimenson: 1


a  1    1.065093
   2   -1.577454
b  1   -0.093059
   3   -1.016815
c  1   -0.273679
   2   -0.764733
d  2    0.965069
   3   -0.199088
dtype: float64

In [3]:
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [4]:
#Indexing.
data['c']

1   -0.273679
2   -0.764733
dtype: float64

In [5]:
# Accessing subindex of parent Index.
data['c'][2]

-0.7647327787279193

In [6]:
#Slicing.
data['b':'c']

b  1   -0.093059
   3   -1.016815
c  1   -0.273679
   2   -0.764733
dtype: float64

In [7]:
# Selecting labels.
data.loc[['b', 'd']]

b  1   -0.093059
   3   -1.016815
d  2    0.965069
   3   -0.199088
dtype: float64

In [8]:
data

a  1    1.065093
   2   -1.577454
b  1   -0.093059
   3   -1.016815
c  1   -0.273679
   2   -0.764733
d  2    0.965069
   3   -0.199088
dtype: float64

In [9]:
data.loc[:,2]

a   -1.577454
c   -0.764733
d    0.965069
dtype: float64

In [10]:
#Slicing on numpy array.
# arr = np.arange(1,21).reshape(-1)
# arr = np.arange(1,21).reshape(4,5)
arr = np.arange(1,21).reshape((4,5))
np.reshape
arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20]])

In [11]:
pivoted_data = data.unstack()
pivoted_data

Unnamed: 0,1,2,3
a,1.065093,-1.577454,
b,-0.093059,,-1.016815
c,-0.273679,-0.764733,
d,,0.965069,-0.199088


In [12]:
pivoted_data.stack()

a  1    1.065093
   2   -1.577454
b  1   -0.093059
   3   -1.016815
c  1   -0.273679
   2   -0.764733
d  2    0.965069
   3   -0.199088
dtype: float64

In [13]:
data2 = pd.Series(np.random.randn(9), index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'], 
                                             [1, 2, 3, 1, 3, 1, 2, 2, 3]])
data2

a  1   -1.493547
   2    0.704122
   3    0.582767
b  1    2.308863
   3    0.261661
c  1    0.918648
   2   -0.805550
d  2   -0.139800
   3   -0.581363
dtype: float64

In [14]:
frame = pd.DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                     columns=[['Ohio', 'Ohio', 'Colorado'],['Green', 'Red', 'Green']])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [15]:
frame.index.names = ["key1", "key2"]

In [16]:
frame.columns.names = ['state', 'color']

In [17]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [18]:
# Selecting group of values from a column.
frame['Ohio']

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [19]:
# the columns in the preceding DataFrame with level names could be created like this:
frame_new = pd.MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
names=['state','color']).to_frame()
frame_new

Unnamed: 0_level_0,Unnamed: 1_level_0,state,color
state,color,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,Green,Ohio,Green
Ohio,Red,Ohio,Red
Colorado,Green,Colorado,Green


In [20]:
# the columns in the preceding DataFrame with level names could be created like this:
frame_new = pd.MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
names=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]]).to_frame()
frame_new

TypeError: MultiIndex.name must be a hashable type

In [None]:
frame2 = pd.DataFrame(np.arange(24).reshape((4, 6)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                     columns=[['Ohio', 'Ohio', 'Colorado', "Chicago", "California", "Florida"],
                              ['Green', 'Red', 'Green', 'Green',"Red","Orange"]])
frame2

# Reordering and Sorting Levels

In [21]:
# Original Dataframe.
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [22]:
new_dataframe = frame.swaplevel('key1', 'key2')
new_dataframe

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [23]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [24]:
new_dataframe

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [25]:
frame.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [26]:
frame.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [27]:
frame.sort_index(level="key2")

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [28]:
frame.sort_index(axis=1,level=1)

Unnamed: 0_level_0,state,Colorado,Ohio,Ohio
Unnamed: 0_level_1,color,Green,Green,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,2,0,1
a,2,5,3,4
b,1,8,6,7
b,2,11,9,10


In [29]:
frame = pd.DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                     columns=[['Ohio', 'Ohio', 'Colorado'],['Green', 'Red', 'Green']])
frame.index.names = ["key1","key2"]
frame.columns.names = ["state","color"]
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [30]:
frame.sort_index(axis=1,level=1)

Unnamed: 0_level_0,state,Colorado,Ohio,Ohio
Unnamed: 0_level_1,color,Green,Green,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,2,0,1
a,2,5,3,4
b,1,8,6,7
b,2,11,9,10


In [31]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [32]:
frame.sum(level='key2')

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,6,8,10
2,12,14,16


In [33]:
frame.sum(level='key1')

state,Ohio,Ohio,Colorado
color,Green,Red,Green
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,3,5,7
b,15,17,19


In [34]:
frame["Ohio"] # Column Indexing.

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [35]:
frame2 = pd.DataFrame(np.arange(12).reshape((3, 4)), index=[['a', 'a', 'b'], [1, 2, 1]],
                     columns=[['Ohio', 'Ohio', 'Colorado', "Chicago"],['Green', 'Red', 'Green', "Orange"]])
frame2.index.names = ["key1","key2"]
frame2.columns.names = ["state","color"]
frame2

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado,Chicago
Unnamed: 0_level_1,color,Green,Red,Green,Orange
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,0,1,2,3
a,2,4,5,6,7
b,1,8,9,10,11


In [36]:
frame3 = pd.DataFrame({'a': range(7), 'b': range(7, 0, -1), 'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
                       'd': [0, 1, 2, 0, 1, 2, 3]})
frame3

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [37]:
frame3_new_version = frame3.set_index(['c','d'])
frame3_new_version #c and d columns were removed when they became index.

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [38]:
frame3

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [39]:
frame4 = frame3.set_index(['c', 'd'], drop=False)
frame4

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,0,3,4,two,0
two,1,4,3,two,1
two,2,5,2,two,2
two,3,6,1,two,3


In [40]:
frame3_new_version.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1


# Combining and Merging Datasets

In [41]:
# Loaded Dataset.
dc_heroes = pd.read_csv("dc-marvel-heroes/Heroes_DC.csv")
marvel_heroes = pd.read_csv("dc-marvel-heroes/Heroes_Marvel.csv")

In [42]:
# Sample Datasets
df1 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'], 'data1': range(7)})
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


In [43]:
df2 = pd.DataFrame({'key': ['a', 'b', 'd'],'data2': range(3)})
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


In [44]:
pd.merge(df1,df2, sort=True, indicator=True)

Unnamed: 0,key,data1,data2,_merge
0,a,2,0,both
1,a,4,0,both
2,a,5,0,both
3,b,0,1,both
4,b,1,1,both
5,b,6,1,both


In [45]:
pd.merge(df1,df2,how="outer",indicator=True)

Unnamed: 0,key,data1,data2,_merge
0,b,0.0,1.0,both
1,b,1.0,1.0,both
2,b,6.0,1.0,both
3,a,2.0,0.0,both
4,a,4.0,0.0,both
5,a,5.0,0.0,both
6,c,3.0,,left_only
7,d,,2.0,right_only


In [46]:
pd.merge(df1,df2,how="left",indicator=True)

Unnamed: 0,key,data1,data2,_merge
0,b,0,1.0,both
1,b,1,1.0,both
2,a,2,0.0,both
3,c,3,,left_only
4,a,4,0.0,both
5,a,5,0.0,both
6,b,6,1.0,both


In [47]:
# Now we are specifying columns.
pd.merge(df1, df2, on='key', sort=True)

Unnamed: 0,key,data1,data2
0,a,2,0
1,a,4,0
2,a,5,0
3,b,0,1
4,b,1,1
5,b,6,1


## When we don't have a common column in datasets.

In [48]:
df3 = pd.DataFrame({'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],'data1': range(7)})
df3

Unnamed: 0,lkey,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


In [49]:
df4 = pd.DataFrame({'rkey': ['a', 'b', 'd'],'data2': range(3)})
df4

Unnamed: 0,rkey,data2
0,a,0
1,b,1
2,d,2


In [50]:
pd.merge(df3, df4, left_on='lkey', right_on='rkey', indicator=True)

Unnamed: 0,lkey,data1,rkey,data2,_merge
0,b,0,b,1,both
1,b,1,b,1,both
2,b,6,b,1,both
3,a,2,a,0,both
4,a,4,a,0,both
5,a,5,a,0,both


In [51]:
df5 = pd.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'], 'data1': range(6)})
df6 = pd.DataFrame({'key': ['a', 'b', 'a', 'b', 'd'],'data2': range(5)})

In [52]:
df5

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,b,5


In [53]:
df6

Unnamed: 0,key,data2
0,a,0
1,b,1
2,a,2
3,b,3
4,d,4


In [54]:
pd.merge(df5,df6,how="left",indicator=True)

Unnamed: 0,key,data1,data2,_merge
0,b,0,1.0,both
1,b,0,3.0,both
2,b,1,1.0,both
3,b,1,3.0,both
4,a,2,0.0,both
5,a,2,2.0,both
6,c,3,,left_only
7,a,4,0.0,both
8,a,4,2.0,both
9,b,5,1.0,both


In [55]:
pd.merge(df5,df6,how="right",indicator=True)

Unnamed: 0,key,data1,data2,_merge
0,b,0.0,1,both
1,b,1.0,1,both
2,b,5.0,1,both
3,b,0.0,3,both
4,b,1.0,3,both
5,b,5.0,3,both
6,a,2.0,0,both
7,a,4.0,0,both
8,a,2.0,2,both
9,a,4.0,2,both


### To merge with multiple keys, pass a list of column names

In [56]:
left = pd.DataFrame({'key1': ['foo', 'foo', 'bar'],'key2': ['one', 'two', 'one'],'lval': [1, 2, 3]})
left

Unnamed: 0,key1,key2,lval
0,foo,one,1
1,foo,two,2
2,bar,one,3


In [57]:
right = pd.DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'],'key2': ['one', 'one', 'one', 'two'],
                      'rval': [4, 5, 6, 7]})
right

Unnamed: 0,key1,key2,rval
0,foo,one,4
1,foo,one,5
2,bar,one,6
3,bar,two,7


In [58]:
pd.merge(left,right, on=["key1","key2"], how="outer")

Unnamed: 0,key1,key2,lval,rval
0,foo,one,1.0,4.0
1,foo,one,1.0,5.0
2,foo,two,2.0,
3,bar,one,3.0,6.0
4,bar,two,,7.0


In [59]:
pd.merge(left,right, on=["key1","key3"], how="outer")

KeyError: 'key3'

In [60]:
pd.merge(left,right,on=("key1"),copy=False)

Unnamed: 0,key1,key2_x,lval,key2_y,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


In [61]:
pd.merge(left,right,on=("key1"))

Unnamed: 0,key1,key2_x,lval,key2_y,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


In [62]:
pd.merge(left,right, on="key1", how="inner", suffixes=["_left","_right"])

Unnamed: 0,key1,key2_left,lval,key2_right,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


In [63]:
left1 = pd.DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'],'value': range(6)})
left1

Unnamed: 0,key,value
0,a,0
1,b,1
2,a,2
3,a,3
4,b,4
5,c,5


In [64]:
right1 = pd.DataFrame({'group_val': [3.5, 7]}, index=['a', 'b'])
right1

Unnamed: 0,group_val
a,3.5
b,7.0


In [65]:
pd.merge(left1,right1,left_on=["key"],right_index=True) #Intersection performed.

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0


## Class Revision

In [66]:
superheroes = pd.Series(["Batman","Superman","Flash","Spider-Man","Wolverine"],
                        index=[["DC","DC","DC","Marvel","Marvel"],["DC_Hero","DC_Hero","DC_Hero","Marvel_Hero",
                                                                   "Marvel_Hero"]])
superheroes

DC      DC_Hero            Batman
        DC_Hero          Superman
        DC_Hero             Flash
Marvel  Marvel_Hero    Spider-Man
        Marvel_Hero     Wolverine
dtype: object

In [67]:
superheroes.to_frame()

Unnamed: 0,Unnamed: 1,0
DC,DC_Hero,Batman
DC,DC_Hero,Superman
DC,DC_Hero,Flash
Marvel,Marvel_Hero,Spider-Man
Marvel,Marvel_Hero,Wolverine


In [68]:
# Tasks:- Make gigs on Fiverr
# Web Scrapping.

### Continuing with Merge on Index

In [69]:
selected_dc_heroes = dc_heroes.loc[1:10]
selected_dc_heroes

Unnamed: 0,page_id,name,urlslug,ID,ALIGN,EYE,HAIR,SEX,GSM,ALIVE,APPEARANCES,FIRST APPEARANCE,YEAR
1,23387,Superman (Clark Kent),\/wiki\/Superman_(Clark_Kent),Secret Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,,Living Characters,2496.0,"1986, October",1986.0
2,1458,Green Lantern (Hal Jordan),\/wiki\/Green_Lantern_(Hal_Jordan),Secret Identity,Good Characters,Brown Eyes,Brown Hair,Male Characters,,Living Characters,1565.0,"1959, October",1959.0
3,1659,James Gordon (New Earth),\/wiki\/James_Gordon_(New_Earth),Public Identity,Good Characters,Brown Eyes,White Hair,Male Characters,,Living Characters,1316.0,"1987, February",1987.0
4,1576,Richard Grayson (New Earth),\/wiki\/Richard_Grayson_(New_Earth),Secret Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,,Living Characters,1237.0,"1940, April",1940.0
5,1448,Wonder Woman (Diana Prince),\/wiki\/Wonder_Woman_(Diana_Prince),Public Identity,Good Characters,Blue Eyes,Black Hair,Female Characters,,Living Characters,1231.0,"1941, December",1941.0
6,1486,Aquaman (Arthur Curry),\/wiki\/Aquaman_(Arthur_Curry),Public Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,,Living Characters,1121.0,"1941, November",1941.0
7,1451,Timothy Drake (New Earth),\/wiki\/Timothy_Drake_(New_Earth),Secret Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,,Living Characters,1095.0,"1989, August",1989.0
8,71760,Dinah Laurel Lance (New Earth),\/wiki\/Dinah_Laurel_Lance_(New_Earth),Public Identity,Good Characters,Blue Eyes,Blond Hair,Female Characters,,Living Characters,1075.0,"1969, November",1969.0
9,1380,Flash (Barry Allen),\/wiki\/Flash_(Barry_Allen),Secret Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,,Living Characters,1028.0,"1956, October",1956.0
10,403631,GenderTest,\/wiki\/GenderTest,Secret Identity,Good Characters,Blue Eyes,Blond Hair,Female Characters,,Living Characters,1028.0,"1956, October",1956.0


In [70]:
selected_marvel_heroes = marvel_heroes.head(10)
selected_marvel_heroes

Unnamed: 0,page_id,name,urlslug,ID,ALIGN,EYE,HAIR,SEX,GSM,ALIVE,APPEARANCES,FIRST APPEARANCE,Year
0,1678,Spider-Man (Peter Parker),\/Spider-Man_(Peter_Parker),Secret Identity,Good Characters,Hazel Eyes,Brown Hair,Male Characters,,Living Characters,4043.0,Aug-62,1962.0
1,7139,Captain America (Steven Rogers),\/Captain_America_(Steven_Rogers),Public Identity,Good Characters,Blue Eyes,White Hair,Male Characters,,Living Characters,3360.0,Mar-41,1941.0
2,64786,"Wolverine (James \""Logan\"" Howlett)",\/Wolverine_(James_%22Logan%22_Howlett),Public Identity,Neutral Characters,Blue Eyes,Black Hair,Male Characters,,Living Characters,3061.0,Oct-74,1974.0
3,1868,"Iron Man (Anthony \""Tony\"" Stark)",\/Iron_Man_(Anthony_%22Tony%22_Stark),Public Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,,Living Characters,2961.0,Mar-63,1963.0
4,2460,Thor (Thor Odinson),\/Thor_(Thor_Odinson),No Dual Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,,Living Characters,2258.0,Nov-50,1950.0
5,2458,Benjamin Grimm (Earth-616),\/Benjamin_Grimm_(Earth-616),Public Identity,Good Characters,Blue Eyes,No Hair,Male Characters,,Living Characters,2255.0,Nov-61,1961.0
6,2166,Reed Richards (Earth-616),\/Reed_Richards_(Earth-616),Public Identity,Good Characters,Brown Eyes,Brown Hair,Male Characters,,Living Characters,2072.0,Nov-61,1961.0
7,1833,Hulk (Robert Bruce Banner),\/Hulk_(Robert_Bruce_Banner),Public Identity,Good Characters,Brown Eyes,Brown Hair,Male Characters,,Living Characters,2017.0,May-62,1962.0
8,29481,Scott Summers (Earth-616),\/Scott_Summers_(Earth-616),Public Identity,Neutral Characters,Brown Eyes,Brown Hair,Male Characters,,Living Characters,1955.0,Sep-63,1963.0
9,1837,Jonathan Storm (Earth-616),\/Jonathan_Storm_(Earth-616),Public Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,,Living Characters,1934.0,Nov-61,1961.0


In [71]:
del selected_dc_heroes["GSM"]
del selected_marvel_heroes["GSM"]

In [72]:
selected_dc_heroes

Unnamed: 0,page_id,name,urlslug,ID,ALIGN,EYE,HAIR,SEX,ALIVE,APPEARANCES,FIRST APPEARANCE,YEAR
1,23387,Superman (Clark Kent),\/wiki\/Superman_(Clark_Kent),Secret Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,Living Characters,2496.0,"1986, October",1986.0
2,1458,Green Lantern (Hal Jordan),\/wiki\/Green_Lantern_(Hal_Jordan),Secret Identity,Good Characters,Brown Eyes,Brown Hair,Male Characters,Living Characters,1565.0,"1959, October",1959.0
3,1659,James Gordon (New Earth),\/wiki\/James_Gordon_(New_Earth),Public Identity,Good Characters,Brown Eyes,White Hair,Male Characters,Living Characters,1316.0,"1987, February",1987.0
4,1576,Richard Grayson (New Earth),\/wiki\/Richard_Grayson_(New_Earth),Secret Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,Living Characters,1237.0,"1940, April",1940.0
5,1448,Wonder Woman (Diana Prince),\/wiki\/Wonder_Woman_(Diana_Prince),Public Identity,Good Characters,Blue Eyes,Black Hair,Female Characters,Living Characters,1231.0,"1941, December",1941.0
6,1486,Aquaman (Arthur Curry),\/wiki\/Aquaman_(Arthur_Curry),Public Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,Living Characters,1121.0,"1941, November",1941.0
7,1451,Timothy Drake (New Earth),\/wiki\/Timothy_Drake_(New_Earth),Secret Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,Living Characters,1095.0,"1989, August",1989.0
8,71760,Dinah Laurel Lance (New Earth),\/wiki\/Dinah_Laurel_Lance_(New_Earth),Public Identity,Good Characters,Blue Eyes,Blond Hair,Female Characters,Living Characters,1075.0,"1969, November",1969.0
9,1380,Flash (Barry Allen),\/wiki\/Flash_(Barry_Allen),Secret Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,Living Characters,1028.0,"1956, October",1956.0
10,403631,GenderTest,\/wiki\/GenderTest,Secret Identity,Good Characters,Blue Eyes,Blond Hair,Female Characters,Living Characters,1028.0,"1956, October",1956.0


In [73]:
selected_marvel_heroes

Unnamed: 0,page_id,name,urlslug,ID,ALIGN,EYE,HAIR,SEX,ALIVE,APPEARANCES,FIRST APPEARANCE,Year
0,1678,Spider-Man (Peter Parker),\/Spider-Man_(Peter_Parker),Secret Identity,Good Characters,Hazel Eyes,Brown Hair,Male Characters,Living Characters,4043.0,Aug-62,1962.0
1,7139,Captain America (Steven Rogers),\/Captain_America_(Steven_Rogers),Public Identity,Good Characters,Blue Eyes,White Hair,Male Characters,Living Characters,3360.0,Mar-41,1941.0
2,64786,"Wolverine (James \""Logan\"" Howlett)",\/Wolverine_(James_%22Logan%22_Howlett),Public Identity,Neutral Characters,Blue Eyes,Black Hair,Male Characters,Living Characters,3061.0,Oct-74,1974.0
3,1868,"Iron Man (Anthony \""Tony\"" Stark)",\/Iron_Man_(Anthony_%22Tony%22_Stark),Public Identity,Good Characters,Blue Eyes,Black Hair,Male Characters,Living Characters,2961.0,Mar-63,1963.0
4,2460,Thor (Thor Odinson),\/Thor_(Thor_Odinson),No Dual Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,Living Characters,2258.0,Nov-50,1950.0
5,2458,Benjamin Grimm (Earth-616),\/Benjamin_Grimm_(Earth-616),Public Identity,Good Characters,Blue Eyes,No Hair,Male Characters,Living Characters,2255.0,Nov-61,1961.0
6,2166,Reed Richards (Earth-616),\/Reed_Richards_(Earth-616),Public Identity,Good Characters,Brown Eyes,Brown Hair,Male Characters,Living Characters,2072.0,Nov-61,1961.0
7,1833,Hulk (Robert Bruce Banner),\/Hulk_(Robert_Bruce_Banner),Public Identity,Good Characters,Brown Eyes,Brown Hair,Male Characters,Living Characters,2017.0,May-62,1962.0
8,29481,Scott Summers (Earth-616),\/Scott_Summers_(Earth-616),Public Identity,Neutral Characters,Brown Eyes,Brown Hair,Male Characters,Living Characters,1955.0,Sep-63,1963.0
9,1837,Jonathan Storm (Earth-616),\/Jonathan_Storm_(Earth-616),Public Identity,Good Characters,Blue Eyes,Blond Hair,Male Characters,Living Characters,1934.0,Nov-61,1961.0


In [74]:
left1

Unnamed: 0,key,value
0,a,0
1,b,1
2,a,2
3,a,3
4,b,4
5,c,5


In [75]:
right1

Unnamed: 0,group_val
a,3.5
b,7.0


In [76]:
pd.merge(left1,right1,left_on="key",right_index=True)

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0


In [77]:
pd.merge(left1,right1, left_on="key", right_index=True, how="outer")

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0
5,c,5,


In [78]:
pd.merge(left1,right1, left_on="key", right_index=True, how="left")

Unnamed: 0,key,value,group_val
0,a,0,3.5
1,b,1,7.0
2,a,2,3.5
3,a,3,3.5
4,b,4,7.0
5,c,5,


In [79]:
pd.merge(left1,right1, left_on="key", right_index=True, how="right")

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0


# Concatenating Along an Axis

In [84]:
arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [86]:
np.concatenate((arr,arr))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [95]:
np.concatenate((arr,arr),axis=1)
# pd.concat((arr,arr),axis=1) #TypeError: cannot concatenate object of type '<class 'numpy.ndarray'>'; only Series and DataFrame objs are valid


array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [96]:
# Concatening series with no similar indexes.
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])

pd.concat([s1,s2,s3])

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [97]:
# We can just concat data of passed pandas object using np.concatenate.
np.concatenate([s1,s2,s3])

array([0, 1, 2, 3, 4, 5, 6])

In [101]:
s1

a    0
b    1
dtype: int64

In [102]:
s2

c    2
d    3
e    4
dtype: int64

In [103]:
s3

f    5
g    6
dtype: int64

In [98]:
# Producing a dataframe when changing axis from 0 to 1.
pd.concat([s1,s2,s3],axis=1)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [99]:
# Producing a dataframe when changing axis from 0 to 1.
pd.concat([s1,s2,s3],axis=1,sort=False)

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [100]:
# Producing a dataframe when changing axis from 0 to 1.
pd.concat([s1,s2,s3],axis=1,sort=True)

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [105]:
# Again with inner join.
pd.concat([s1,s2,s3],axis=1,sort=False,join="inner")

Unnamed: 0,0,1,2


### Again with Inner join.

In [107]:
s4 = pd.concat([s1, s3])
s4

a    0
b    1
f    5
g    6
dtype: int64

In [109]:
print(s1)
pd.concat((s1,s4),join="inner")

a    0
b    1
dtype: int64


a    0
b    1
a    0
b    1
f    5
g    6
dtype: int64

In [110]:
print(s1)
pd.concat((s1,s4),axis=1,join="inner")

a    0
b    1
dtype: int64


Unnamed: 0,0,1
a,0,0
b,1,1


In [None]:
pd.concat([s1, s4], axis=1, join_axes=[['a', 'c', 'b', 'e']])