In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df = pd.DataFrame({'A':[1,2,3,4,np.nan,30],'B':[5,6,7,np.nan,np.nan,31],'C':[8,9,np.nan,np.nan,np.nan,32],'D':[10,np.nan,np.nan,np.nan,np.nan,33],'E':[np.nan,np.nan,np.nan,np.nan,np.nan,34],'F':[20,21,22,23,24,35]})

In [3]:
df

Unnamed: 0,A,B,C,D,E,F
0,1.0,5.0,8.0,10.0,,20
1,2.0,6.0,9.0,,,21
2,3.0,7.0,,,,22
3,4.0,,,,,23
4,,,,,,24
5,30.0,31.0,32.0,33.0,34.0,35


In [4]:
#axis=0 - rows
df.dropna(axis=0,thresh=3) #thresh represents number of integers present

Unnamed: 0,A,B,C,D,E,F
0,1.0,5.0,8.0,10.0,,20
1,2.0,6.0,9.0,,,21
2,3.0,7.0,,,,22
5,30.0,31.0,32.0,33.0,34.0,35


In [6]:
df = pd.DataFrame({'A':[1,2,np.nan],'B':[5,np.nan,np.nan],'C':[1,2,3]})
df['States']="CA NV AZ".split()
df.set_index('States',inplace=True)
print(df)

          A    B  C
States             
CA      1.0  5.0  1
NV      2.0  NaN  2
AZ      NaN  NaN  3


In [7]:
print("\nDropping any rows with a NaN value\n",'-'*35, sep='')
print(df.dropna(axis=0))


Dropping any rows with a NaN value
-----------------------------------
          A    B  C
States             
CA      1.0  5.0  1


In [8]:
print("\nDropping any column with a NaN value\n",'-'*35, sep='')
print(df.dropna(axis=1))


Dropping any column with a NaN value
-----------------------------------
        C
States   
CA      1
NV      2
AZ      3


In [9]:
print("\nDropping a row with a minimum 2 NaN value using 'thresh' parameter\n",'-'*68, sep='')
print(df.dropna(axis=0, thresh=1))


Dropping a row with a minimum 2 NaN value using 'thresh' parameter
--------------------------------------------------------------------
          A    B  C
States             
CA      1.0  5.0  1
NV      2.0  NaN  2
AZ      NaN  NaN  3


In [10]:
print("\nFilling values with a default value\n",'-'*35, sep='')
print(df.fillna(value='FILL VALUE'))


Filling values with a default value
-----------------------------------
                 A           B  C
States                           
CA             1.0         5.0  1
NV             2.0  FILL VALUE  2
AZ      FILL VALUE  FILL VALUE  3


In [11]:
print("\nFilling values with a computed value (mean of column A here)\n",'-'*60, sep='')
print(df.fillna(value=df['A'].mean()))


Filling values with a computed value (mean of column A here)
------------------------------------------------------------
          A    B  C
States             
CA      1.0  5.0  1
NV      2.0  1.5  2
AZ      1.5  1.5  3


In [12]:
# Create dataframe
data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}
df = pd.DataFrame(data)
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [13]:
byComp = df.groupby('Company')
print("\nGrouping by 'Company' column and listing mean sales\n",'-'*55, sep='')
print(byComp.mean())


Grouping by 'Company' column and listing mean sales
-------------------------------------------------------
         Sales
Company       
FB       296.5
GOOG     160.0
MSFT     232.0


In [14]:
print("\nGrouping by 'Company' column and listing sum of sales\n",'-'*55, sep='')
print(byComp.sum())


Grouping by 'Company' column and listing sum of sales
-------------------------------------------------------
         Sales
Company       
FB         593
GOOG       320
MSFT       464


In [20]:
print("\nAll in one line of command (Stats for 'FB')\n",'-'*65, sep='')
print(pd.DataFrame(df.groupby('Company').describe().loc['FB']).transpose())


All in one line of command (Stats for 'FB')
-----------------------------------------------------------------
   Sales                                                       
   count   mean        std    min     25%    50%     75%    max
FB   2.0  296.5  75.660426  243.0  269.75  296.5  323.25  350.0


In [21]:
print("\nSame type of extraction with little different command\n",'-'*68, sep='')
print(df.groupby('Company').describe().loc[['GOOG', 'MSFT']])


Same type of extraction with little different command
--------------------------------------------------------------------
        Sales                                                      
        count   mean         std    min    25%    50%    75%    max
Company                                                            
GOOG      2.0  160.0   56.568542  120.0  140.0  160.0  180.0  200.0
MSFT      2.0  232.0  152.735065  124.0  178.0  232.0  286.0  340.0


In [22]:
# Merging two data frames
# Creating data frames
import pandas as pd
import numpy as np
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3'],
                        'C': ['C0', 'C1', 'C2', 'C3'],
                        'D': ['D0', 'D1', 'D2', 'D3']},
                        index=[0, 1, 2, 3])

In [7]:
x={'a':2,'b':5,'c':3}
y={'a':3,'b':2,'d':2}
z={**x,**y}
z

{'a': 3, 'b': 2, 'c': 3, 'd': 2}

In [23]:
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [24]:
df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                        'B': ['B4', 'B5', 'B6', 'B7'],
                        'C': ['C4', 'C5', 'C6', 'C7'],
                        'D': ['D4', 'D5', 'D6', 'D7']},
                         index=[4, 5, 6, 7])

In [25]:
df2

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [26]:
df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                        'B': ['B8', 'B9', 'B10', 'B11'],
                        'C': ['C8', 'C9', 'C10', 'C11'],
                        'D': ['D8', 'D9', 'D10', 'D11']},
                        index=[8,9,10,11])

In [27]:
df3

Unnamed: 0,A,B,C,D
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


In [28]:
print("\nThe DataFrame number 1\n",'-'*30, sep='')
print(df1)



The DataFrame number 1
------------------------------
    A   B   C   D
0  A0  B0  C0  D0
1  A1  B1  C1  D1
2  A2  B2  C2  D2
3  A3  B3  C3  D3


In [29]:
print("\nThe DataFrame number 2\n",'-'*30, sep='')
print(df2)



The DataFrame number 2
------------------------------
    A   B   C   D
4  A4  B4  C4  D4
5  A5  B5  C5  D5
6  A6  B6  C6  D6
7  A7  B7  C7  D7


In [30]:
print("\nThe DataFrame number 3\n",'-'*30, sep='')
print(df3)


The DataFrame number 3
------------------------------
      A    B    C    D
8    A8   B8   C8   D8
9    A9   B9   C9   D9
10  A10  B10  C10  D10
11  A11  B11  C11  D11


In [31]:
#concatenation
#axis=0 -> rows
df_cat1 = pd.concat([df1,df2,df3], axis=0)
print("\nAfter concatenation along row\n",'-'*30, sep='')
print(df_cat1)



After concatenation along row
------------------------------
      A    B    C    D
0    A0   B0   C0   D0
1    A1   B1   C1   D1
2    A2   B2   C2   D2
3    A3   B3   C3   D3
4    A4   B4   C4   D4
5    A5   B5   C5   D5
6    A6   B6   C6   D6
7    A7   B7   C7   D7
8    A8   B8   C8   D8
9    A9   B9   C9   D9
10  A10  B10  C10  D10
11  A11  B11  C11  D11


In [33]:
df_cat2 = pd.concat([df1,df2,df3], axis=1)
print("\nAfter concatenation along column\n",'-'*60, sep='')
print(df_cat2)


After concatenation along column
------------------------------------------------------------
      A    B    C    D    A    B    C    D    A    B    C    D
0    A0   B0   C0   D0  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN
1    A1   B1   C1   D1  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN
2    A2   B2   C2   D2  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN
3    A3   B3   C3   D3  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN
4   NaN  NaN  NaN  NaN   A4   B4   C4   D4  NaN  NaN  NaN  NaN
5   NaN  NaN  NaN  NaN   A5   B5   C5   D5  NaN  NaN  NaN  NaN
6   NaN  NaN  NaN  NaN   A6   B6   C6   D6  NaN  NaN  NaN  NaN
7   NaN  NaN  NaN  NaN   A7   B7   C7   D7  NaN  NaN  NaN  NaN
8   NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   A8   B8   C8   D8
9   NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   A9   B9   C9   D9
10  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  A10  B10  C10  D10
11  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  A11  B11  C11  D11


In [35]:
df_cat2.fillna(value=0, inplace=True)
print("\nAfter filling missing values with zero\n",'-'*60, sep='')
print(df_cat2)


After filling missing values with zero
------------------------------------------------------------
     A   B   C   D   A   B   C   D    A    B    C    D
0   A0  B0  C0  D0   0   0   0   0    0    0    0    0
1   A1  B1  C1  D1   0   0   0   0    0    0    0    0
2   A2  B2  C2  D2   0   0   0   0    0    0    0    0
3   A3  B3  C3  D3   0   0   0   0    0    0    0    0
4    0   0   0   0  A4  B4  C4  D4    0    0    0    0
5    0   0   0   0  A5  B5  C5  D5    0    0    0    0
6    0   0   0   0  A6  B6  C6  D6    0    0    0    0
7    0   0   0   0  A7  B7  C7  D7    0    0    0    0
8    0   0   0   0   0   0   0   0   A8   B8   C8   D8
9    0   0   0   0   0   0   0   0   A9   B9   C9   D9
10   0   0   0   0   0   0   0   0  A10  B10  C10  D10
11   0   0   0   0   0   0   0   0  A11  B11  C11  D11


In [25]:
# merging by a common key

In [36]:
import pandas as pd
import numpy as np
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
   
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                          'C': ['C0', 'C1', 'C2', 'C3'],
                          'D': ['D0', 'D1', 'D2', 'D3']})


In [37]:
left

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [38]:
right

Unnamed: 0,key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [39]:
print("\nThe DataFrame 'left'\n",'-'*30, sep='')
print(left)


The DataFrame 'left'
------------------------------
  key   A   B
0  K0  A0  B0
1  K1  A1  B1
2  K2  A2  B2
3  K3  A3  B3


In [40]:
print("\nThe DataFrame 'right'\n",'-'*30, sep='')
print(right)


The DataFrame 'right'
------------------------------
  key   C   D
0  K0  C0  D0
1  K1  C1  D1
2  K2  C2  D2
3  K3  C3  D3


In [41]:
merge1= pd.merge(right,left,how='inner',on='key')
print("\nAfter simple merging with 'inner' method\n",'-'*50, sep='')
print(merge1)


After simple merging with 'inner' method
--------------------------------------------------
  key   C   D   A   B
0  K0  C0  D0  A0  B0
1  K1  C1  D1  A1  B1
2  K2  C2  D2  A2  B2
3  K3  C3  D3  A3  B3


In [42]:
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                     'key2': ['K0', 'K1', 'K0', 'K1'],
                        'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3']})
    
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                               'key2': ['K0', 'K0', 'K0', 'K0'],
                                  'C': ['C0', 'C1', 'C2', 'C3'],
                                  'D': ['D0', 'D1', 'D2', 'D3']})

In [43]:
left

Unnamed: 0,key1,key2,A,B
0,K0,K0,A0,B0
1,K0,K1,A1,B1
2,K1,K0,A2,B2
3,K2,K1,A3,B3


In [44]:
right

Unnamed: 0,key1,key2,C,D
0,K0,K0,C0,D0
1,K1,K0,C1,D1
2,K1,K0,C2,D2
3,K2,K0,C3,D3


In [12]:
pd.merge(left, right, on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2


In [46]:
pd.merge(left, right, how='left',on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K0,K1,A1,B1,,
2,K1,K0,A2,B2,C1,D1
3,K1,K0,A2,B2,C2,D2
4,K2,K1,A3,B3,,


In [45]:
pd.merge(left, right, how='right',on=['key1', 'key2'])

Unnamed: 0,key1,key2,A,B,C,D
0,K0,K0,A0,B0,C0,D0
1,K1,K0,A2,B2,C1,D1
2,K1,K0,A2,B2,C2,D2
3,K2,K0,,,C3,D3


In [47]:
#join operators
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                      index=['K0', 'K1', 'K2']) 

right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                    'D': ['D0', 'D2', 'D3']},
                      index=['K0', 'K2', 'K3'])

In [48]:
left

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [49]:
right

Unnamed: 0,C,D
K0,C0,D0
K2,C2,D2
K3,C3,D3


In [26]:
left.join(right)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [42]:
left.join(right, how='outer')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


# Creating a DataFrame using nested lists

In [59]:
import pandas as pd
left = pd.DataFrame([['a', 1,11], ['b', 2,22],['c', 3,33]], list('XYZ'), list('ABC'))
left

Unnamed: 0,A,B,C
X,a,1,11
Y,b,2,22
Z,c,3,33


In [60]:
import pandas as pd
left = pd.DataFrame([['a', 1], ['b', 2]], [['a', 'b'], [1, 2]], list('AB'))
left

Unnamed: 0,Unnamed: 1,A,B
a,1,a,1
b,2,b,2


In [61]:
import pandas as pd
left = pd.DataFrame([['a', 1], ['b', 2]], [['a', 'b'], [1, 2]],[['A', 'B'], ["C", "D"]])
left

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,C,D
a,1,a,1
b,2,b,2


In [1]:
# use of apply functions

In [62]:
import pandas as pd
df = pd.DataFrame({'col1':[1,2,3,4,5,6,7,8,9,10],
                   'col2':[444,555,666,444,333,222,666,777,666,555],
                   'col3':'aaa bb c dd eeee fff gg h iii j'.split()})
df

Unnamed: 0,col1,col2,col3
0,1,444,aaa
1,2,555,bb
2,3,666,c
3,4,444,dd
4,5,333,eeee
5,6,222,fff
6,7,666,gg
7,8,777,h
8,9,666,iii
9,10,555,j


In [63]:
# Define a function
def testfunc(x):
    if (x> 500):
        return (10*np.log10(x))
    else:
        return (x/10)

In [64]:
df['FuncApplied'] = df['col1'].apply(testfunc)
print(df)

   col1  col2  col3  FuncApplied
0     1   444   aaa          0.1
1     2   555    bb          0.2
2     3   666     c          0.3
3     4   444    dd          0.4
4     5   333  eeee          0.5
5     6   222   fff          0.6
6     7   666    gg          0.7
7     8   777     h          0.8
8     9   666   iii          0.9
9    10   555     j          1.0


In [65]:
df['FuncApplied2'] = df['col2'].apply(testfunc)
print(df)

   col1  col2  col3  FuncApplied  FuncApplied2
0     1   444   aaa          0.1     44.400000
1     2   555    bb          0.2     27.442930
2     3   666     c          0.3     28.234742
3     4   444    dd          0.4     44.400000
4     5   333  eeee          0.5     33.300000
5     6   222   fff          0.6     22.200000
6     7   666    gg          0.7     28.234742
7     8   777     h          0.8     28.904210
8     9   666   iii          0.9     28.234742
9    10   555     j          1.0     27.442930


In [66]:
df['col3length']= df['col3'].apply(len)
print(df)

   col1  col2  col3  FuncApplied  FuncApplied2  col3length
0     1   444   aaa          0.1     44.400000           3
1     2   555    bb          0.2     27.442930           2
2     3   666     c          0.3     28.234742           1
3     4   444    dd          0.4     44.400000           2
4     5   333  eeee          0.5     33.300000           4
5     6   222   fff          0.6     22.200000           3
6     7   666    gg          0.7     28.234742           2
7     8   777     h          0.8     28.904210           1
8     9   666   iii          0.9     28.234742           3
9    10   555     j          1.0     27.442930           1


In [67]:
df1=df[df['col2']>300]
df['col4']=df1['col2'].cumsum()
df

Unnamed: 0,col1,col2,col3,FuncApplied,FuncApplied2,col3length,col4
0,1,444,aaa,0.1,44.4,3,444.0
1,2,555,bb,0.2,27.44293,2,999.0
2,3,666,c,0.3,28.234742,1,1665.0
3,4,444,dd,0.4,44.4,2,2109.0
4,5,333,eeee,0.5,33.3,4,2442.0
5,6,222,fff,0.6,22.2,3,
6,7,666,gg,0.7,28.234742,2,3108.0
7,8,777,h,0.8,28.90421,1,3885.0
8,9,666,iii,0.9,28.234742,3,4551.0
9,10,555,j,1.0,27.44293,1,5106.0


In [68]:
df['FuncApplied'].apply(lambda x: np.sqrt(x))

0    0.316228
1    0.447214
2    0.547723
3    0.632456
4    0.707107
5    0.774597
6    0.836660
7    0.894427
8    0.948683
9    1.000000
Name: FuncApplied, dtype: float64

In [69]:
print("\nSum of the column 'FuncApplied' is: ",df['FuncApplied'].sum())


Sum of the column 'FuncApplied' is:  5.5


In [70]:
print("Mean of the column 'FuncApplied' is: ",df['FuncApplied'].mean())


Mean of the column 'FuncApplied' is:  0.55


In [71]:
print("Std dev of the column 'FuncApplied' is: ",df['FuncApplied'].std())


Std dev of the column 'FuncApplied' is:  0.30276503540974914


In [72]:
print("Min and max of the column 'FuncApplied' are: ",df['FuncApplied'].min(),"and",df['FuncApplied'].max())

Min and max of the column 'FuncApplied' are:  0.1 and 1.0


In [61]:
### Deletion, sorting, list of column and row names

In [73]:
print("\nName of columns\n",'-'*20, sep='')
print(df.columns)



Name of columns
--------------------
Index(['col1', 'col2', 'col3', 'FuncApplied', 'FuncApplied2', 'col3length',
       'col4'],
      dtype='object')


In [74]:
l = list(df.columns)
print("\nColumn names in a list of strings for later manipulation:",l)


Column names in a list of strings for later manipulation: ['col1', 'col2', 'col3', 'FuncApplied', 'FuncApplied2', 'col3length', 'col4']


In [75]:
print("\nDeleting last column by 'del' command\n",'-'*50, sep='')
del df['col3length']
print(df)
df['col3length']= df['col3'].apply(len)


Deleting last column by 'del' command
--------------------------------------------------
   col1  col2  col3  FuncApplied  FuncApplied2    col4
0     1   444   aaa          0.1     44.400000   444.0
1     2   555    bb          0.2     27.442930   999.0
2     3   666     c          0.3     28.234742  1665.0
3     4   444    dd          0.4     44.400000  2109.0
4     5   333  eeee          0.5     33.300000  2442.0
5     6   222   fff          0.6     22.200000     NaN
6     7   666    gg          0.7     28.234742  3108.0
7     8   777     h          0.8     28.904210  3885.0
8     9   666   iii          0.9     28.234742  4551.0
9    10   555     j          1.0     27.442930  5106.0


In [76]:
df.sort_values(by='col2') #inplace=False by default

Unnamed: 0,col1,col2,col3,FuncApplied,FuncApplied2,col4,col3length
5,6,222,fff,0.6,22.2,,3
4,5,333,eeee,0.5,33.3,2442.0,4
0,1,444,aaa,0.1,44.4,444.0,3
3,4,444,dd,0.4,44.4,2109.0,2
1,2,555,bb,0.2,27.44293,999.0,2
9,10,555,j,1.0,27.44293,5106.0,1
2,3,666,c,0.3,28.234742,1665.0,1
6,7,666,gg,0.7,28.234742,3108.0,2
8,9,666,iii,0.9,28.234742,4551.0,3
7,8,777,h,0.8,28.90421,3885.0,1


In [78]:
df.sort_values(by='FuncApplied',ascending=False) #inplace=False by default

Unnamed: 0,col1,col2,col3,FuncApplied,FuncApplied2,col4,col3length
9,10,555,j,1.0,27.44293,5106.0,1
8,9,666,iii,0.9,28.234742,4551.0,3
7,8,777,h,0.8,28.90421,3885.0,1
6,7,666,gg,0.7,28.234742,3108.0,2
5,6,222,fff,0.6,22.2,,3
4,5,333,eeee,0.5,33.3,2442.0,4
3,4,444,dd,0.4,44.4,2109.0,2
2,3,666,c,0.3,28.234742,1665.0,1
1,2,555,bb,0.2,27.44293,999.0,2
0,1,444,aaa,0.1,44.4,444.0,3


In [79]:
df = pd.DataFrame({'col1':[1,2,3,np.nan],
                   'col2':[np.nan,555,666,444],
                   'col3':['abc','def','ghi','xyz']})
df.head()

Unnamed: 0,col1,col2,col3
0,1.0,,abc
1,2.0,555.0,def
2,3.0,666.0,ghi
3,,444.0,xyz


In [80]:
df.isnull()

Unnamed: 0,col1,col2,col3
0,False,True,False
1,False,False,False
2,False,False,False
3,True,False,False


In [81]:
df.fillna('FILL')

Unnamed: 0,col1,col2,col3
0,1.0,FILL,abc
1,2.0,555.0,def
2,3.0,666.0,ghi
3,FILL,444.0,xyz


In [82]:
df1

Unnamed: 0,col1,col2,col3,FuncApplied,FuncApplied2,col3length
0,1,444,aaa,0.1,44.4,3
1,2,555,bb,0.2,27.44293,2
2,3,666,c,0.3,28.234742,1
3,4,444,dd,0.4,44.4,2
4,5,333,eeee,0.5,33.3,4
6,7,666,gg,0.7,28.234742,2
7,8,777,h,0.8,28.90421,1
8,9,666,iii,0.9,28.234742,3
9,10,555,j,1.0,27.44293,1


In [83]:
df2

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [84]:
df3

Unnamed: 0,A,B,C,D
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


In [85]:
pd.merge(df1, df2, how='inner')

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

In [86]:
pd.merge(df1, df2, how='outer')

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

In [87]:
pd.merge(df1, df2, how='left')

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

In [88]:
pd.merge(df1, df2, how='right')

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False