In [1]:
import pandas as pd
import numpy as np

In [2]:
# Series with gaps: labels B and D hold NaN, so the values are stored as floats.
s1 = pd.Series(
    data=[5, np.nan, 6, np.nan],
    index=['A', 'B', 'C', 'D'],
)
print(s1)

A    5.0
B    NaN
C    6.0
D    NaN
dtype: float64


In [3]:
# Fully populated integer Series (0..3) carrying the labels A-D.
s2 = pd.Series(
    data=np.arange(0, 4),
    index=['A', 'B', 'C', 'D'],
)
print(s2)

A    0
B    1
C    2
D    3
dtype: int32


In [4]:
# Manual fill: wherever s1 is NaN take the value from s2, otherwise keep s1.
# np.where returns a plain array, so the result is re-labelled with s1's index.
s3 = pd.Series(
    np.where(s1.isnull(), s2, s1),
    index=s1.index,
)
print(s3)

A    5.0
B    1.0
C    6.0
D    3.0
dtype: float64


In [5]:
# The same fill done by pandas itself: NaNs in s1 are patched with values from s2.
s4 = s1.combine_first(other=s2)
print(s4)

A    5.0
B    1.0
C    6.0
D    3.0
dtype: float64


In [6]:
# Combining two DataFrames

# First frame: every column contains at least one NaN.
df1 = pd.DataFrame(
    data={
        'col1': [5, np.nan, 15],
        'col2': [20, 25, np.nan],
        'col3': [np.nan, np.nan, 25],
    }
)
print(df1)

   col1  col2  col3
0   5.0  20.0   NaN
1   NaN  25.0   NaN
2  15.0   NaN  25.0


In [7]:
# Second frame: complete integer values, but only for col1 and col2.
df2 = pd.DataFrame(
    data={
        'col1': [10, 20, 30],
        'col2': [30, 40, 50],
    }
)
print(df2)

   col1  col2
0    10    30
1    20    40
2    30    50


In [8]:
# Patch df1's NaNs with the matching cells of df2; col3 does not exist in df2,
# so its missing values stay missing.
df1.combine_first(other=df2)

Unnamed: 0,col1,col2,col3
0,5.0,20.0,
1,20.0,25.0,
2,15.0,50.0,25.0


## Reshaping Datasets - stack and unstack

In [9]:
# 2x4 frame of the integers 0..7 with named axes: rows are 'countries',
# columns are 'attributes'. The axis names are what stack/unstack refer to.
df = pd.DataFrame(
    data=np.arange(8).reshape(2, 4),
    index=pd.Index(['America', 'China'], name='countries'),
    columns=pd.Index(['A', 'B', 'C', 'D'], name='attributes'),
)
print(df)

attributes  A  B  C  D
countries             
America     0  1  2  3
China       4  5  6  7


In [10]:
# Stacking and unstacking DataFrames
# stack() moves the column labels ('attributes') into an inner index level,
# turning the frame into a Series indexed by (countries, attributes).
stackdf = df.stack(level=-1)
print(stackdf)

countries  attributes
America    A             0
           B             1
           C             2
           D             3
China      A             4
           B             5
           C             6
           D             7
dtype: int32


In [11]:
# unstack() on the innermost level ('attributes') reverses the stack above.
unstackdf = stackdf.unstack(level=-1)
print(unstackdf)

attributes  A  B  C  D
countries             
America     0  1  2  3
China       4  5  6  7


In [12]:
# Unstack along a chosen index level.
# Unstacking the outer level ('countries') puts the countries in the columns
# and leaves 'attributes' as the row index.
# The result gets its own name so that `df2`, the operand of the earlier
# combine_first example, is not silently overwritten; re-running that cell
# after this one would otherwise produce a different result.
by_country = stackdf.unstack('countries')
print(by_country)

countries   America  China
attributes                
A                 0      4
B                 1      5
C                 2      6
D                 3      7


In [13]:
# Naming the inner level explicitly yields the same layout as a bare unstack():
# countries as rows, attributes as columns.
df3 = stackdf.unstack(level='attributes')
print(df3)

attributes  A  B  C  D
countries             
America     0  1  2  3
China       4  5  6  7


In [14]:
# Stacking and unstacking Series
# Note: s1 and s2 are rebound here, replacing the Series defined at the top of
# the notebook. The new pair shares only the labels B and C.
s1 = pd.Series(data=[5, 10, 15], index=['A', 'B', 'C'])
s2 = pd.Series(data=[20, 25, 30], index=['B', 'C', 'D'])
print(s1)

A     5
B    10
C    15
dtype: int64


In [15]:
# Show the second Series (labels B, C, D) for comparison with s1.
print(s2)

B    20
C    25
D    30
dtype: int64


In [16]:
# Concatenate with keys: each input Series is tagged with an outer index level
# ('k1' for s1, 'k2' for s2), producing a two-level indexed Series.
s3 = pd.concat(objs=[s1, s2], keys=['k1', 'k2'])
print(s3)

k1  A     5
    B    10
    C    15
k2  B    20
    C    25
    D    30
dtype: int64


In [17]:
# Pivot the inner labels (A-D) into columns. Key/label pairs that do not occur
# in s3 become NaN, which is why the frame is displayed with float values.
df_series = s3.unstack(level=-1)
print(df_series)

      A     B     C     D
k1  5.0  10.0  15.0   NaN
k2  NaN  20.0  25.0  30.0


In [18]:
# Stack the columns back into an inner index level. In the output recorded
# below the NaN cells are dropped, leaving the six original entries as floats.
df_series.stack(level=-1)

k1  A     5.0
    B    10.0
    C    15.0
k2  B    20.0
    C    25.0
    D    30.0
dtype: float64

In [19]:
# dropna=False keeps the NaN cells, so every (key, label) pair appears in the result.
# NOTE(review): `dropna` is deprecated in recent pandas releases (2.1+), where the
# new stack implementation keeps NaN rows by itself; confirm against the
# pandas version this notebook is run with.
df_series.stack(level=-1, dropna=False)

k1  A     5.0
    B    10.0
    C    15.0
    D     NaN
k2  A     NaN
    B    20.0
    C    25.0
    D    30.0
dtype: float64