[Reference: "Less Known but Very Useful Pandas Functions" (Towards Data Science)](https://towardsdatascience.com/less-known-but-very-useful-pandas-functions-1e1756afb2a9)

In [2]:
import pandas as pd
import numpy as np

# Explode

In [2]:
# Sample frame where one 'measurement' cell holds a list; the scalar 'day'
# value is broadcast to every row.
df = pd.DataFrame(
    {
        "ID": ["a", "b", "c"],
        "measurement": [4, 6, [2, 3, 8]],
        "day": 1,
    }
)
df

Unnamed: 0,ID,measurement,day
0,a,4,1
1,b,6,1
2,c,"[2, 3, 8]",1


In [3]:
# Expand the list-valued cell into one row per element, then renumber the
# rows so the repeated index labels produced by explode are discarded.
(
    df
    .explode('measurement')
    .reset_index(drop=True)
)

Unnamed: 0,ID,measurement,day
0,a,4,1
1,b,6,1
2,c,2,1
3,c,3,1
4,c,8,1


In [4]:
# Sample frame with list-valued cells in two different columns (and in
# different rows), used to demonstrate chained explode calls.
df2 = pd.DataFrame(
    {
        "ID": ["a", "b", "c"],
        "measurement_1": [4, 6, [2, 3, 8]],
        "measurement_2": [1, [7, 9], 1],
        "day": 1,
    }
)
df2

Unnamed: 0,ID,measurement_1,measurement_2,day
0,a,4,1,1
1,b,6,"[7, 9]",1
2,c,"[2, 3, 8]",1,1


In [5]:
# Explode each list-valued column in turn, renumbering the rows after every
# step so each explode starts from a unique index.
(
    df2
    .explode('measurement_1')
    .reset_index(drop=True)
    .explode('measurement_2')
    .reset_index(drop=True)
)

Unnamed: 0,ID,measurement_1,measurement_2,day
0,a,4,1,1
1,b,6,7,1
2,b,6,9,1
3,c,2,1,1
4,c,3,1,1
5,c,8,1,1


In [6]:
# Same double explode, but the rows are renumbered only once at the end.
(
    df2
    .explode('measurement_1')
    .explode('measurement_2')
    .reset_index(drop=True)
)

Unnamed: 0,ID,measurement_1,measurement_2,day
0,a,4,1,1
1,b,6,7,1
2,b,6,9,1
3,c,2,1,1
4,c,3,1,1
5,c,8,1,1


# Nunique

In [3]:
# Sample frame with repeated names and repeated measurement values, used to
# demonstrate counting distinct values.
df3 = pd.DataFrame(
    {
        "ID": [1, 2, 3, 4, 5, 6, 7, 8],
        "name": ["John", "Alex", "Alex", "Alex", "Oscar", "Derek", "Derek", "Will"],
        "measurement_1": [4, 7, 8, 9, 2, 6, 6, 5],
        "measurement_2": [4, 6, 9, 2, 6, 6, 5, 5],
        "measurement_3": [4, 2, 3, 5, 1, 4, 5, 9],
    }
)
df3

Unnamed: 0,ID,name,measurement_1,measurement_2,measurement_3
0,1,John,4,4,4
1,2,Alex,7,6,2
2,3,Alex,8,9,3
3,4,Alex,9,2,5
4,5,Oscar,2,6,1
5,6,Derek,6,6,4
6,7,Derek,6,5,5
7,8,Will,5,5,9


In [4]:
# Number of distinct values in the 'name' column.
df3['name'].nunique()

5

In [5]:
# Same count obtained the long way: one value_counts entry per distinct name.
len(df3['name'].value_counts())

5

In [6]:
# Distinct-value count for every column (axis=0 is the default direction).
df3.nunique(axis=0)

ID               8
name             5
measurement_1    7
measurement_2    5
measurement_3    6
dtype: int64

In [7]:
# Distinct-value count within each row, i.e. computed across the columns.
df3.nunique(axis='columns')

0    3
1    4
2    4
3    5
4    5
5    3
6    4
7    4
dtype: int64

# Lookup

In [9]:
# One row per day: 'Person' names the person selected that day, and the four
# name columns hold each person's value for that day.
df4 = pd.DataFrame(
    {
        "Day": [1, 2, 3, 4, 5, 6, 7, 8],
        "Person": ["Alex", "John", "Alex", "Derek", "Oscar", "John", "Derek", "Oscar"],
        "John": [4, 7, 8, 9, 2, 6, 6, 5],
        "Alex": [4, 6, 9, 2, 6, 6, 5, 5],
        "Oscar": [4, 2, 3, 5, 1, 4, 5, 9],
        "Derek": [6, 2, 1, 8, 7, 8, 4, 5],
    }
)
df4

Unnamed: 0,Day,Person,John,Alex,Oscar,Derek
0,1,Alex,4,4,4,6
1,2,John,7,6,2,2
2,3,Alex,8,9,3,1
3,4,Derek,9,2,5,8
4,5,Oscar,2,6,1,7
5,6,John,6,6,4,8
6,7,Derek,6,5,5,4
7,8,Oscar,5,5,9,5


In [10]:
# For every row, pick the value from the column named in that row's 'Person'.
# DataFrame.lookup was deprecated in pandas 1.2 and removed in 2.0, so the
# label-based lookup is done with factorize + NumPy integer indexing instead.
# person_codes[i] is the position of row i's person within person_names.
person_codes, person_names = pd.factorize(df4['Person'])
# Arrange the per-person columns in person_names order, then take one value
# per row: (row i, column person_codes[i]).
person_scores = df4.reindex(columns=person_names).to_numpy()
df4['Person_point'] = person_scores[np.arange(len(df4)), person_codes]
df4

Unnamed: 0,Day,Person,John,Alex,Oscar,Derek,Person_point
0,1,Alex,4,4,4,6,4
1,2,John,7,6,2,2,7
2,3,Alex,8,9,3,1,9
3,4,Derek,9,2,5,8,8
4,5,Oscar,2,6,1,7,1
5,6,John,6,6,4,8,6
6,7,Derek,6,5,5,4,4
7,8,Oscar,5,5,9,5,9


In [11]:
# Same row-wise lookup restricted to the first five rows, returned as a NumPy
# array. DataFrame.lookup was removed in pandas 2.0, so factorize + NumPy
# integer indexing is used instead.
first_rows = df4.iloc[:5]
first_codes, first_names = pd.factorize(first_rows['Person'])
# Column order follows first_names, so first_codes indexes the right column.
first_rows.reindex(columns=first_names).to_numpy()[np.arange(len(first_rows)), first_codes]

array([4, 7, 9, 8, 1])

# Where

In [12]:
# Keep values greater than 5; everything else becomes NaN (the default
# replacement), which is why the result is float.
person_point = df4['Person_point']
person_point.where(person_point > 5)

0    NaN
1    7.0
2    9.0
3    8.0
4    NaN
5    6.0
6    NaN
7    9.0
Name: Person_point, dtype: float64

In [13]:
# Keep values greater than 5 and substitute a label for the rest; mixing
# numbers and strings makes the result an object Series.
person_point = df4['Person_point']
person_point.where(person_point > 5, 'Not_qualified')

0    Not_qualified
1                7
2                9
3                8
4    Not_qualified
5                6
6    Not_qualified
7                9
Name: Person_point, dtype: object

In [14]:
# The where() calls above returned new Series and were not assigned back,
# so df4 itself still holds the original Person_point values.
df4

Unnamed: 0,Day,Person,John,Alex,Oscar,Derek,Person_point
0,1,Alex,4,4,4,6,4
1,2,John,7,6,2,2,7
2,3,Alex,8,9,3,1,9
3,4,Derek,9,2,5,8,8
4,5,Oscar,2,6,1,7,1
5,6,John,6,6,4,8,6
6,7,Derek,6,5,5,4,4
7,8,Oscar,5,5,9,5,9


In [15]:
# Replace non-qualifying points (<= 5) in df4 itself.
# The result is assigned back explicitly instead of calling
# where(..., inplace=True) on df4['Person_point']: that form mutates a Series
# obtained by indexing, which does not update df4 under pandas Copy-on-Write.
# The mask is built from a numeric view of the column so the cell can be
# re-run after the column already contains 'Not_qualified' strings (those
# coerce to NaN, compare False, and are simply labelled again).
numeric_points = pd.to_numeric(df4['Person_point'], errors='coerce')
df4['Person_point'] = df4['Person_point'].where(numeric_points > 5, 'Not_qualified')

df4

Unnamed: 0,Day,Person,John,Alex,Oscar,Derek,Person_point
0,1,Alex,4,4,4,6,Not_qualified
1,2,John,7,6,2,2,7
2,3,Alex,8,9,3,1,9
3,4,Derek,9,2,5,8,8
4,5,Oscar,2,6,1,7,Not_qualified
5,6,John,6,6,4,8,6
6,7,Derek,6,5,5,4,Not_qualified
7,8,Oscar,5,5,9,5,9


# Infer_objects

In [16]:
# Every column starts with a string in row 0, so all four columns are stored
# with object dtype even though A, B and C are otherwise numeric/boolean.
df5 = pd.DataFrame(
    {
        "A": ["a", 1, 2, 3],
        "B": ["b", 2.1, 1.5, 2],
        "C": ["c", True, False, False],
        "D": ["a", "b", "c", "d"],
    }
)

df5

Unnamed: 0,A,B,C,D
0,a,b,c,a
1,1,2.1,True,b
2,2,1.5,False,c
3,3,2,False,d


In [17]:
# Drop the string-only first row (index label 0); the dtypes stay object.
# Slicing by label with .loc keeps this cell idempotent: re-running it leaves
# rows 1..3 in place, whereas the positional df5[1:] would drop one more row
# on every execution.
df5 = df5.loc[1:]
df5.dtypes

A    object
B    object
C    object
D    object
dtype: object

In [18]:
# Let pandas re-infer a better dtype for each object column; the result is a
# new frame, df5 itself is not modified.
df5_inferred = df5.infer_objects()
df5_inferred.dtypes

A      int64
B    float64
C       bool
D     object
dtype: object

In [19]:
# infer_objects() returned a converted frame that was not assigned back,
# so df5 still reports object dtype for every column.
df5.dtypes

A    object
B    object
C    object
D    object
dtype: object