In [1]:
import pandas as pd
import numpy as np

# Compute the cumulative sum of a DataFrame column

In [3]:
# Demo frame: one string column (col_A) and one integer column (col_B).
df = pd.DataFrame(
    {"col_A": ["A", "B", "C", "D"], "col_B": [1, 2, 3, 4]}
)

# cumsum() keeps a running total; on the string column it concatenates
# the values seen so far, as the recorded output below shows.
df = df.assign(
    col_C=df["col_B"].cumsum(),
    col_D=df["col_A"].cumsum(),
)
df

Unnamed: 0,col_A,col_B,col_C,col_D
0,A,1,1,A
1,B,2,3,AB
2,C,3,6,ABC
3,D,4,10,ABCD


# Assign Unique IDs to every Group

In [16]:
# Demo frame in which the key "A" occurs twice in col_A.
df = pd.DataFrame([["A", 1], ["B", 2], ["A", 3], ["D", 4]],
                  columns=["col_A", "col_B"])

# ngroup() is the public API for numbering groups: every row receives the
# integer id of the group it belongs to (groups are numbered in sorted key
# order by default). It replaces `.grouper.group_info[0]`, which reaches into
# internal groupby attributes that pandas has deprecated.
df["group_num"] = df.groupby("col_A").ngroup()
df

Unnamed: 0,col_A,col_B,group_num
0,A,1,0
1,B,2,1
2,A,3,0
3,D,4,2


#  Check if a column has NaN values

In [17]:
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
# and raises AttributeError there.
df = pd.DataFrame([["A", np.nan], ["A", 2], ["C", np.nan], ["D", 4]],
                  columns=["col_A", "col_B"])

# Series.hasnans is a boolean property that is True when the column contains
# at least one missing value.
col_A_check = df["col_A"].hasnans
col_B_check = df["col_B"].hasnans

col_A_check, col_B_check

(False, True)

# Append a list as a row to a DataFrame

In [20]:
# Demo frame with the default 0..n-1 integer index.
df = pd.DataFrame(
    data=[["A", 1], ["B", 2], ["C", 3], ["D", 4]],
    columns=["col_A", "col_B"],
)

new_row = ["E", 5]

# Assigning to a label that does not exist yet enlarges the frame; with the
# default integer index, the row count is the next free label.
next_label = len(df)
df.loc[next_label] = new_row
df

Unnamed: 0,col_A,col_B
0,A,1
1,B,2
2,C,3
3,D,4
4,E,5


# Get the first row of every unique value in a column

In [25]:
# Demo frame in which the key "A" occurs twice in col_A.
df = pd.DataFrame(
    data=[["A", 1], ["B", 2], ["A", 3], ["D", 4]],
    columns=["col_A", "col_B"],
)

# Collapse to one row per distinct col_A key, keeping the first col_B entry
# of each group; col_A becomes the index of the result.
by_key = df.groupby(by="col_A")
df = by_key.first()
df

Unnamed: 0_level_0,col_B
col_A,Unnamed: 1_level_1
A,1
B,2
D,4


In [26]:
# Same demo frame as the first-row example: key "A" occurs twice in col_A.
df = pd.DataFrame(
    data=[["A", 1], ["B", 2], ["A", 3], ["D", 4]],
    columns=["col_A", "col_B"],
)

# Counterpart of first(): keep the last col_B entry of each col_A group.
by_key = df.groupby(by="col_A")
df = by_key.last()
df

Unnamed: 0_level_0,col_B
col_A,Unnamed: 1_level_1
A,3
B,2
D,4


# Identify the source of each row in Pandas Merge

In [33]:
# Two small frames that share only the key column col_A.
df1 = pd.DataFrame({"col_A": ["A", "B"], "col_B": [1, 2]})
df2 = pd.DataFrame({"col_A": ["A", "C"], "col_C": [3, 4]})

# Left join on the shared column. indicator=True appends a `_merge` column
# that records where each row came from (here "both" or "left_only").
df = df1.merge(df2, how="left", indicator=True)
df

Unnamed: 0,col_A,col_B,col_C,_merge
0,A,1,3.0,both
1,B,2,,left_only


# Filter n-largest and n-smallest values from a DataFrame

In [37]:
# Demo frame with an unsorted numeric column.
df = pd.DataFrame(
    {"col_A": ["A", "B", "C", "D"], "col_B": [200, 400, 100, 300]}
)

# Top two rows by col_B (descending) and bottom two rows by col_B (ascending).
# The original index labels are preserved in both results.
new_df = df.nlargest(2, "col_B")
new_df2 = df.nsmallest(2, "col_B")
new_df, new_df2

(  col_A  col_B
 1     B    400
 3     D    300,
   col_A  col_B
 2     C    100
 0     A    200)

# Map categorical data to unique integer values

In [38]:
# Demo frame in which the category "A" repeats.
df = pd.DataFrame(
    {"col_A": ["A", "B", "A", "D"], "col_B": [1, 2, 3, 4]}
)

# factorize() returns (codes, uniques): codes holds one integer per row,
# assigned in order of first appearance; only the codes are stored here.
codes, uniques = pd.factorize(df["col_A"])
df["new_col"] = codes
df

Unnamed: 0,col_A,col_B,new_col
0,A,1,0
1,B,2,1
2,A,3,0
3,D,4,2


# Add prefix to every column name

In [39]:
# Demo frame whose column labels will be prefixed.
df = pd.DataFrame(
    {"col_A": ["A", "B", "C", "D"], "col_B": [1, 2, 3, 4]}
)

# add_prefix returns a new frame with the prefix prepended to every column
# label; `df` itself is left unchanged because the result is only displayed.
df.add_prefix(prefix="pre_")

Unnamed: 0,pre_col_A,pre_col_B
0,A,1
1,B,2
2,C,3
3,D,4


# Convert categorical columns to one hot values
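  * One-hot encoding is important because most machine-learning models need numeric inputs rather than string categories.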

In [40]:
# Single categorical column with a repeated value ("A").
df = pd.DataFrame([["A"], ["B"], ["C"], ["A"]],
                  columns=["col_A"])

# One indicator column per distinct category. The dtype is pinned to uint8
# (the default before pandas 2.0) so the result is 0/1 integers on every
# pandas version; pandas >= 2.0 would otherwise return True/False columns,
# which no longer matches the recorded output of this cell.
new_df = pd.get_dummies(df["col_A"], dtype="uint8")
new_df

Unnamed: 0,A,B,C
0,1,0,0
1,0,1,0
2,0,0,1
3,1,0,0
