[Reference](https://medium.com/@abdelilah.moulida/8-python-pandas-tips-to-make-data-analysis-faster-e9826b204363)

In [1]:
import pandas as pd 
pd.options.display.max_columns = 50  # None -> No Restrictions
pd.options.display.max_rows = 200    # None -> Be careful with this 
pd.options.display.max_colwidth = 100
pd.options.display.precision = 3

# Group by with multiple aggregations

In [17]:
import pandas as pd
import numpy as np 
df = pd.DataFrame(dict(A=['coke', 'sprite', 'coke', 'sprite',
                          'sprite', 'coke', 'coke'],
                       B=['alpha','gamma', 'alpha', 'beta',
                          'gamma', 'beta', 'beta'],
                       col_1=[1,2,3,4,5,6,7],
                       col_2=[1,6,2,4,7,9,3]))
tbl = df.groupby(['A','B']).agg({'col_1': ['max', np.mean],
                                 'col_2': ['sum','min','count']})# 'count' will always be the count for number of rows in each group.

In [18]:
tbl 

Unnamed: 0_level_0,Unnamed: 1_level_0,col_1,col_1,col_2,col_2,col_2
Unnamed: 0_level_1,Unnamed: 1_level_1,max,mean,sum,min,count
A,B,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
coke,alpha,3,2.0,3,1,2
coke,beta,7,6.5,12,3,2
sprite,beta,4,4.0,4,4,1
sprite,gamma,5,3.5,13,6,2


In [20]:
tbl = tbl.reset_index()
tbl.columns = ['A', 'B', 'col_1_max', 'col_1_mean','col_2_sum', 'col_2_min', 'count']

# Column slicing

In [21]:
df.iloc[:,2:5].head()             # select the 2nd to the 4th column
df.loc[:,'column_x':].head()   
# select all columns starting from 'column_x'

0
1
2
3
4


# Add row ID / random row ID to each group

In [22]:
import numpy as np
# df: target dataframe np.random.seed(0)   # set random seed
df['random_ID_all'] = np.random.permutation(df.shape[0])
df['ID_all'] = [i for i in range(1, df.shape[0]+1)]

In [24]:
df['ID'] = df.groupby(['A', 'B'])['ID_all'].rank(method='first', ascending=True).astype(int)
df['random_ID'] = df.groupby(['A', 'B'])['random_ID_all'].rank(method='first', ascending=True).astype(int)

# List all unique values in a group

In [26]:
df = pd.DataFrame(dict(A=['A','A','A','A','A','B','B','B','B'],
                       B=[1,1,1,2,2,1,1,1,2],
                       C=['CA','NY','CA','FL','FL',     
                          'WA','FL','NY','WA']))
tbl = df[['A', 'B', 'C']].drop_duplicates()\
                         .groupby(['A','B'])['C']\
                         .apply(list)\
                         .reset_index()# list to string (separated by commas) 
tbl['C'] = tbl.apply(lambda x: (','.join([str(s) for s in x['C']])), axis = 1)

# Add row total and column total to a numerical dataframe

In [28]:
df = pd.DataFrame(dict(A=[2,6,3],
                       B=[2,2,6], 
                       C=[3,2,3]))
df['col_total']     = df.apply(lambda x: x.sum(), axis=1)
df.loc['row_total'] = df.apply(lambda x: x.sum())

# Check memory usage
```
.memory_usage(deep=True)
```

# Cumulative sum
```
df['cumulative_sum'] = df['target_column'].cumsum()
```