In [27]:
# https://towardsdatascience.com/10-python-pandas-tricks-to-make-data-analysis-more-enjoyable-cb8f55af8c30
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell 
# Show the value of every bare top-level expression in a cell, not only the
# last one (cells below rely on this to display more than one result).
InteractiveShell.ast_node_interactivity = "all"
import numpy as np 

In [16]:
# Notebook-wide pandas display settings.
# Passing None for max_columns / max_rows removes the limit entirely;
# be careful doing that for rows, since large frames get printed in full.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_colwidth", 100)
pd.set_option("display.precision", 3)

In [12]:
# Several aggregations per group, flattened into a single-level header.
df = pd.DataFrame(dict(A=['coke', 'sprite', 'coke', 'sprite',
                          'sprite', 'coke', 'coke'],
                       B=['alpha','gamma', 'alpha', 'beta',
                          'gamma', 'beta', 'beta'],
                       col_1=[1,2,3,4,5,6,7],
                       col_2=[1,6,2,4,7,9,3]))

# Named aggregation binds each output column name to its (source, function)
# pair, so the header cannot drift out of sync with the aggregation list the
# way a positional `tbl.columns = [...]` overwrite can.
# String aliases ('mean', ...) are used instead of NumPy callables: passing
# np.mean to agg is deprecated in recent pandas releases.
#
# Note: 'count' is the number of non-null col_2 values in each group; it only
# equals the group's row count when col_2 has no missing values (use 'size'
# for a true row count).
tbl = df.groupby(['A', 'B'], as_index=False).agg(
    col_1_max=('col_1', 'max'),
    col_1_mean=('col_1', 'mean'),
    col_2_sum=('col_2', 'sum'),
    col_2_min=('col_2', 'min'),
    count=('col_2', 'count'),
)
tbl

In [33]:
# Collapse the distinct C values of every (A, B) group into one
# comma-separated string.
df = pd.DataFrame(dict(A=['A','A','A','A','A','B','B','B','B'],
                       B=[1,1,1,2,2,1,1,1,2],
                       C=['CA','NY','CA','FL','FL',
                          'WA','FL','NY','WA']))

# Join directly inside the group aggregation instead of first materialising
# Python lists and then walking the frame row by row with apply(axis=1).
# drop_duplicates keeps first occurrences, so values appear in first-seen
# order; str() keeps the join safe for non-string values.
tbl = (
    df[['A', 'B', 'C']]
    .drop_duplicates()
    .groupby(['A', 'B'])['C']
    .agg(lambda values: ','.join(map(str, values)))
    .reset_index()
)

tbl
tbl.select_dtypes(include=['float64', 'int64', 'object'])

Unnamed: 0,A,B,C
0,A,1,"CA,NY"
1,A,2,FL
2,B,1,"WA,FL,NY"
3,B,2,WA


In [34]:
# Underlying data as a NumPy array; to_numpy() is the accessor pandas
# recommends over the legacy .values attribute.
tbl.to_numpy()

array([['A', 1, 'CA,NY'],
       ['A', 2, 'FL'],
       ['B', 1, 'WA,FL,NY'],
       ['B', 2, 'WA']], dtype=object)

In [25]:
# Append a per-row total column and a per-column total row.
df = pd.DataFrame(dict(A=[2,6,3],
                       B=[2,2,6],
                       C=[3,2,3]))
# Use the vectorised DataFrame.sum rather than apply(lambda x: x.sum()).
# Order matters: the column of row sums is added first, so the totals row
# computed next also includes the grand total under 'col_total'.
df['col_total']     = df.sum(axis=1)
df.loc['row_total'] = df.sum()
df

Unnamed: 0,A,B,C,col_total
0,2,2,3,7
1,6,2,2,10
2,3,6,3,12
row_total,11,10,8,29


In [32]:
# Count, for each row, how many of the c1/c2 cells are missing.
df = pd.DataFrame(
    {'id': [1, 2, 3], 'c1': [0, 0, np.nan], 'c2': [np.nan, 1, 1]},
    columns=['id', 'c1', 'c2'],  # pin the column order explicitly
)
# isna() yields a boolean mask; summing across axis=1 counts the True cells.
df['num_nulls'] = df[['c1', 'c2']].isna().sum(axis=1)
df.head()

Unnamed: 0,id,c1,c2,num_nulls
0,1,0.0,,1
1,2,0.0,1.0,0
2,3,,1.0,1
