### Prepared by Abhishek Kumar
### https://www.linkedin.com/in/abhishek-kumar-442337b2/


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# To get multiple outputs in the same cell

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%matplotlib inline

In [3]:
# Setup: build the sample employee DataFrame that the later cells reshape.
#
# Each record holds, in order:
#   Emp_Id, Emp_Name, Department, Role, Gender, WFH Status, DOB, Salary
# DOB is stored as a digit string (looks like ddmmyyyy, e.g. '15101991' - TODO confirm).
# Missing DOB / Salary values are marked with np.nan. The `np.NaN` alias was
# removed in NumPy 2.0, so the lowercase spelling is used to keep the cell runnable.

salary = [['1','Abhishek Kumar','AIML', 'Machine Learning Engineer','M', 'Y', '04051990', 1121000],
          ['2','Arjun Kumar','DM', 'Tech Lead','M', 'Y', '09031992', 109000],
          ['3','Vivek Raj','DM', 'Devops Engineer','M', 'N', np.nan , 827000],
          ['4','Mika Singh','DM', 'Data Analyst','F', 'Y', '15101991',  np.nan],
          ['5','Anusha Yenduri','AIML', 'Data Scientist','F', 'Y', '01011989',  921000],
          ['6','Ritesh Srivastava','AIML', 'Data Engineer','M', 'Y', np.nan, 785000]]

columns_name=['Emp_Id','Emp_Name','Department','Role','Gender', 'WFH Status', 'DOB', 'Salary']

emp_df = pd.DataFrame(salary,columns=columns_name)
emp_df

Unnamed: 0,Emp_Id,Emp_Name,Department,Role,Gender,WFH Status,DOB,Salary
0,1,Abhishek Kumar,AIML,Machine Learning Engineer,M,Y,4051990.0,1121000.0
1,2,Arjun Kumar,DM,Tech Lead,M,Y,9031992.0,109000.0
2,3,Vivek Raj,DM,Devops Engineer,M,N,,827000.0
3,4,Mika Singh,DM,Data Analyst,F,Y,15101991.0,
4,5,Anusha Yenduri,AIML,Data Scientist,F,Y,1011989.0,921000.0
5,6,Ritesh Srivastava,AIML,Data Engineer,M,Y,,785000.0


# 1. Reshaping with Melt

There are many ways to reshape a pandas DataFrame from wide to long form,
but the melt() method is the most flexible.

In [4]:
# Sample data: a separate frame derived from emp_df with three bonus columns,
# each computed as a fixed fraction of Salary (5%, 7.5% and 10%).
# A missing Salary yields missing bonuses.

emp_df_1 = emp_df.assign(
    Holi_Bonus=lambda frame: frame['Salary'] * 0.05,
    Diwali_Bonus=lambda frame: frame['Salary'] * 0.075,
    Yearly_Bonus=lambda frame: frame['Salary'] * 0.10,
)
emp_df_1

Unnamed: 0,Emp_Id,Emp_Name,Department,Role,Gender,WFH Status,DOB,Salary,Holi_Bonus,Diwali_Bonus,Yearly_Bonus
0,1,Abhishek Kumar,AIML,Machine Learning Engineer,M,Y,4051990.0,1121000.0,56050.0,84075.0,112100.0
1,2,Arjun Kumar,DM,Tech Lead,M,Y,9031992.0,109000.0,5450.0,8175.0,10900.0
2,3,Vivek Raj,DM,Devops Engineer,M,N,,827000.0,41350.0,62025.0,82700.0
3,4,Mika Singh,DM,Data Analyst,F,Y,15101991.0,,,,
4,5,Anusha Yenduri,AIML,Data Scientist,F,Y,1011989.0,921000.0,46050.0,69075.0,92100.0
5,6,Ritesh Srivastava,AIML,Data Engineer,M,Y,,785000.0,39250.0,58875.0,78500.0


In [5]:
# Wide -> long: keep the employee identifiers as-is and stack the three bonus
# columns into (Event, Bonus) pairs, giving one row per employee per event.
emp_df_1_long = pd.melt(
    emp_df_1,
    id_vars=['Emp_Id', 'Emp_Name'],
    value_vars=['Holi_Bonus', 'Diwali_Bonus', 'Yearly_Bonus'],
    var_name='Event',
    value_name='Bonus',
)
emp_df_1_long

Unnamed: 0,Emp_Id,Emp_Name,Event,Bonus
0,1,Abhishek Kumar,Holi_Bonus,56050.0
1,2,Arjun Kumar,Holi_Bonus,5450.0
2,3,Vivek Raj,Holi_Bonus,41350.0
3,4,Mika Singh,Holi_Bonus,
4,5,Anusha Yenduri,Holi_Bonus,46050.0
5,6,Ritesh Srivastava,Holi_Bonus,39250.0
6,1,Abhishek Kumar,Diwali_Bonus,84075.0
7,2,Arjun Kumar,Diwali_Bonus,8175.0
8,3,Vivek Raj,Diwali_Bonus,62025.0
9,4,Mika Singh,Diwali_Bonus,


# 2. Reshaping with Pivot_table

The pivot_table() method is the most flexible way to reshape a pandas DataFrame from long to wide form.

In [6]:
# Long -> wide: one row per employee, one column per Event, cells hold Bonus.
# In the output below, the employee whose bonuses are all missing (Emp_Id 4)
# does not appear in the result.
emp_df_1_wide_1 = pd.pivot_table(
    emp_df_1_long,
    index=['Emp_Id', 'Emp_Name'],
    columns='Event',
    values='Bonus',
).reset_index()
emp_df_1_wide_1

Event,Emp_Id,Emp_Name,Diwali_Bonus,Holi_Bonus,Yearly_Bonus
0,1,Abhishek Kumar,84075.0,56050.0,112100.0
1,2,Arjun Kumar,8175.0,5450.0,10900.0
2,3,Vivek Raj,62025.0,41350.0,82700.0
3,5,Anusha Yenduri,69075.0,46050.0,92100.0
4,6,Ritesh Srivastava,58875.0,39250.0,78500.0


In [7]:
# Same pivot, with margins: an extra 'All' row and 'All' column are appended.
# No aggfunc is passed, so the default ('mean') is used for cells and margins.
emp_df_1_wide_2 = pd.pivot_table(
    emp_df_1_long,
    index=['Emp_Id', 'Emp_Name'],
    columns='Event',
    values='Bonus',
    margins=True,
).reset_index()
emp_df_1_wide_2

Event,Emp_Id,Emp_Name,Diwali_Bonus,Holi_Bonus,Yearly_Bonus,All
0,1,Abhishek Kumar,84075.0,56050.0,112100.0,84075.0
1,2,Arjun Kumar,8175.0,5450.0,10900.0,8175.0
2,3,Vivek Raj,62025.0,41350.0,82700.0,62025.0
3,5,Anusha Yenduri,69075.0,46050.0,92100.0,69075.0
4,6,Ritesh Srivastava,58875.0,39250.0,78500.0,58875.0
5,All,,56445.0,37630.0,75260.0,56445.0


In [8]:
# Pivot with margins again, but aggregate with 'sum' instead of the default mean,
# so the 'All' row/column hold totals. In the output below, the employee with
# only missing bonuses (Emp_Id 4) is kept, with 0.0 in each event column.
emp_df_1_wide_3 = pd.pivot_table(
    emp_df_1_long,
    index=['Emp_Id', 'Emp_Name'],
    columns='Event',
    values='Bonus',
    aggfunc='sum',
    margins=True,
).reset_index()
emp_df_1_wide_3

Event,Emp_Id,Emp_Name,Diwali_Bonus,Holi_Bonus,Yearly_Bonus,All
0,1,Abhishek Kumar,84075.0,56050.0,112100.0,252225.0
1,2,Arjun Kumar,8175.0,5450.0,10900.0,24525.0
2,3,Vivek Raj,62025.0,41350.0,82700.0,186075.0
3,4,Mika Singh,0.0,0.0,0.0,
4,5,Anusha Yenduri,69075.0,46050.0,92100.0,207225.0
5,6,Ritesh Srivastava,58875.0,39250.0,78500.0,176625.0
6,All,,282225.0,188150.0,376300.0,846675.0


In [9]:
# Only row-wise aggregation: one row per employee, with Bonus averaged across
# all events (no `columns` argument, so nothing is spread into columns).
#
# `values` is passed explicitly so that only the numeric Bonus column is
# aggregated. Without it, the text column 'Event' is also handed to the mean
# aggregation, which older pandas silently dropped and newer pandas rejects.

emp_df_1_wide_4 = emp_df_1_long.pivot_table(index =  ['Emp_Id','Emp_Name'],
                                            values = 'Bonus') # default aggfunc = 'mean'
emp_df_1_wide_4

Unnamed: 0_level_0,Unnamed: 1_level_0,Bonus
Emp_Id,Emp_Name,Unnamed: 2_level_1
1,Abhishek Kumar,84075.0
2,Arjun Kumar,8175.0
3,Vivek Raj,62025.0
5,Anusha Yenduri,69075.0
6,Ritesh Srivastava,58875.0


In [10]:
# Pivot by Event with a fill_value: cells still missing in the pivoted table
# are replaced by 1000. In the output below no cell needed filling.
# Note: this rebinds emp_df_1_wide_4, replacing the frame from the previous cell.
emp_df_1_wide_4 = pd.pivot_table(
    emp_df_1_long,
    index=['Emp_Id', 'Emp_Name'],
    columns='Event',
    values='Bonus',
    fill_value=1000,
)
emp_df_1_wide_4

Unnamed: 0_level_0,Event,Diwali_Bonus,Holi_Bonus,Yearly_Bonus
Emp_Id,Emp_Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Abhishek Kumar,84075,56050,112100
2,Arjun Kumar,8175,5450,10900
3,Vivek Raj,62025,41350,82700
5,Anusha Yenduri,69075,46050,92100
6,Ritesh Srivastava,58875,39250,78500


### There are other techniques that enable reshaping of DataFrames:

    i. pivot()
    ii. stack() & unstack()
    iii. wide_to_long()
    iv. crosstab()
    v. cut()

### References:

#### 1. [Pandas Documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html)
#### 2. [TDS](https://towardsdatascience.com/reshape-pandas-dataframe-with-melt-in-python-tutorial-and-visualization-29ec1450bb02)