# Pivot allows reshaping the data frame

In [1]:
import pandas as pd

In [24]:
# Load the sample weather readings used by the pivot examples.
# NOTE(review): this is an absolute Windows path, so the cell only runs on a
# machine where this directory exists.
weather_df = pd.read_csv(
    "C:/PythonTutorial/MyPandas_Blog/data sets/weather_for_pivot.csv"
)

In [25]:
# Show the loaded frame (long format: one row per date/city pair) before reshaping it.
weather_df

Unnamed: 0,date,city,temperature,humidity
0,5/1/2017,new york,65,56
1,5/2/2017,new york,66,58
2,5/3/2017,new york,68,60
3,5/1/2017,mumbai,75,80
4,5/2/2017,mumbai,78,83
5,5/3/2017,mumbai,82,85
6,5/1/2017,beijing,80,26
7,5/2/2017,beijing,77,30
8,5/3/2017,beijing,79,35


## Transforming the data frame into a pivot

In [26]:
# Reshape from long to wide: dates become the row index and each remaining
# value column (temperature, humidity) is spread across one column per city.
weather_df.pivot(
    columns="city",
    index="date",
)

Unnamed: 0_level_0,temperature,temperature,temperature,humidity,humidity,humidity
city,beijing,mumbai,new york,beijing,mumbai,new york
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
5/1/2017,80,75,65,26,80,56
5/2/2017,77,78,66,30,83,58
5/3/2017,79,82,68,35,85,60


In [27]:
# Naming a column in `values` keeps only that measurement in the pivoted
# frame; here the result holds humidity alone, one column per city.
weather_df.pivot(
    columns="city",
    index="date",
    values="humidity",
)

city,beijing,mumbai,new york
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5/1/2017,26,80,56
5/2/2017,30,83,58
5/3/2017,35,85,60


## Pivot table - used to summarise and aggregate data inside a data frame

In [29]:
# Load the second sample file and display it.
# NOTE(review): this is an absolute Windows path, so the cell only runs on a
# machine where this directory exists.
weather2_df = pd.read_csv(
    "C:/PythonTutorial/MyPandas_Blog/data sets/weather2_for_pivot.csv"
)
weather2_df

Unnamed: 0,date,city,temperature,humidity
0,5/1/2017,new york,65,56
1,5/1/2017,new york,61,54
2,5/2/2017,new york,70,60
3,5/2/2017,new york,72,62
4,5/1/2017,mumbai,75,80
5,5/1/2017,mumbai,78,83
6,5/2/2017,mumbai,82,85
7,5/2/2017,mumbai,80,26


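*For the same day we have multiple rows per city, so `pivot` cannot be used here (it raises an error on duplicate index/column pairs); `pivot_table` aggregates the duplicates instead.*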

In [30]:
# Cities become rows and dates become columns; rows sharing the same
# city/date pair are combined with the default aggregation.
weather2_df.pivot_table(
    columns="date",
    index="city",
)

Unnamed: 0_level_0,humidity,humidity,temperature,temperature
date,5/1/2017,5/2/2017,5/1/2017,5/2/2017
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
mumbai,81.5,55.5,76.5,81.0
new york,55.0,61.0,63.0,71.0


In [31]:
# Same city-by-date summary, limited to the temperature column via `values`.
weather2_df.pivot_table(
    columns="date",
    index="city",
    values="temperature",
)


date,5/1/2017,5/2/2017
city,Unnamed: 1_level_1,Unnamed: 2_level_1
mumbai,76.5,81.0
new york,63.0,71.0


In [34]:
# `aggfunc` names the function used to combine rows that land in the same
# city/date cell. Passing "mean" explicitly gives the same table as the
# call without `aggfunc` shown earlier.
result_df = weather2_df.pivot_table(
    columns="date",
    index="city",
    aggfunc="mean",
)
result_df

Unnamed: 0_level_0,humidity,humidity,temperature,temperature
date,5/1/2017,5/2/2017,5/1/2017,5/2/2017
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
mumbai,81.5,55.5,76.5,81.0
new york,55.0,61.0,63.0,71.0


In [36]:
# Passing `margins=True` appends an "All" row and an "All" column holding the
# aggregate (here the mean) taken across every date and every city.
result_df = weather2_df.pivot_table(
    columns="date",
    index="city",
    aggfunc="mean",
    margins=True,
)
result_df

Unnamed: 0_level_0,humidity,humidity,humidity,temperature,temperature,temperature
date,5/1/2017,5/2/2017,All,5/1/2017,5/2/2017,All
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
mumbai,81.5,55.5,68.5,76.5,81.0,78.75
new york,55.0,61.0,58.0,63.0,71.0,67.0
All,68.25,58.25,63.25,69.75,76.0,72.875


# Grouper in Pivot table

In [41]:
# Load the third sample file and display it.
# NOTE(review): this is an absolute Windows path, so the cell only runs on a
# machine where this directory exists.
weather3_df = pd.read_csv(
    "C:/PythonTutorial/MyPandas_Blog/data sets/weather3_for_pivot.csv"
)
weather3_df

Unnamed: 0,date,city,temperature,humidity
0,5/1/2017,new york,65,56
1,5/2/2017,new york,61,54
2,5/3/2017,new york,70,60
3,12/1/2017,new york,30,50
4,12/2/2017,new york,28,52
5,12/3/2017,new york,25,51


In [45]:
# Parse the date strings into timestamps so they can be grouped by period.
# This overwrites the `date` column on weather3_df itself.
weather3_df["date"] = pd.to_datetime(weather3_df["date"])

# Use a Grouper as the index to bucket rows by month; each output row is
# labelled with the month-end date and holds the mean of that month's rows.
# NOTE(review): pandas 2.2+ deprecates the "M" alias in favour of "ME" --
# confirm the target pandas version before changing it.
weather3_df.pivot_table(
    columns="city",
    index=pd.Grouper(key="date", freq="M"),
)


Unnamed: 0_level_0,humidity,temperature
city,new york,new york
date,Unnamed: 1_level_2,Unnamed: 2_level_2
2017-05-31,56.666667,65.333333
2017-12-31,51.0,27.666667
