## 🔄 What is a Pivot Table?
A pivot table lets you:

- Summarize data
- Group values into rows and columns
- Apply aggregations (like mean, sum, count, etc.)

It's like an Excel pivot table, but in code.

In [20]:
from IPython.display import HTML
# Render the image file Groupby_VS_Pivot.JPG inline at a fixed 600x500 px size.
# The src path is relative, so the image must be reachable from the notebook's location.
HTML(
    '<img src="Groupby_VS_Pivot.JPG" '
    'style="width:600px;height:500px">'
)


## 🧩 Basic Setup Data

In [1]:
import pandas as pd

# Sample employee records, stored column-wise: every list has 7 entries,
# and position i across the lists describes one row of the table.
data = dict(
    Department=['HR', 'HR', 'IT', 'IT', 'Finance', 'Finance', 'IT'],
    Employee=['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Eva'],
    Gender=['F', 'M', 'M', 'M', 'F', 'M', 'F'],
    Salary=[50000, 60000, 70000, 80000, 45000, 40000, 75000],
    Bonus=[5000, 6000, 7000, 8000, 4000, 3000, 7500],
)

# One DataFrame column per key, in the order the keys are listed above.
df = pd.DataFrame(data=data)


In [2]:
# Show the complete sample frame; it has only 7 rows, so no .head() is needed.
df

Unnamed: 0,Department,Employee,Gender,Salary,Bonus
0,HR,Alice,F,50000,5000
1,HR,Bob,M,60000,6000
2,IT,Charlie,M,70000,7000
3,IT,David,M,80000,8000
4,Finance,Eva,F,45000,4000
5,Finance,Frank,M,40000,3000
6,IT,Eva,F,75000,7500


## 🔁 pivot_table() — Complete Guide
## ✅ Syntax

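```python
pd.pivot_table(
    data,             # DataFrame to summarize
    index=None,       # column(s) whose values become the row labels
    columns=None,     # column(s) whose values become the column labels
    values=None,      # column(s) to aggregate
    aggfunc='mean',   # aggregation(s) to apply: 'mean', 'sum', 'count', ...
    fill_value=None,  # value used to replace missing cells in the result
    margins=False,    # True adds an 'All' row/column of totals
)
```

Pass these as keyword arguments, as the examples below do: in pandas' actual signature `values` comes before `index` and `columns`, so positional calls are easy to get wrong.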


In [6]:
from IPython.display import HTML
# Render the image file Pivot.png inline at a fixed 600x300 px size.
# The src path is relative, so the image must be reachable from the notebook's location.
HTML(
    '<img src="Pivot.png" '
    'style="width:600px;height:300px">'
)

## ✅ Example 1: Avg Salary by Department

In [7]:
# One row per department. No aggfunc is passed, so the default ('mean')
# applies and each cell is that department's average salary.
pd.pivot_table(
    data=df,
    index='Department',
    values='Salary',
)

Unnamed: 0_level_0,Salary
Department,Unnamed: 1_level_1
Finance,42500
HR,55000
IT,75000


## ✅ Example 2: Sum of Salary by Department and Gender

In [8]:
# Departments become the rows and genders the columns; each cell holds the
# summed salary of the employees in that department/gender pair.
pd.pivot_table(
    data=df,
    index='Department',
    columns='Gender',
    values='Salary',
    aggfunc='sum',
)

Gender,F,M
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,45000,40000
HR,50000,60000
IT,75000,150000


## ✅ Example 3: Count of Employees by Department and Gender

In [9]:
# Head-count per department/gender pair, obtained by counting the entries
# of the 'Employee' column. fill_value=0 is the replacement for missing
# cells; every pair occurs in this data, so no cell is actually filled here.
pd.pivot_table(
    data=df,
    index='Department',
    columns='Gender',
    values='Employee',
    aggfunc='count',
    fill_value=0,
)

Gender,F,M
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,1,1
HR,1,1
IT,1,2


## ✅ Example 4: Multiple aggfuncs

In [10]:
# A list of aggregation names computes all of them at once: the result has
# one 'Salary' column under each of 'mean', 'max' and 'count'.
pd.pivot_table(
    data=df,
    index='Department',
    values='Salary',
    aggfunc=['mean', 'max', 'count'],
)

Unnamed: 0_level_0,mean,max,count
Unnamed: 0_level_1,Salary,Salary,Salary
Department,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Finance,42500,45000,2
HR,55000,60000,2
IT,75000,80000,3


## ✅ Example 5: Add Totals (margins)

In [11]:
# Salary totals per department. margins=True appends an 'All' row that
# applies the same aggregation (sum) across every department.
pd.pivot_table(
    data=df,
    index='Department',
    values='Salary',
    aggfunc='sum',
    margins=True,
)

Unnamed: 0_level_0,Salary
Department,Unnamed: 1_level_1
Finance,85000
HR,110000
IT,225000
All,420000


## 📊 pd.crosstab() — When You Don't Need Aggregation
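By default, crosstab() counts how often each combination of values occurs (a frequency table, like a confusion matrix). To aggregate another column instead, pass both `values` and `aggfunc`, as Example 3 below does.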

## ✅ Syntax:

pd.crosstab(index, columns, values=None, aggfunc=None, normalize=False)


## ✅ Example 1: Count Employees by Department and Gender

In [13]:
# Frequency table: rows are departments, columns are genders, and each cell
# is the number of rows of df with that department/gender pair.
pd.crosstab(
    index=df['Department'],
    columns=df['Gender'],
)

Gender,F,M
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,1,1
HR,1,1
IT,1,2


## ✅ Example 2: Normalize (Row Proportions — Each Row Sums to 1)

In [14]:
# Same frequency table, but normalize='index' divides each count by its
# row total, so every department's row holds fractions that add up to 1.
pd.crosstab(
    index=df['Department'],
    columns=df['Gender'],
    normalize='index',
)

Gender,F,M
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,0.5,0.5
HR,0.5,0.5
IT,0.333333,0.666667


## ✅ Example 3: Crosstab with aggregation (sum of salary)

In [15]:
# With values and aggfunc supplied together, crosstab aggregates instead of
# counting: each cell is the summed salary for that department/gender pair.
pd.crosstab(
    index=df['Department'],
    columns=df['Gender'],
    values=df['Salary'],
    aggfunc='sum',
)

Gender,F,M
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,45000,40000
HR,50000,60000
IT,75000,150000


In [16]:
# Render the image file Pivot_1.png inline at a fixed 600x300 px size.
# HTML is not imported in this cell; it relies on the earlier
# `from IPython.display import HTML` having been run.
HTML(
    '<img src="Pivot_1.png" '
    'style="width:600px;height:300px">'
)