In [1]:
# uese: 
'''
✅ Hierarchical Organization

Allows you to group data by multiple levels (e.g., Year & Quarter, Country & City).

📊 Easier Grouping & Aggregation
Makes operations like groupby() and agg() more powerful and flexible.

🔁 Pivot Table Style Analysis
Ideal for reshaping data (like unstacking rows into columns) for reports and dashboards.

📂 Managing Panel/Time Series Data
Useful in financial, sales, or sensor data where you have repeated measurements over multiple dimensions.

✨ Cleaner and More Readable DataFrames
Makes complex data easier to read and navigate, especially for large datasets.
'''

'\n✅ Hierarchical Organization\n\nAllows you to group data by multiple levels (e.g., Year & Quarter, Country & City).\n\n📊 Easier Grouping & Aggregation\n\nMakes operations like groupby() and agg() more powerful and flexible.\n\n🔁 Pivot Table Style Analysis\n\nIdeal for reshaping data (like unstacking rows into columns) for reports and dashboards.\n\n📂 Managing Panel/Time Series Data\n\nUseful in financial, sales, or sensor data where you have repeated measurements over multiple dimensions.\n\n✨ Cleaner and More Readable DataFrames\n\nMakes complex data easier to read and navigate, especially for large datasets.\n'

In [5]:
'''
import pandas as pd

# Simple data (just 3 rows)
data = {
    'Marks': [90, 80, 85]
}

# MultiIndex created from tuples
index = pd.MultiIndex.from_tuples([
    ('Alice', 'Math'),
    ('Bob', 'Math'),
    ('Carol', 'Math')
], names=['Name', 'Subject'])

# Create the DataFrame
df = pd.DataFrame(data, index=index)

# Show the DataFrame
print(df)
'''
import pandas as pd

data = {'Marks' : [90,80,70]}
index = pd.MultiIndex.from_tuples([('Alice', 'math'),
                                  ('Bob', 'math'),
                                  ('carol','math')], names = ['Name', 'Subject'])
df = pd.DataFrame(data, index = index)
print(df)

               Marks
Name  Subject       
Alice math        90
Bob   math        80
carol math        70


In [11]:
# 🧑‍🎓 Multi-Subject Example:
import pandas as pd

# Data: 6 rows (3 students x 2 subjects)
'''
data = {
    'Marks': [90, 85, 88, 80, 78, 92]
}

# MultiIndex: each student has two subjects
index = pd.MultiIndex.from_tuples([
    ('Alice', 'Math'),
    ('Alice', 'Science'),
    ('Bob', 'Math'),
    ('Bob', 'Science'),
    ('Carol', 'Math'),
    ('Carol', 'Science')
], names=['Name', 'Subject'])

# Create DataFrame
df = pd.DataFrame(data, index=index)

# Display
print(df)
'''
data = {'marks' :[80,90,70,98,97,80]}

index = pd.MultiIndex.from_tuples([('Alice', 'Math'),
                                  ('Alice','English'),
                                  ('siva', 'science'),
                                  ('siva', 'social'),
                                   ('Bob', 'science'),
                                  ('Bob', 'history')], names = ['name', 'subject'])
df = pd.DataFrame(data, index = index)
print(df)

               marks
name  subject       
Alice Math        80
      English     90
siva  science     70
      social      98
Bob   science     97
      history     80


In [13]:
# creating multiple index
'''
import pandas as pd
# Sample data
data = {
    'Sales': [100, 150, 200, 250],
    'Profit': [20, 30, 50, 70]
}
# Creating MultiIndex
index = pd.MultiIndex.from_tuples([
    ('2024', 'Q1'),
    ('2024', 'Q2'),
    ('2025', 'Q1'),
    ('2025', 'Q2')
], names=['Year', 'Quarter'])

df = pd.DataFrame(data, index=index)
print(df)
'''
import pandas as pd
data = {'Sales': [100,150,200,250],
       'Profit': [20,30,50,70]}

index = pd.MultiIndex.from_tuples([('2024', 'Q1'),
                                 ('2024','Q2'),
                                 ('2025','Q1'),
                                 ('2025', 'Q2')],
                                 names = ['Year','Quarter'])
df = pd.DataFrame(data, index = index)
print(df)

              Sales  Profit
Year Quarter               
2024 Q1         100      20
     Q2         150      30
2025 Q1         200      50
     Q2         250      70


In [18]:
#  2. Accessing Data in MultiIndex

# # Get data for year 2024
print(df.loc['2024'])
print()
print()
print(df.loc['2024','Q1'])

         Sales  Profit
Quarter               
Q1         100      20
Q2         150      30


Sales     100
Profit     20
Name: (2024, Q1), dtype: int64


In [23]:
#  3. Using xs() to Access Data by Level

# .xs() stands for "cross section".
# 👉 "Get all rows where Quarter is 'Q1'", no matter what the year is.

print('1) Q1 sales no matter what the years:')
print()
print(df.xs('Q1', level = 'Quarter'))
print()
print('2) Q2 sales no matter what the years:')
print(df.xs('Q2', level = 'Quarter'))

1) Q1 sales no matter what the years:

      Sales  Profit
Year               
2024    100      20
2025    200      50

2) Q2 sales no matter what the years:
      Sales  Profit
Year               
2024    150      30
2025    250      70


In [29]:
# 4. Aggregation with groupby() on Levels

print('Mean :')
print(df.groupby('Year').mean())
print()
print('Standard deviation: ')
print(df.groupby('Year').std())

Mean :
      Sales  Profit
Year               
2024  125.0    25.0
2025  225.0    60.0

Standard deviation
          Sales     Profit
Year                      
2024  35.355339   7.071068
2025  35.355339  14.142136


In [31]:
#  5. Swapping Index Levels

df_swapped = df.swaplevel()
print(df_swapped.sort_index())

              Sales  Profit
Quarter Year               
Q1      2024    100      20
        2025    200      50
Q2      2024    150      30
        2025    250      70


In [41]:
# 6. Reset and Set Index

reset_df = df.reset_index()
print(reset_df)
print()
again = reset_df.set_index(['Year', 'Quarter'])
print(again)

   Year Quarter  Sales  Profit
0  2024      Q1    100      20
1  2024      Q2    150      30
2  2025      Q1    200      50
3  2025      Q2    250      70

              Sales  Profit
Year Quarter               
2024 Q1         100      20
     Q2         150      30
2025 Q1         200      50
     Q2         250      70


In [46]:
# 7. Filtering with Multiple Conditions

# print(df[df.index.get_level_values('Quarter') == 'Q2'])

print(df[df.index.get_level_values('Quarter') == 'Q2'])
print()
print(df[df.index.get_level_values('Quarter') == 'Q1'])
print()
print(df[df['Sales']> 200])


              Sales  Profit
Year Quarter               
2024 Q2         150      30
2025 Q2         250      70

              Sales  Profit
Year Quarter               
2024 Q1         100      20
2025 Q1         200      50

              Sales  Profit
Year Quarter               
2025 Q2         250      70


In [47]:
# 🛒 Mini Project: Sales Analysis by Region and Quarter
'''
🔧 Scenario:
You work as a data analyst for a retail company. You have quarterly sales data for multiple products across different regions. 
Your goal is to analyze sales and profit using MultiIndex techniques.
'''
# ✅ Step 1: Create the DataFrame with MultiIndex

import pandas as pd
data = {'Sales':  [1200, 1500, 1800, 1100, 1700, 1600, 1900, 1400],
       'Profit': [300, 400, 450, 200, 350, 320, 500, 280]}

index = pd.MultiIndex.from_tuples([('North','Q1'),
                                  ('North','Q2'),
                                  ('North','Q3'),
                                  ('North','Q4'),
                                   ('South','Q1'),
                                   ('South','Q2'),
                                   ('South','Q3'),
                                   ('South','Q14')
                                  ], names = ['Region', 'Quarter'])
df = pd.DataFrame(data, index = index)
print(df)

                Sales  Profit
Region Quarter               
North  Q1        1200     300
       Q2        1500     400
       Q3        1800     450
       Q4        1100     200
South  Q1        1700     350
       Q2        1600     320
       Q3        1900     500
       Q14       1400     280


In [51]:
# 🎯 Tasks to Practice
# 1️⃣ View total sales and profit per region (use groupby() and sum())

print(df.groupby(level = 'Region').sum())

        Sales  Profit
Region               
North    5600    1350
South    6600    1450


In [54]:
# 2️⃣ Find the quarter with the highest profit for each region
print(df.groupby(level = 'Region')['Profit'].idxmax())

Region
North    (North, Q3)
South    (South, Q3)
Name: Profit, dtype: object


In [57]:
# 3️⃣ Get all data for Q2 across all regions (use .xs())
print(df.xs('Q2', level = "Quarter"))

        Sales  Profit
Region               
North    1500     400
South    1600     320


In [66]:
# 4️⃣ Add a new column: Profit Margin (%)
df['Profit_margin(%)'] = df['Profit']/ df['Sales']*100
print(df)



                Sales  Profit  Profit_margin(%)
Region Quarter                                 
North  Q1        1200     300         25.000000
       Q2        1500     400         26.666667
       Q3        1800     450         25.000000
       Q4        1100     200         18.181818
South  Q1        1700     350         20.588235
       Q2        1600     320         20.000000
       Q3        1900     500         26.315789
       Q14       1400     280         20.000000


In [67]:
# 5️⃣ Filter out entries where sales are above 1600
print(df[df['Sales']> 1600])

                Sales  Profit  Profit_margin(%)
Region Quarter                                 
North  Q3        1800     450         25.000000
South  Q1        1700     350         20.588235
       Q3        1900     500         26.315789


In [71]:
# 6️⃣ Swap levels and sort by Quarter first, then Region
swapped = df.swaplevel().sort_index()
print(swapped)

                Sales  Profit  Profit_margin(%)
Quarter Region                                 
Q1      North    1200     300         25.000000
        South    1700     350         20.588235
Q14     South    1400     280         20.000000
Q2      North    1500     400         26.666667
        South    1600     320         20.000000
Q3      North    1800     450         25.000000
        South    1900     500         26.315789
Q4      North    1100     200         18.181818


In [72]:
# 7️⃣ Reset the index and view as a flat table
print(df.reset_index())

  Region Quarter  Sales  Profit  Profit_margin(%)
0  North      Q1   1200     300         25.000000
1  North      Q2   1500     400         26.666667
2  North      Q3   1800     450         25.000000
3  North      Q4   1100     200         18.181818
4  South      Q1   1700     350         20.588235
5  South      Q2   1600     320         20.000000
6  South      Q3   1900     500         26.315789
7  South     Q14   1400     280         20.000000


In [1]:
import pandas as pd

data = {'Region' : ['East', 'East', 'West', 'West'],
       'Month': ['Jan', 'Feb', 'Jan', 'Feb'],
       'Sales': [200, 210, 180, 190]}
df = pd.DataFrame(data)
print(df)

  Region Month  Sales
0   East   Jan    200
1   East   Feb    210
2   West   Jan    180
3   West   Feb    190


In [3]:
df_multi = df.set_index(['Region','Month'])
print(df_multi)

              Sales
Region Month       
East   Jan      200
       Feb      210
West   Jan      180
       Feb      190


In [6]:
reset = df_multi.reset_index()
print(reset)

  Region Month  Sales
0   East   Jan    200
1   East   Feb    210
2   West   Jan    180
3   West   Feb    190


In [8]:
df_reset = df_multi.reset_index(level = 'Region')
print(df_reset)


      Region  Sales
Month              
Jan     East    200
Feb     East    210
Jan     West    180
Feb     West    190


In [9]:
df_multi.loc[('East','Jan')]

Sales    200
Name: (East, Jan), dtype: int64

In [10]:
# Access All Months for a Region:
print(df_multi.loc['East'])

       Sales
Month       
Jan      200
Feb      210
