In [4]:
import pandas as pd

# Demo frame: a plain "Name" column next to a "Details" column that stores
# one dict (keys "Age" and "City") per row.
data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Details": [
        {"Age": age, "City": city}
        for age, city in [(25, "NYC"), (30, "LA"), (35, "Chicago")]
    ],
}

df = pd.DataFrame(data)

print(df)

      Name                         Details
0    Alice      {'Age': 25, 'City': 'NYC'}
1      Bob       {'Age': 30, 'City': 'LA'}
2  Charlie  {'Age': 35, 'City': 'Chicago'}


In [5]:
# Add an "Age" column by looking up the "Age" key in each row's Details dict
df["Age"] = df["Details"].map(lambda details: details["Age"])
print(df)

      Name                         Details  Age
0    Alice      {'Age': 25, 'City': 'NYC'}   25
1      Bob       {'Age': 30, 'City': 'LA'}   30
2  Charlie  {'Age': 35, 'City': 'Chicago'}   35


In [6]:
# Same lookup for the "City" key, so Age and City each end up in their own
# column alongside the original Details dict
df["City"] = df["Details"].map(lambda details: details["City"])
print(df)

      Name                         Details  Age     City
0    Alice      {'Age': 25, 'City': 'NYC'}   25      NYC
1      Bob       {'Age': 30, 'City': 'LA'}   30       LA
2  Charlie  {'Age': 35, 'City': 'Chicago'}   35  Chicago


2. Exploding a Column with Lists

Use Case: A column contains lists, and you want to "explode" each element of 
the list into its own row.

In [7]:
# Demo frame: every "Hobbies" cell holds a list of varying length
data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Hobbies": [
        ["Reading", "Traveling"],
        ["Cooking"],
        ["Swimming", "Gaming", "Hiking"],
    ],
}
df = pd.DataFrame(data)

# One output row per list element; the Name value is repeated for each.
# explode() keeps the source row's index label on every produced row, so the
# index is rebuilt afterwards to get a clean 0..n-1 range.
df_exploded = (
    df
    .explode("Hobbies")
    .reset_index(drop=True)
)
print(df_exploded)

      Name    Hobbies
0    Alice    Reading
1    Alice  Traveling
2      Bob    Cooking
3  Charlie   Swimming
4  Charlie     Gaming
5  Charlie     Hiking


In [8]:
import pandas as pd
from pandas import json_normalize

# JSON-like records: each one nests its attributes under a "Details" dict
data = [
    {"Name": name, "Details": {"Age": age, "City": city}}
    for name, age, city in [
        ("Alice", 25, "NYC"),
        ("Bob", 30, "LA"),
        ("Charlie", 35, "Chicago"),
    ]
]

# json_normalize flattens the nested dicts into dotted column names
# ("Details.Age", "Details.City")
df = pd.json_normalize(data)
print(df)

      Name  Details.Age Details.City
0    Alice           25          NYC
1      Bob           30           LA
2  Charlie           35      Chicago


In [9]:
# Example Multi-Level Columns DataFrame.
# Tuple keys make pandas build a two-level column MultiIndex:
# level 0 is the metric ("Sales"/"Profit"), level 1 is the year.
data = {
    ("Sales", "2023"): [100, 200, 150],
    ("Sales", "2024"): [120, 250, 180],
    ("Profit", "2023"): [20, 50, 30],
    ("Profit", "2024"): [25, 60, 40],
}
df = pd.DataFrame(data, index=["Q1", "Q2", "Q3"])

# Selecting by a level-0 label returns a DataFrame of every sub-column under
# it, with that outer level dropped (columns become '2023', '2024').
sales_data = df["Sales"]
print(sales_data)

# Selecting by the full (level 0, level 1) tuple returns a single Series
# whose name is that tuple.
sales_2023 = df[("Sales", "2023")]
print(sales_2023)

# Flatten multi-level columns into single-level "<level0>_<level1>" names.
# Each label is passed through str() first: a bare '_'.join(col) raises
# TypeError as soon as any level holds non-string labels (e.g. integer years).
# NOTE: this reassigns df.columns in place, so the tuple-based selections
# above no longer work on `df` after this line.
df.columns = ["_".join(map(str, col)) for col in df.columns]
print(df)

    2023  2024
Q1   100   120
Q2   200   250
Q3   150   180
Q1    100
Q2    200
Q3    150
Name: (Sales, 2023), dtype: int64
    Sales_2023  Sales_2024  Profit_2023  Profit_2024
Q1         100         120           20           25
Q2         200         250           50           60
Q3         150         180           30           40


5. Iterating Over Nested Data with itertuples or apply

Use Case: Process nested elements row by row.

In [10]:
# Demo frame: each "Scores" cell is a dict with "Math" and "Science" entries
data = {
    "Name": ["Alice", "Bob"],
    "Scores": [
        {"Math": math_mark, "Science": science_mark}
        for math_mark, science_mark in [(90, 85), (75, 80)]
    ],
}
df = pd.DataFrame(data)

# Row-level helper, intended for use with DataFrame.apply(..., axis=1)
def extract_math_score(row):
    """Return the "Math" entry of the mapping stored under ``row["Scores"]``.

    ``row`` is anything indexable by the key "Scores" (a DataFrame row passed
    by ``apply(axis=1)``, or a plain dict). A missing "Scores" or "Math" key
    propagates as the lookup's own error.
    """
    scores = row["Scores"]
    return scores["Math"]

# axis="columns" (same as axis=1) hands each row to the helper in turn
df["Math Score"] = df.apply(extract_math_score, axis="columns")
print(df)


    Name                       Scores  Math Score
0  Alice  {'Math': 90, 'Science': 85}          90
1    Bob  {'Math': 75, 'Science': 80}          75


#### 6. Working with Columns of Tuples
##### Use Case: A column contains tuples, and you want to extract individual elements.

In [12]:
# Demo frame with a two-level row index: (Group, Subgroup)
arrays = [
    ["Group1", "Group1", "Group2", "Group2"],  # outer level labels
    ["A", "B", "A", "B"],                      # inner level labels
]
index = pd.MultiIndex.from_arrays(arrays, names=("Group", "Subgroup"))
df = pd.DataFrame(data={"Values": [10, 20, 30, 40]}, index=index)

# An outer-level label alone selects all of that group's rows; the result is
# a DataFrame indexed by the remaining "Subgroup" level
group1_data = df.loc["Group1"]
print(group1_data)

# A full (Group, Subgroup) tuple pins down a single row, returned as a Series
group1_b = df.loc[("Group1", "B")]
print(group1_b)

          Values
Subgroup        
A             10
B             20
Values    20
Name: (Group1, B), dtype: int64



#### 7. Handling Multi-Index DataFrames
##### Use Case: Extract data from rows or columns indexed with multiple levels.