In [4]:

# =============================================
# 📘 IMPORTING LIBRARIES
# =============================================

import pandas as pd
import numpy as np

# =============================================
# 🧩 TASK 1: INTRODUCING PANDAS OBJECTS
# =============================================

# Load dataset


# Use full file path
path = r"C:\Users\preethi\Downloads\archive (5)\index_1.csv"
df = pd.read_csv(path)

print("✅ Dataset Loaded Successfully!")
print(df.head())


# Display first few rows
print("✅ Dataset Loaded Successfully!")
print("\nFirst 5 Rows:\n", df.head())

# Insert your name as the first entry (customized example)
# You can adjust column names based on your dataset structure
# Let's automatically detect the columns first
print("\nDataset Columns:", df.columns.tolist())

# Try to create a row with your name
# (Modify according to your dataset columns)
new_row = {df.columns[0]: 'Preethi'}
for col in df.columns[1:]:
    new_row[col] = np.nan  # fill others as missing for now

# Add your row at the top
df = pd.concat([pd.DataFrame([new_row]), df], ignore_index=True)

print("\n✅ Added first entry with your name!\n")
print(df.head(10))  # Display first 10 rows

# Display basic info
print("\n🔍 Dataset Information:")
df.info()

print("\n📊 Statistical Summary:")
print(df.describe(include='all'))

# ---------------------------------------------
# Create a small Series and DataFrame manually
# ---------------------------------------------
print("\n🧠 Creating Custom Series and DataFrame")

# Series Example
marks = pd.Series([90, 85, 92], index=['Math', 'Science', 'English'])
print("\nSeries Example:\n", marks)

# DataFrame Example
student_data = {
    'Name': ['A', 'B', 'C'],
    'Math': [85, 90, 95],
    'Science': [88, 84, 91]
}
df2 = pd.DataFrame(student_data)
print("\nDataFrame Example:\n", df2)

# =============================================
# 🧩 TASK 2: HANDLING MISSING DATA
# =============================================

print("\n🚨 Checking Missing Values:")
print(df.isnull().sum())

# Fill missing numeric values with mean
df_filled = df.copy()
df_filled = df_filled.fillna(df.mean(numeric_only=True))

# Fill categorical missing values with mode
for col in df_filled.select_dtypes(include=['object']).columns:
    df_filled[col] = df_filled[col].fillna(df_filled[col].mode()[0])


print("\n✅ Missing Values Handled Successfully!")
print(df_filled.isnull().sum())

# Optionally remove missing rows if needed
# df.dropna(inplace=True)

# =============================================
# 🧩 TASK 3: HIERARCHICAL INDEXING
# =============================================

# To demonstrate, pick first two columns for multi-level indexing
if len(df_filled.columns) >= 2:
    df_hier = df_filled.set_index([df_filled.columns[0], df_filled.columns[1]])
    print("\n📑 Hierarchical Index Created!\n")
    print(df_hier.head())

    # Access data using loc[]
    print("\n🎯 Accessing Data using .loc[] Example:")
    try:
        print(df_hier.loc['Preethi'])
    except KeyError:
        print("No row found for 'Preethi', skipping .loc[] demo.")

    # Access data using .xs()
    print("\n🔍 Accessing Cross-section using .xs():")
    print(df_hier.xs(df_filled.iloc[1, 0], level=0))
else:
    print("\n⚠️ Dataset does not have enough columns for hierarchical indexing demonstration.")

# =============================================
# 🧩 TASK 4: PIVOT TABLES
# =============================================

# We’ll create a pivot table if numeric columns exist
num_cols = df_filled.select_dtypes(include=['int64', 'float64']).columns
if len(num_cols) >= 1 and len(df_filled.columns) >= 2:
    pivot = pd.pivot_table(df_filled, values=num_cols[0],
                           index=df_filled.columns[1],
                           aggfunc='mean')
    print("\n📊 Pivot Table (Average of numeric column):\n", pivot)
else:
    print("\n⚠️ Not enough numeric data for pivot table example.")

# =============================================
# 🧾 SUMMARY FINDINGS
# =============================================

print("\n📄 SUMMARY FINDINGS:")
print("""
1️⃣ Dataset successfully loaded and explored.
2️⃣ Missing data handled using mean/mode imputation.
3️⃣ Hierarchical indexing demonstrated using first two columns.
4️⃣ Pivot table created to analyze average values by category.
5️⃣ Custom Series and DataFrame created manually.
✅ Assignment Complete!
""")


✅ Dataset Loaded Successfully!
         date                 datetime cash_type                 card  money  \
0  2024-03-01  2024-03-01 10:15:50.520      card  ANON-0000-0000-0001   38.7   
1  2024-03-01  2024-03-01 12:19:22.539      card  ANON-0000-0000-0002   38.7   
2  2024-03-01  2024-03-01 12:20:18.089      card  ANON-0000-0000-0002   38.7   
3  2024-03-01  2024-03-01 13:46:33.006      card  ANON-0000-0000-0003   28.9   
4  2024-03-01  2024-03-01 13:48:14.626      card  ANON-0000-0000-0004   38.7   

     coffee_name  
0          Latte  
1  Hot Chocolate  
2  Hot Chocolate  
3      Americano  
4          Latte  
✅ Dataset Loaded Successfully!

First 5 Rows:
          date                 datetime cash_type                 card  money  \
0  2024-03-01  2024-03-01 10:15:50.520      card  ANON-0000-0000-0001   38.7   
1  2024-03-01  2024-03-01 12:19:22.539      card  ANON-0000-0000-0002   38.7   
2  2024-03-01  2024-03-01 12:20:18.089      card  ANON-0000-0000-0002   38.7   
3  2024