# In [None]:
# 📦 Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for plots.
# set_theme() is seaborn's preferred interface (sns.set is only a legacy alias
# for it). It updates matplotlib's rcParams, so the explicit figure-size
# override is kept after it to make sure the override is what takes effect.
sns.set_theme(style='whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# 📥 Step 2: Load the Data (CSV or Excel)
# In Google Colab, either upload the file manually first:
#     from google.colab import files
#     uploaded = files.upload()
# or point DATA_PATH at a file that is already reachable.
DATA_PATH = 'your_file.csv'

# CSV input:
df = pd.read_csv(DATA_PATH)

# Excel input instead:
# df = pd.read_excel('your_file.xlsx')

# 👀 Step 3: Display Basic Info
# Preview, schema, descriptive statistics, then data-quality counts.
preview_rows = df.head()
print("📊 First 5 Rows:")
display(preview_rows)

print("\n🔍 Data Info:")
df.info()  # info() writes its report directly to stdout

summary_stats = df.describe()
print("\n📏 Summary Statistics:")
display(summary_stats)

# Number of null cells in each column.
missing_per_column = df.isnull().sum()
print("\n🧩 Missing Values:")
print(missing_per_column)

# Number of rows that exactly repeat an earlier row.
duplicate_row_count = df.duplicated().sum()
print("\n🔁 Duplicates:")
print(duplicate_row_count)

# 🧮 Step 4: Check Data Types
column_dtypes = df.dtypes
print("\n🔠 Data Types:")
print(column_dtypes)

# 📊 Step 5: Univariate Analysis
# One histogram with a KDE overlay per numeric column, each shown as its own
# figure.
numeric_columns = df.select_dtypes(include='number').columns
for column_name in numeric_columns:
    axis = sns.histplot(df[column_name], kde=True)
    axis.set_title(f'Distribution of {column_name}')
    axis.set_xlabel(column_name)
    axis.set_ylabel('Frequency')
    plt.show()

# 🔗 Step 6: Correlation Heatmap
# Pairwise correlation of the numeric columns. The matrix is computed up front
# so the plot can be skipped when there is nothing to correlate: sns.heatmap
# raises on an empty matrix, which would otherwise abort the remaining steps.
corr_matrix = df.corr(numeric_only=True)
if corr_matrix.empty:
    print("\n⚠️ No numeric columns found; skipping correlation heatmap.")
else:
    plt.figure(figsize=(10, 8))
    # Correlations live in [-1, 1]; pinning the colour scale to that range
    # keeps 0 at the neutral midpoint of the diverging 'coolwarm' palette and
    # makes heatmaps comparable across datasets.
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title('Correlation Matrix')
    plt.show()

# 📈 Step 7: Boxplots for Outlier Detection
# One boxplot per numeric column, drawn along the x-axis.
numeric_frame = df.select_dtypes(include='number')
for numeric_col in numeric_frame:  # iterating a DataFrame yields column labels
    box_axis = sns.boxplot(x=df[numeric_col])
    box_axis.set_title(f'Boxplot of {numeric_col}')
    plt.show()

# 📊 Step 8: Categorical Feature Analysis
# One count plot per categorical column, with bars ordered from most to least
# frequent (value_counts() sorts by count, descending). Columns using pandas'
# dedicated 'category' dtype are selected alongside plain object columns so
# they are not silently skipped.
categorical_columns = df.select_dtypes(include=['object', 'category']).columns
for col in categorical_columns:
    level_order = df[col].value_counts().index
    sns.countplot(y=df[col], order=level_order)
    plt.title(f'Countplot of {col}')
    plt.show()
