# In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset from "titanic.csv" (a path relative to the current working
# directory) into the module-level DataFrame `df` used by the menu below.
df = pd.read_csv("titanic.csv")

# Function to clean data
def clean_data(data):
    """Impute, normalise and de-duplicate a DataFrame in place.

    Steps, in order:

    1. Numeric columns: missing values are replaced by the column mean.
    2. Object columns: missing values are replaced by the most frequent
       value (skipped when the column has no non-missing value at all).
    3. Rows that still contain a missing value are dropped.
    4. Object columns are title-cased and stripped of surrounding whitespace.
    5. Duplicate rows are dropped. This runs after the text normalisation so
       rows that differ only by letter case or padding count as duplicates.

    Args:
        data: pandas DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Assign the filled column back instead of `data[col].fillna(..., inplace=True)`:
    # the chained in-place form acts on a temporary under pandas Copy-on-Write
    # and leaves `data` unchanged.
    for col in data.select_dtypes(include=[np.number]).columns:
        if data[col].isna().any():
            data[col] = data[col].fillna(data[col].mean())

    text_cols = list(data.select_dtypes(include=['object']).columns)
    for col in text_cols:
        modes = data[col].mode()
        # mode() is empty when every value is missing; nothing to impute with.
        if not modes.empty:
            data[col] = data[col].fillna(modes.iloc[0])

    data.dropna(inplace=True)

    for col in text_cols:
        data[col] = data[col].str.title().str.strip()

    data.drop_duplicates(inplace=True)
    return data

# Clean the freshly loaded data once up front; clean_data returns the
# DataFrame it was given, so `df` is rebound to that returned object.
df = clean_data(df)

# Function to show summary statistics
def show_summary(data):
    """Print summary statistics for a DataFrame to stdout.

    Prints, in order:

    * ``data.describe()`` under a "Numerical Summary" heading;
    * the most frequent value of every object-dtype column, or ``N/A`` when
      the column has no non-missing value;
    * the variance of every numeric column as computed by ``np.var`` with its
      default ``ddof=0``, rounded to 2 decimals;
    * the Age/Fare correlation, rounded to 2 decimals, when both columns
      exist.

    Args:
        data: pandas DataFrame to summarise. It is not modified.

    Returns:
        None.
    """
    print("\nNumerical Summary:\n", data.describe())

    print("\nMost Common Categorical Values:")
    for col in data.select_dtypes(include='object').columns:
        modes = data[col].mode()
        # mode() is empty when every value is missing; indexing it would raise.
        most_common = modes.iloc[0] if not modes.empty else "N/A"
        print(f"{col}: {most_common}")

    for col in data.select_dtypes(include=[np.number]).columns:
        print(f"\nVariance ({col}):", round(np.var(data[col]), 2))

    if {'Age', 'Fare'}.issubset(data.columns):
        # Series.corr skips rows where either value is missing, whereas
        # np.corrcoef would return NaN as soon as one value is missing.
        correlation = data['Age'].corr(data['Fare'])
        print("Correlation (Age vs Fare):", round(correlation, 2))

# Function to display visualizations
def show_visualizations(data):
    """Draw up to three charts, depending on which columns are present.

    * 'Age' present: 20-bin histogram titled "Age Distribution".
    * 'Sex' present: bar chart of value counts titled "Gender Distribution".
    * 'Age' and 'Fare' present: scatter plot titled "Age vs Fare".

    Each chart is shown with ``plt.show()`` before the next one is drawn.

    Args:
        data: pandas DataFrame to plot from.

    Returns:
        None.
    """
    def _label_and_show(x_label, y_label, heading=None):
        # Apply the optional title and the axis labels, then display the figure.
        if heading is not None:
            plt.title(heading)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.show()

    available = data.columns
    has_age = 'Age' in available
    has_sex = 'Sex' in available
    has_fare = 'Fare' in available

    if has_age:
        data['Age'].plot(
            kind='hist',
            bins=20,
            color='skyblue',
            edgecolor='black',
            title='Age Distribution',
        )
        _label_and_show('Age', 'Frequency')

    if has_sex:
        counts = data['Sex'].value_counts()
        counts.plot(
            kind='bar',
            color=['lightcoral', 'lightblue'],
            title='Gender Distribution',
        )
        _label_and_show('Gender', 'Count')

    if has_age and has_fare:
        plt.scatter(data['Age'], data['Fare'], color='green', alpha=0.6)
        _label_and_show('Age', 'Fare', heading='Age vs Fare')

# Console menu: repeatedly print the options, read one choice from stdin and
# dispatch to the matching function until the user picks Exit.
while True:
    print("\n=== EDA Menu ===")
    print("1. Show Summary Statistics")
    print("2. Clean Missing Values")
    print("3. Display Visualizations")
    print("4. Exit")

    try:
        # Strip surrounding whitespace so an entry such as " 1" is accepted.
        choice = input("Enter your choice: ").strip()
    except EOFError:
        # stdin was closed (e.g. piped input ran out): treat it as Exit
        # instead of ending with a traceback.
        choice = '4'

    if choice == '1':
        show_summary(df)
    elif choice == '2':
        df = clean_data(df)
        print("✅ Missing values handled and data cleaned!")
    elif choice == '3':
        show_visualizations(df)
    elif choice == '4':
        print("👋 Exiting program.")
        break
    else:
        print("❌ Invalid choice. Please try again.")
