# In [None]:
# Step 1: Import Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Step 2: Load the Titanic Dataset
# The path is relative to the current working directory; adjust it if the
# CSV lives somewhere else.
df = pd.read_csv(filepath_or_buffer='titanic.csv')

# Step 3: Display Basic Info and First Few Rows
# Compute both summaries up front, then print each heading followed by its
# table on the next line.
head_rows = df.head()
missing_per_column = df.isnull().sum()
print("Initial Data Snapshot:", head_rows, sep="\n")
print("\nMissing Values Per Column:", missing_per_column, sep="\n")

# Step 4: Visualize Missing Data
# Each cell of the heatmap is one value of the boolean missing-value mask.
null_mask_before = df.isnull()
plt.figure(figsize=(10, 6))
ax_before = sns.heatmap(data=null_mask_before, cmap='viridis', cbar=False)
ax_before.set_title("Missing Values in Titanic Dataset (Before Cleaning)")
plt.show()

# Step 5: Handle Missing Data
# Each column is assigned back to `df` instead of calling
# `df[col].fillna(..., inplace=True)`. The chained in-place form operates on
# an intermediate Series: it raises a FutureWarning in pandas 2.x and leaves
# `df` unchanged once Copy-on-Write is active (pandas 3.0).

# Fill Age (numerical) with median
df['Age'] = df['Age'].fillna(df['Age'].median())

# Fill Embarked (categorical) with mode
# mode() returns an empty Series when every value is missing; in that case
# there is nothing sensible to fill with, so the column is left as-is.
embarked_modes = df['Embarked'].mode()
if not embarked_modes.empty:
    df['Embarked'] = df['Embarked'].fillna(embarked_modes.iloc[0])

# Fill Fare (numerical) with median if missing
df['Fare'] = df['Fare'].fillna(df['Fare'].median())

# Fill Cabin (categorical) with "Unknown"
df['Cabin'] = df['Cabin'].fillna("Unknown")

# Step 6: Check for Remaining Missing Values
# Per-column count of values that are still missing after the fill step.
remaining_missing = df.isna().sum()
print("\nMissing Values After Cleaning:", remaining_missing, sep="\n")

# Step 7: Visualize Again to Confirm Cleaning
# Same plot as before cleaning, drawn from the current state of `df`.
null_mask_after = df.isnull()
plt.figure(figsize=(10, 6))
ax_after = sns.heatmap(data=null_mask_after, cmap='viridis', cbar=False)
ax_after.set_title("Missing Values in Titanic Dataset (After Cleaning)")
plt.show()

# Step 8: Save Cleaned Dataset (optional)
# index=False keeps the DataFrame's row index out of the written file.
df.to_csv(path_or_buf='titanic_cleaned.csv', index=False)