# In [None]:
import pandas as pd
import seaborn as sns

# Pull the Titanic passenger table from Seaborn's sample datasets
df = sns.load_dataset('titanic')

# Preview: heading first, then the top five records
print("First 5 Rows of Data:", df.head(5), sep="\n")

# ---------------- DATA INSPECTION ----------------
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Descriptive statistics as computed by DataFrame.describe()
print("\nSummary Statistics:")
print(df.describe())

# Number of null entries in each column
print("\nMissing Values Count:")
print(df.isnull().sum())

# ---------------- DATA SELECTION ----------------
# Keep only the survival flag plus a handful of passenger attributes
df_selected = df.loc[:, ['survived', 'pclass', 'sex', 'age', 'fare']]
print("\nSelected Columns:", df_selected.head(5), sep="\n")

# ---------------- DATA FILTERING ----------------
# Keep the rows where both conditions hold: survived == 1 and pclass == 1
df_filtered = df.loc[df['survived'].eq(1) & df['pclass'].eq(1)]
print("\nFiltered Data (Survived & First Class):", df_filtered.head(5), sep="\n")

# ---------------- HANDLING MISSING DATA ----------------
# Fill missing 'age' values with the median age.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the df['age'] selection: that chained in-place call
# is deprecated in pandas 2.x and does not update df under Copy-on-Write.
df['age'] = df['age'].fillna(df['age'].median())

# Drop rows where 'embarked' is missing
df.dropna(subset=['embarked'], inplace=True)

# Fill missing 'deck' values with 'Unknown'.
# A categorical column only accepts fill values that are already among its
# categories, so 'Unknown' is registered first. The dtype check keeps this
# working if 'deck' is a plain object column, and the membership check makes
# the step safe to re-run.
# NOTE(review): seaborn's loader appears to return 'deck' as categorical -
# confirm against the installed seaborn version.
if (isinstance(df['deck'].dtype, pd.CategoricalDtype)
        and 'Unknown' not in df['deck'].cat.categories):
    df['deck'] = df['deck'].cat.add_categories('Unknown')
df['deck'] = df['deck'].fillna('Unknown')

# Verify missing values are handled
print("\nMissing Values After Cleaning:")
print(df.isnull().sum())

# Display final dataset after cleaning
print("\nFinal Cleaned Dataset:")
print(df.head())