# Titanic Dataset: Data Cleaning & Preprocessing

**Objective:** Clean and prepare the Titanic dataset for machine learning.

**Tools:** Python, Pandas, NumPy, Matplotlib/Seaborn

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Titanic dataset; the relative path is resolved against the
# current working directory
df = pd.read_csv('Titanic-Dataset.csv')
# Preview the first five rows to sanity-check the parsed columns
df.head()

## Basic Exploration

In [None]:
# Print each column's name, non-null count and dtype, plus memory usage
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of numeric columns
df.describe()

## Missing Values

In [None]:
# Number of missing entries in every column
df.isna().sum()

In [None]:
# Fill or drop missing values.
# The filled columns are assigned back rather than calling
# fillna(inplace=True) on df['col']: that chained in-place call operates on a
# temporary Series, emits a FutureWarning on pandas 2.x, and leaves df
# unchanged once Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].median())  # fill with the median
# mode() returns a Series; [0] picks the first most frequent value
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
# Remove the 'Cabin' column entirely instead of imputing it
df.drop(columns=['Cabin'], inplace=True)

## Duplicates

In [None]:
# Count rows that repeat an earlier row across all columns
df.duplicated().sum()

In [None]:
# Keep the first occurrence of each fully duplicated row, discard the rest
df = df.drop_duplicates()

## Data Type Conversion

In [None]:
# Re-type 'Pclass' as a pandas categorical; all other columns keep their dtype
df = df.astype({'Pclass': 'category'})

## Exploratory Data Analysis (EDA)

In [None]:
# Bar chart of the number of rows for each value of 'Survived'
sns.countplot(data=df, x='Survived').set_title('Survival Count')
plt.show()

In [None]:
# Histogram of the 'Age' column split into 20 bins
sns.histplot(df['Age'], bins=20).set_title('Age Distribution')
plt.show()

In [None]:
# Box plot of 'Fare', one box per 'Pclass' value
sns.boxplot(data=df, x='Pclass', y='Fare').set_title('Fare by Class')
plt.show()

## Save Cleaned Dataset

In [None]:
# Write the cleaned data to a CSV in the working directory; index=False
# leaves the row index out of the file
df.to_csv('cleaned_titanic.csv', index=False)