# Data Analysis on Titanic Dataset

## 1. Import and Load

In [None]:
import pandas as pd
import numpy as np

# Read the raw passenger table from disk, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer='tested.csv')
df.head(n=5)

## 2. Inspect and Check Missing Values

In [None]:
# DataFrame.info() writes its summary directly to stdout and returns None,
# so it is called on its own; wrapping it in print() would emit an extra
# "None" line after the report.
df.info()

# Per-column count of missing entries.
print("\nMissing values:")
print(df.isnull().sum())

## 3. Clean Data

In [None]:
# Discard the Cabin column; errors='ignore' keeps this cell re-runnable once
# the column is already gone.
df.drop(columns=['Cabin'], inplace=True, errors='ignore')

# Fill gaps in Age with the column median and gaps in Fare with the column
# mean, each computed from the values currently present.
age_fill = df['Age'].median()
df['Age'] = df['Age'].fillna(age_fill)

fare_fill = df['Fare'].mean()
df['Fare'] = df['Fare'].fillna(fare_fill)

## 4. Remove Duplicates and Convert Types

In [None]:
# Record the row count on either side of de-duplication so the effect of
# drop_duplicates is visible in the output.
rows_before = df.shape[0]
df.drop_duplicates(inplace=True)
rows_after = df.shape[0]
duplicates_removed = rows_before - rows_after

print(f"Rows before: {rows_before}, Rows after: {rows_after}")
print(f"Duplicates removed: {duplicates_removed}")

# Cast Age to whole numbers.
# NOTE(review): presumably Age has no missing values left at this point
# (an earlier cell fills them) - confirm, since the cast needs that.
age_as_int = df['Age'].astype(int)
df['Age'] = age_as_int
print("Age converted to integer.")

## 5. Feature Engineering

In [None]:
# Family size = siblings/spouses + parents/children + the passenger.
df['FamilySize'] = df['SibSp'] + df['Parch'] + 1

# Bucket ages into labelled groups. pd.cut uses right-closed intervals, so
# without include_lowest=True the first bin would be (0, 12] and an age of
# exactly 0 (e.g. an infant whose age was cast down to 0) would get NaN
# instead of 'Child'.
df['AgeCategory'] = pd.cut(
    df['Age'],
    bins=[0, 12, 19, 60, 200],
    labels=['Child', 'Teen', 'Adult', 'Senior'],
    include_lowest=True,
)
df.head()

## 6. Save Data

In [None]:
# Write the frame to a new CSV; index=False leaves the row index out of the
# file.
df.to_csv(path_or_buf='tested_cleaned.csv', index=False)
print("Result saved to tested_cleaned.csv")