In [5]:
# ✅ Step 0: Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# ✅ Check current notebook folder
print("📂 Current notebook folder is:")
print(os.getcwd())  # This tells you where to paste your CSV file

# ✅ Step 1: Load the dataset (CSV must be in the same folder as the notebook)
df = pd.read_csv("titanic.csv")  # Make sure the file is renamed as titanic.csv and placed here
print("\n✅ Titanic dataset loaded successfully!")

# ✅ Step 2: Explore the data
print("\n📄 First 5 rows of data:\n", df.head())
print("\n🔢 Dataset shape:", df.shape)
print("\n📊 Data types:\n", df.dtypes)
print("\n❓ Missing values:\n", df.isnull().sum())

# ✅ Step 3: Handle missing values
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)
df.drop('Cabin', axis=1, inplace=True)  # Too many missing values

print("\n✅ Missing values handled.\n", df.isnull().sum())

# ✅ Step 4: Convert categorical to numerical (Encoding)
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df = pd.get_dummies(df, columns=['Embarked'], drop_first=True)

print("\n🔧 Categorical variables encoded.\n", df.head())

# ✅ Step 5: Normalize/Standardize numerical features
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df[['Age', 'Fare']] = scaler.fit_transform(df[['Age', 'Fare']])

print("\n📏 Numerical features standardized.\n", df[['Age', 'Fare']].head())

# ✅ Step 6: Visualize and remove outliers
from scipy import stats
import numpy as np

plt.figure(figsize=(10, 5))
sns.boxplot(data=df[['Age', 'Fare']])
plt.title('📦 Outliers (Before Cleaning)')
plt.show()

z_scores = np.abs(stats.zscore(df[['Age', 'Fare']]))
df = df[(z_scores < 3).all(axis=1)]  # Keep only values within z < 3

print("\n🧹 Outliers removed. New shape:", df.shape)

# ✅ Step 7: Final check
print("\n🚀 Final dataset ready for ML training!\n", df.head())


📂 Current notebook folder is:
C:\Users\SHIVAIN GUPTA


FileNotFoundError: [Errno 2] No such file or directory: 'titanic.csv'