In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Plot defaults shared by every figure in the notebook.
# `set_theme` is the preferred spelling; `sns.set` is only a legacy alias for it.
sns.set_theme(style="whitegrid")
plt.rcParams['figure.figsize'] = (10, 6)

# Titanic CSV, fetched over HTTP each time this cell runs.
url = "https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv"

# Load and shorten the two verbose column names in a single expression.
# Chaining `rename` (instead of `inplace=True`) builds `df` in one step, so
# re-running this cell always yields the same frame from a fresh download.
df = pd.read_csv(url).rename(columns={
    'Siblings/Spouses Aboard': 'SibSp',
    'Parents/Children Aboard': 'Parch'
})

print("Data Loaded Successfully!")
df.head()

In [None]:
# Each entry pairs a printed heading with the keyword arguments handed to
# `df.describe`: the defaults for the first table, object-dtype columns only
# for the second.
summary_sections = (
    ("--- Numerical Statistics ---", {}),
    ("\n--- Categorical Statistics (Unique Values) ---", {"include": [object]}),
)

for heading, describe_kwargs in summary_sections:
    print(heading)
    display(df.describe(**describe_kwargs))

In [None]:
# --- Survival counts ---------------------------------------------------------
plt.figure(figsize=(6, 4))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 and scheduled
# for removal in 0.14, so the x variable is also assigned to `hue`.
# `dodge=False` keeps one full-width bar per outcome.
count_ax = sns.countplot(x='Survived', hue='Survived', data=df,
                         palette='pastel', dodge=False)
# The hue legend only repeats the x tick labels; drop it when seaborn adds one.
count_legend = count_ax.get_legend()
if count_legend is not None:
    count_legend.remove()
plt.title("Distribution of Survival (0=Died, 1=Survived)")
plt.show()

# --- Age and Fare histograms, side by side -----------------------------------
fig, ax = plt.subplots(1, 2, figsize=(14, 5))

sns.histplot(df['Age'], kde=True, color='skyblue', ax=ax[0])
ax[0].set_title('Age Distribution')

sns.histplot(df['Fare'], kde=True, color='salmon', ax=ax[1])
ax[1].set_title('Fare Distribution')

plt.show()

In [None]:
# NOTE(review): this cell draws the same three plots as the cell before it
# (survival counts, Age histogram, Fare histogram). It looks like a copy-paste
# leftover -- confirm and delete one of the two cells.

# --- Survival counts ---------------------------------------------------------
plt.figure(figsize=(6, 4))
# Passing `palette` without `hue` is deprecated in seaborn 0.13 and scheduled
# for removal in 0.14, so the x variable is also assigned to `hue`.
# `dodge=False` keeps one full-width bar per outcome.
count_ax = sns.countplot(x='Survived', hue='Survived', data=df,
                         palette='pastel', dodge=False)
# The hue legend only repeats the x tick labels; drop it when seaborn adds one.
count_legend = count_ax.get_legend()
if count_legend is not None:
    count_legend.remove()
plt.title("Distribution of Survival (0=Died, 1=Survived)")
plt.show()

# --- Age and Fare histograms, side by side -----------------------------------
fig, ax = plt.subplots(1, 2, figsize=(14, 5))

sns.histplot(df['Age'], kde=True, color='skyblue', ax=ax[0])
ax[0].set_title('Age Distribution')

sns.histplot(df['Fare'], kde=True, color='salmon', ax=ax[1])
ax[1].set_title('Fare Distribution')

plt.show()

In [None]:
# Age and Fare compared between the two survival outcomes.
fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Passing `palette` without `hue` is deprecated in seaborn 0.13 and scheduled
# for removal in 0.14, so `Survived` is assigned to `hue` as well as `x`.
# The palette is given as an explicit colour list (one colour per outcome,
# sampled from 'coolwarm') so the numeric hue is coloured per category rather
# than along a continuous colormap.
outcome_palette = sns.color_palette('coolwarm', df['Survived'].nunique())

sns.boxplot(x='Survived', y='Age', hue='Survived', data=df, ax=ax[0],
            palette=outcome_palette, dodge=False)
ax[0].set_title('Age Comparison: Survived vs Died')

sns.boxplot(x='Survived', y='Fare', hue='Survived', data=df, ax=ax[1],
            palette=outcome_palette, dodge=False)
# NOTE(review): a log axis cannot place a fare of 0. If the data contains any,
# the lower whisker is clipped at the bottom of the panel -- confirm, and
# consider a 'symlog' scale if zero fares matter.
ax[1].set_yscale('log')
ax[1].set_title('Fare Paid Comparison (Log Scale)')

# The hue legend only repeats the x tick labels; drop it wherever seaborn adds one.
for panel in ax:
    panel_legend = panel.get_legend()
    if panel_legend is not None:
        panel_legend.remove()

plt.show()

In [None]:
# Split violins: one half per survival outcome within each passenger class.
survival_colors = {0: "r", 1: "g"}

plt.figure(figsize=(12, 6))
violin_ax = sns.violinplot(
    data=df,
    x="Pclass",
    y="Age",
    hue="Survived",
    split=True,
    palette=survival_colors,
)
violin_ax.set_title("Age Distribution by Class and Survival Status")
plt.show()

In [None]:
# Build a frame for the correlation matrix without touching `df`:
# encode Sex as 0/1 first, then drop the free-text Name column.
sex_codes = {'male': 0, 'female': 1}
corr_df = (
    df
    .assign(Sex=lambda frame: frame['Sex'].map(sex_codes))
    .drop(columns=['Name'])
)

# Pairwise correlations of the remaining columns, annotated to two decimals.
correlations = corr_df.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(correlations, annot=True, fmt=".2f", cmap='coolwarm', linewidths=0.5)
plt.title("Correlation Heatmap (Mixed Types Encoded)")
plt.show()