In [None]:
!unzip "/content/archive (4).zip"

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Apply the white-grid seaborn theme to every figure drawn below.
# set_theme() is the supported entry point; sns.set() is only a legacy alias
# of it that the seaborn documentation flags for possible removal.
sns.set_theme(style="whitegrid")


In [None]:
# Read the Titanic passenger table from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="/content/Titanic-Dataset.csv")

# Show the first five rows as a quick sanity check of the load
df.head(n=5)


In [None]:
# General information: column dtypes and non-null counts.
# info() prints its report itself, so it needs no display() wrapper.
df.info()

# Summary statistics for numerical columns.
# A notebook cell only renders the value of its LAST expression; a bare
# df.describe() in the middle of the cell would be computed and silently
# thrown away, so it is passed to display() explicitly.
display(df.describe())

# Missing values per column (last expression, rendered automatically)
df.isnull().sum()


In [None]:
# Numeric columns examined in this and the following cells
numerical_features = ['Age', 'Fare', 'SibSp', 'Parch']

# One histogram panel per numeric column, all on a single figure;
# `column=` restricts the plot to the listed columns.
df.hist(column=numerical_features, bins=15, figsize=(12, 8), color='skyblue')
plt.suptitle('Histograms of Numerical Features', fontsize=16)
plt.show()


In [None]:
# Draw one boxplot per numeric feature, side by side in a single row.
# The number of subplot columns is taken from the feature list instead of a
# hard-coded 4, so editing `numerical_features` can neither make plt.subplot
# raise (more than 4 features) nor leave empty slots (fewer than 4).
plt.figure(figsize=(12, 6))
for i, col in enumerate(numerical_features, 1):
    # subplot positions are 1-based, hence enumerate(..., 1)
    plt.subplot(1, len(numerical_features), i)
    sns.boxplot(y=df[col])
    plt.title(f'Boxplot of {col}')
# Keep titles and tick labels of neighbouring panels from overlapping
plt.tight_layout()
plt.show()


In [None]:
# Pairwise correlations between the numeric features, drawn as an annotated heatmap
corr_matrix = df[numerical_features].corr()
sns.heatmap(data=corr_matrix, annot=True, fmt=".2f", cmap='coolwarm').set_title('Correlation Matrix')
plt.show()

# Pairplot of the numeric features coloured by survival.
# Rows with a missing value in any of the plotted columns are dropped first;
# no sub-sampling is performed.
sns.pairplot(data=df[[*numerical_features, 'Survived']].dropna(), hue='Survived')
plt.show()


In [None]:
# Passenger counts per gender, split by survival outcome
sns.countplot(data=df, x='Sex', hue='Survived').set_title('Survival Count by Gender')
plt.show()

# Age distribution of each survival group, with quartile lines drawn inside the violins
sns.violinplot(data=df, x='Survived', y='Age', inner='quartile').set_title('Age Distribution by Survival')
plt.show()

# Mean of the Survived column per passenger class, i.e. the survival rate by class
sns.barplot(data=df, x='Pclass', y='Survived').set_title('Survival Rate by Passenger Class')
plt.show()


## Feature-level inferences

In [None]:
# Females had a much higher survival rate than males (see "Survival Count by Gender").
# Younger passengers were more likely to survive (see "Age Distribution by Survival").
# First-class passengers had better survival chances than second- or third-class passengers.
# Fare has a few extreme outliers (see its boxplot).
# SibSp and Parch are positively (right-) skewed.