## 📂 Import Libraries


In [59]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


## 📊 Load Dataset
 load the Titanic dataset using seaborn's built-in datasets.

In [60]:
# Load  dataset
df = sns.load_dataset('titanic')


### 🔍 Preview Data


In [61]:
# first 5 rows
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


### ℹ️ Dataset Information
We check data types and non-null counts for each column.

In [62]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [63]:
df.shape

(891, 15)

In [64]:
#survivors only
len(df[df['survived'] == 1])


342

In [65]:
#  survived women
print(f"number women survivor: {len(df[(df['sex'] == 'female') & (df['survived'] == 1)])}")



number women survivor: 233


In [66]:
# First class passengers and paid more than 50
print(f"first class  with fare > 50: {len(df[(df['pclass'] == 1) & (df['survived'] == 1) & (df['fare'] > 50)])}")


first class  with fare > 50: 102


In [67]:
bas_info = df[['survived', 'pclass', 'sex', 'age', 'fare']]
print(" passen information ")
bas_info.head()


 passen information 


Unnamed: 0,survived,pclass,sex,age,fare
0,0,3,male,22.0,7.25
1,1,1,female,38.0,71.2833
2,1,3,female,26.0,7.925
3,1,1,female,35.0,53.1
4,0,3,male,35.0,8.05


## 📈 Exploratory Data Analysis (EDA)


In [68]:
#  slicing with loc
class_one= df.loc[df['pclass'] == 1, ['sex', 'age', 'fare', 'survived']]
print(f"\nfirst class")
class_one.head(10)


first class


Unnamed: 0,sex,age,fare,survived
1,female,38.0,71.2833,1
3,female,35.0,53.1,1
6,male,54.0,51.8625,0
11,female,58.0,26.55,1
23,male,28.0,35.5,1
27,male,19.0,263.0,0
30,male,40.0,27.7208,0
31,female,,146.5208,1
34,male,28.0,82.1708,0
35,male,42.0,52.0,0


In [69]:
# aggregations
stats = df.agg({
    'adult_male': ['sum', 'mean', 'count'],
    'age': ['mean', 'median', 'std', 'min', 'max'],
    'fare': ['mean', 'median', 'std', 'min', 'max']
})
stats

Unnamed: 0,adult_male,age,fare
sum,537.0,,
mean,0.602694,29.699118,32.204208
count,891.0,,
median,,28.0,14.4542
std,,14.526497,49.693429
min,,0.42,0.0
max,,80.0,512.3292


In [70]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [71]:
# group by  gender
gender = df.groupby('sex').agg({
    'survived': ['count'],
    'age': ['mean', 'max','min'],
    'fare': ['sum','mean', 'median']
}).round(2)
gender

Unnamed: 0_level_0,survived,age,age,age,fare,fare,fare
Unnamed: 0_level_1,count,mean,max,min,sum,mean,median
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
female,314,27.92,63.0,0.75,13966.66,44.48,23.0
male,577,30.73,80.0,0.42,14727.29,25.52,10.5


In [72]:
# group by gender and class
class_gender= df.groupby(['pclass', 'sex']).agg({
    'survived': 'count',
    'age': ['mean', 'max', 'min'],
    'fare': 'sum'
}).round(2)
class_gender

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,age,age,fare
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,max,min,sum
pclass,sex,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1,female,94,34.61,63.0,2.0,9975.83
1,male,122,41.28,80.0,0.92,8201.59
2,female,76,28.72,57.0,2.0,1669.73
2,male,108,30.74,70.0,0.67,2132.11
3,female,144,21.75,63.0,0.75,2321.11
3,male,347,26.51,74.0,0.42,4393.59


In [90]:
# function to calculate z-score
def zscore(series):
    return (series - series.mean()) / series.std()

#  transform with custom function
df['fare_zscore'] = df.groupby('age_group')['fare'].transform(zscore)






In [74]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [75]:
# Create age groups using bins and labels

age_groups = pd.cut(df['age'], bins=[0, 18, 35, 60, 100], labels=['chlid', 'young', 'adult', 'old'])
df['age_group'] = age_groups

age_group= df.groupby('age_group').agg({
    'survived': ['count'],
    'fare': ['mean', 'sum'],
}).round(2)
age_group





Unnamed: 0_level_0,survived,fare,fare
Unnamed: 0_level_1,count,mean,sum
age_group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
chlid,139,32.5,4517.6
young,358,29.93,10713.48
adult,195,44.26,8630.63
old,22,41.37,910.17


In [76]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age_group
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,young
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,adult
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,young
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,young
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,young
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,young
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,young
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,young


In [77]:

# Apply to rows - create custom passenger profile
def rofile(row):
    score = 20
    if pd.notna(row['age']):
        if row['age'] < 18:
            score += 15
        elif row['age'] > 60:
            score -= 10
    # Class 
    if row['pclass'] == 1:
        score += 15
    elif row['pclass'] == 2:
        score += 5
    else:
        score -= 5
    
    # Fare 
    if pd.notna(row['fare']) :
        if (row['fare']< df['fare'].mean()):
            score -= 10
        if (row['fare']>df['fare'].mean()):
            score += 10
    return score

df['survival_score'] = df.apply(rofile, axis=1)

## 📊 GroupBy and Aggregations


In [78]:
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age_group,survival_score
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False,young,5
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,adult,45
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,young,5
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,young,45
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True,young,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True,young,15
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,young,25
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False,,5
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True,young,25


In [79]:
# queries
df.query("survived == 1")


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age_group,survival_score
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,adult,45
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True,young,5
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,young,45
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False,young,5
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False,chlid,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1,3,female,15.0,0,0,7.2250,C,Third,child,False,,Cherbourg,yes,True,chlid,20
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False,adult,45
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False,young,15
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,young,25


In [80]:
# queries with multiple condi
df.query("pclass == 1 and sex == 'female'")


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age_group,survival_score
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,adult,45
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,young,45
11,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True,adult,25
31,1,1,female,,1,0,146.5208,C,First,woman,False,B,Cherbourg,yes,False,,45
52,1,1,female,49.0,1,0,76.7292,C,First,woman,False,D,Cherbourg,yes,False,adult,45
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
856,1,1,female,45.0,1,1,164.8667,S,First,woman,False,,Southampton,yes,False,adult,45
862,1,1,female,48.0,0,0,25.9292,S,First,woman,False,D,Southampton,yes,True,adult,25
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False,adult,45
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False,adult,45


In [81]:
# query wit muliple condi
df.query("(pclass == 1 or (sex == 'female' and age < 18)) and survived == 1")


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,age_group,survival_score
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,adult,45
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False,young,45
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False,chlid,30
10,1,3,female,4.0,1,1,16.7000,S,Third,child,False,G,Southampton,yes,False,chlid,20
11,1,1,female,58.0,0,0,26.5500,S,First,woman,False,C,Southampton,yes,True,adult,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
871,1,1,female,47.0,1,1,52.5542,S,First,woman,False,D,Southampton,yes,False,adult,45
875,1,3,female,15.0,0,0,7.2250,C,Third,child,False,,Cherbourg,yes,True,chlid,20
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False,adult,45
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True,young,25


## 📉 Visualizations with Plotly


In [82]:
# number of survived and died
sur_counts = df['survived'].value_counts()
fig = make_subplots(rows=1, cols=1, specs=[[{'type':'domain'}]])
fig.add_trace(
    go.Pie(labels=['died', 'survived'],
           values=sur_counts.values,
           name="Survival"),
    row=1, col=1
)
fig.update_layout(title_text=" Survival Distribution")

fig.show()

In [83]:
# number of passengers in each class
class_counts = df['pclass'].value_counts().sort_index()
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type':'domain'}, {'type':'domain'}]],
    subplot_titles=["Survival", "Passenger Class"]
)
fig.add_trace(
    go.Pie(labels=[f'Class {i}' for i in class_counts.index],
           values=class_counts.values,
           name="Class"),
    row=1, col=2
)

fig.update_layout(title_text="Titanic Dataset - Survival & Class Distribution")

fig.show()

In [84]:
# Count passengers grouped by sex
sur_by_sex = df.groupby("sex")["survived"].count().reset_index()

fig = px.bar(sur_by_sex, x="sex", y="survived",
             title="number  by Sex",
             )
fig.show()

In [85]:
# group  by passenger class, sex, and embark town
class_sex_embark = (
    df.groupby(["pclass", "sex", "embark_town"])["survived"]
      .count()
      .reset_index()
)

# Create grouped bar chart 
fig = px.bar(
    class_sex_embark,
    x="pclass",                  
    y="survived",                
    color="sex",                 
    barmode="group",             
    facet_col="embark_town",     
    labels={"pclass": "Passenger Class", "survived": "Number of Survivors"},
    title="Number of Survivors by Class, Sex, and Embark Town"
)

# Show figure
fig.show()


In [86]:
# survivors and group by class and sex
survivors_df = (
    df[df["survived"] == 1]
    .groupby(["pclass", "sex"])["survived"]
    .count()
    .reset_index(name="count")
)

# Calculate total survivors 
total_df = (
    df[df["survived"] == 1]
    .groupby("pclass")["survived"]
    .count()
    .reset_index(name="count")
)

# Add a new column to label totals
total_df["sex"] = "total"  

# Combine both DataFrames (sex split + total)
plot_df = pd.concat([survivors_df, total_df])

# Create grouped bar chart
fig = px.bar(
    plot_df,
    x="pclass",                 
    y="count",                  
    color="sex",                
    barmode="group",            
    labels={"pclass": "Passenger Class", "count": "Number of Survivors"},
    title="Number of Survivors by Class (Male, Female, Total)"
)

fig.show()


In [87]:
fig = px.scatter(
    df,
    x="age",
    y="fare",
    color="survived",
    size="fare",
    hover_data=["sex", "pclass"],
    title="Fare vs Age"
)
fig.show()


In [88]:
fig = px.histogram(
    df,
    x="age",
    color="survived",
    nbins=30,
    barmode="overlay",
    title="Age Distribution by Survival",
    labels={"survived": "Survived (0=No, 1=Yes)"}
)
fig.show()


In [89]:
surv_by_embark = df.groupby("embark_town")["survived"].count().reset_index()

fig = px.bar(
    surv_by_embark,
    x="embark_town",
    y="survived",
    text="survived",
    color='survived',
    title="Survival Rate by Embark Town",
)
fig.show()
