In [5]:
import pandas as pd
import numpy as np
import plotly.express as px

In [None]:
def survival_demographics(df=None) -> pd.DataFrame:
    """Summarise Titanic survival by passenger class, sex, and age group.

    Args:
        df: Optional passenger DataFrame with at least the columns
            ``Pclass``, ``Sex``, ``Age`` and ``Survived``. When omitted, the
            Titanic CSV is downloaded from the course dataset repository.
            The frame passed in is never modified.

    Returns:
        A DataFrame with one row per (Pclass, Sex, AgeGroup) combination and
        the columns ``TotalPassengers``, ``Survivors`` and ``SurvivalRate``,
        sorted by class, sex, and age group. Combinations with no passengers
        are kept, so their ``SurvivalRate`` is NaN (0 / 0).

    Notes:
        Passengers whose age is missing or negative receive no age group and
        are therefore left out of every count.
    """
    if df is None:
        url = 'https://raw.githubusercontent.com/leontoddjohnson/datasets/main/data/titanic.csv'
        df = pd.read_csv(url)

    # Age brackets: [0, 12] Child, (12, 19] Teenager, (19, 59] Adult, (59, inf) Senior.
    # include_lowest=True keeps an age of exactly 0 in 'Child'; without it the
    # first bin is (0, 12] and such rows silently become NaN.
    bins = [0, 12, 19, 59, float('inf')]
    labels = ['Child', 'Teenager', 'Adult', 'Senior']
    age_group = pd.cut(
        df['Age'], bins=bins, labels=labels, right=True, include_lowest=True
    )

    # assign() returns a new frame, so neither a caller-supplied df nor the
    # freshly downloaded one is mutated.
    passengers = df.assign(AgeGroup=age_group)

    # AgeGroup is categorical: state `observed` explicitly so the result does
    # not depend on pandas' changing default (and no FutureWarning is raised).
    # observed=False keeps every age group in the output, even empty ones.
    results = (
        passengers
        .groupby(['Pclass', 'Sex', 'AgeGroup'], observed=False)
        .agg(
            TotalPassengers=('Survived', 'count'),
            Survivors=('Survived', 'sum'),
        )
        .reset_index()
    )

    results['SurvivalRate'] = results['Survivors'] / results['TotalPassengers']

    # Categorical AgeGroup sorts in label order (Child < ... < Senior).
    return results.sort_values(by=['Pclass', 'Sex', 'AgeGroup'])

In [7]:
# Display the class / sex / age-group survival summary as this cell's output.
survival_demographics()

  grouped = df1.groupby(['Pclass', 'Sex', 'AgeGroup'])


Unnamed: 0,Pclass,Sex,AgeGroup,TotalPassengers,Survivors,SurvivalRate
0,1,female,Child,1,0,0.0
1,1,female,Teenager,13,13,1.0
2,1,female,Adult,68,66,0.970588
3,1,female,Senior,3,3,1.0
4,1,male,Child,3,3,1.0
5,1,male,Teenager,4,1,0.25
6,1,male,Adult,80,34,0.425
7,1,male,Senior,14,2,0.142857
8,2,female,Child,8,8,1.0
9,2,female,Teenager,8,8,1.0


In [31]:
def visualize_demographic():
    """Plot the overall survival rate for each sex as a bar chart.

    The per-class / sex / age-group counts returned by
    ``survival_demographics`` are summed down to one row per sex, and the
    survival rate is recomputed from those totals (not averaged across the
    finer groups).

    Returns:
        The Plotly Express bar figure, with the y-axis shown as a percentage
        over the fixed range 0-1.
    """
    demographics = survival_demographics()

    # Roll the detailed table up to one row per sex.
    by_sex = (
        demographics
        .groupby('Sex', as_index=False)
        .agg(
            TotalPassengers=('TotalPassengers', 'sum'),
            Survivors=('Survivors', 'sum'),
        )
    )
    by_sex = by_sex.assign(
        SurvivalRate=by_sex['Survivors'] / by_sex['TotalPassengers']
    )

    # Bar chart: one bar per sex, coloured by sex.
    bar_options = dict(
        x='Sex',
        y='SurvivalRate',
        title='Survival Rate by Gender',
        color='Sex',
        labels={'SurvivalRate': 'Survival Rate', 'Sex': 'Gender'},
        color_discrete_map={'male': 'blue', 'female': 'pink'},
    )
    chart = px.bar(by_sex, **bar_options)

    # Axis titles, percentage ticks on a fixed 0-1 scale, centred title.
    layout_options = dict(
        xaxis_title='Gender',
        yaxis_title='Survival Rate',
        yaxis_tickformat='.0%',
        yaxis=dict(range=[0, 1]),
        title_x=0.5,
        template='plotly_white',
    )
    chart.update_layout(**layout_options)

    return chart

In [32]:
# Render the survival-rate-by-gender bar chart as this cell's output.
visualize_demographic()





In [8]:
def family_groups(df=None) -> pd.DataFrame:
    """Summarise ticket fares by family size and passenger class.

    Args:
        df: Optional passenger DataFrame with at least the columns
            ``SibSp``, ``Parch``, ``Pclass``, ``Fare`` and ``PassengerId``.
            When omitted, the Titanic CSV is downloaded from the course
            dataset repository. The frame passed in is never modified.

    Returns:
        A DataFrame with one row per (family_size, Pclass) pair present in
        the data and the columns ``avg_fare``, ``num_passengers``,
        ``min_fare`` and ``max_fare``, sorted by class and then family size.
    """
    if df is None:
        url = 'https://raw.githubusercontent.com/leontoddjohnson/datasets/main/data/titanic.csv'
        df = pd.read_csv(url)

    # Family size = siblings/spouses + parents/children + the passenger.
    # assign() returns a new frame, leaving the input untouched.
    passengers = df.assign(family_size=df['SibSp'] + df['Parch'] + 1)

    results = (
        passengers
        .groupby(['family_size', 'Pclass'])
        .agg(
            avg_fare=('Fare', 'mean'),
            num_passengers=('PassengerId', 'count'),
            min_fare=('Fare', 'min'),
            max_fare=('Fare', 'max'),
        )
        .reset_index()
    )

    # Order by class first so each class reads as a block of growing families.
    return results.sort_values(by=['Pclass', 'family_size'], ascending=[True, True])

In [9]:
# Display the fare summary by family size and class as this cell's output.
family_groups()

Unnamed: 0,family_size,Pclass,avg_fare,num_passengers,min_fare,max_fare
0,1,1,63.672514,109,0.0,512.3292
3,2,1,91.848039,70,29.7,512.3292
6,3,1,95.681075,24,26.2833,211.5
9,4,1,133.521429,7,120.0,151.55
12,5,1,262.375,2,262.375,262.375
15,6,1,263.0,4,263.0,263.0
1,1,2,14.066106,104,0.0,73.5
4,2,2,24.682962,34,11.5,33.0
7,3,2,31.693819,31,13.0,73.5
10,4,2,36.575969,13,11.5,65.0
