In [3]:
import pandas as pd

# 1. Load Data
# Read the Titanic CSV directly from its raw GitHub URL (requires network access).
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)

# Print the column names as a plain Python list to confirm what was loaded.
column_names = list(df.columns)
print(f"Dataset Loaded. Columns: {column_names}")

# --- TASK 1: Custom Function with .apply() ---
# Goal: Bucket continuous 'Age' into categories

def get_age_group(age):
    """Map a single age value to a coarse age-bucket label.

    Args:
        age: A scalar age; may be missing (NaN / None).

    Returns:
        "Unknown" when the age is missing, "Child" when age < 18,
        "Adult" when 18 <= age < 60, and "Senior" otherwise.
    """
    # Missing values are labelled before any numeric comparison is attempted.
    if pd.isna(age):
        return "Unknown"
    if age < 18:
        return "Child"
    # Only non-missing ages of 18 or more reach this point.
    return "Adult" if age < 60 else "Senior"

# Run get_age_group on every value of the 'Age' column and keep the labels
age_labels = df['Age'].apply(get_age_group)
df['Age_Group'] = age_labels

print("\n--- Task 1: Age Group Created ---")
# Show the source column next to the derived one for the first 10 rows
print(df.loc[:, ['Age', 'Age_Group']].head(10))


# --- TASK 2: Family Size ---
# Goal: Calculate Family Size (SibSp + Parch)
# Logic: Sibling/Spouse + Parent/Child + 1 (The passenger themselves)
# Column arithmetic is used here instead of the row-wise lambda form
#   df.apply(lambda row: row['SibSp'] + row['Parch'] + 1, axis=1)
# because it produces the same values without invoking a Python function
# once per row.

df['Family_Size'] = df['SibSp'] + df['Parch'] + 1

print("\n--- Task 2: Family Size Calculated ---")
print(df[['Name', 'SibSp', 'Parch', 'Family_Size']].head())


# --- TASK 3: Binary Encoding ---
# Goal: Convert Sex (male/female) to numbers (0/1) for potential AI usage later
# Encoding: 'female' -> 1; every other value (including a missing one) -> 0.
# The comparison runs on the whole column at once instead of through a
# per-element lambda; the resulting boolean mask is then cast to integers.
df['Sex_Code'] = (df['Sex'] == 'female').astype('int64')

print("\n--- Task 3: Sex Encoded ---")
print(df[['Sex', 'Sex_Code']].head())

Dataset Loaded. Columns: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked']

--- Task 1: Age Group Created ---
    Age Age_Group
0  22.0     Adult
1  38.0     Adult
2  26.0     Adult
3  35.0     Adult
4  35.0     Adult
5   NaN   Unknown
6  54.0     Adult
7   2.0     Child
8  27.0     Adult
9  14.0     Child

--- Task 2: Family Size Calculated ---
                                                Name  SibSp  Parch  \
0                            Braund, Mr. Owen Harris      1      0   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...      1      0   
2                             Heikkinen, Miss. Laina      0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)      1      0   
4                           Allen, Mr. William Henry      0      0   

   Family_Size  
0            2  
1            2  
2            1  
3            2  
4            1  

--- Task 3: Sex Encoded ---
      Sex  Sex_Code
0    male    

In [4]:
import pandas as pd

# 1. Load Data
# Re-read the Titanic CSV from its raw GitHub URL (requires network access),
# so this cell starts from a freshly loaded frame.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)

# Row count as a quick sanity check on the load.
row_count = len(df)
print(f"Dataset Loaded. Rows: {row_count}")

# --- TASK 1: Analysis by Class (The "Rich" Factor) ---
# Goal: Find survival rate for 1st, 2nd, and 3rd Class
# Logic: 'Survived' is coded 0 (Dead) / 1 (Alive), so its per-group mean is the
# fraction of survivors; multiplying by 100 expresses it as a percentage.
survived_by_class = df.groupby('Pclass')['Survived']
class_survival = survived_by_class.mean().mul(100)

print("\n--- Survival Rate by Class (%) ---")
print(class_survival)
# Expected Result: 1st Class (~63%) > 2nd Class (~47%) > 3rd Class (~24%)


# --- TASK 2: Analysis by Gender (The "Women First" Factor) ---
# Goal: Find survival rate for Males vs Females
# Same approach as the per-class rate: mean of the 0/1 'Survived' flag per group, times 100.
survived_by_sex = df.groupby('Sex')['Survived']
gender_survival = survived_by_sex.mean().mul(100)

print("\n--- Survival Rate by Gender (%) ---")
print(gender_survival)
# Expected Result: Female (~74%) >>> Male (~19%)


# --- TASK 3: Multi-Level Grouping (The Interaction) ---
# Goal: Compare Rich Males vs Poor Females
# Syntax: groupby(['Col1', 'Col2']) -- passing a list of columns groups by each
# (Pclass, Sex) combination, and the result is indexed by both keys.
group_keys = ['Pclass', 'Sex']
complex_stats = df.groupby(group_keys)['Survived'].mean().mul(100)

print("\n--- Detailed Survival Analysis (%) ---")
print(complex_stats)

Dataset Loaded. Rows: 891

--- Survival Rate by Class (%) ---
Pclass
1    62.962963
2    47.282609
3    24.236253
Name: Survived, dtype: float64

--- Survival Rate by Gender (%) ---
Sex
female    74.203822
male      18.890815
Name: Survived, dtype: float64

--- Detailed Survival Analysis (%) ---
Pclass  Sex   
1       female    96.808511
        male      36.885246
2       female    92.105263
        male      15.740741
3       female    50.000000
        male      13.544669
Name: Survived, dtype: float64
