# Titanic JSON Data Analysis
### Author: Javier Romero

### Description:
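Load the Titanic passenger dataset from a JSON export (uploaded through Google Colab), then inspect its metadata, compute descriptive statistics, and summarize and plot survival rates by passenger group.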

## Step 1: Upload and Load the JSON File

In [None]:
import json
import pandas as pd
import numpy as np
from google.colab import files

# Upload the JSON file from your computer.
# The result is a mapping keyed by uploaded filename; guard against an empty
# result (e.g. the upload dialog was dismissed) so the failure is explicit
# instead of an opaque IndexError on the lookup below.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose the JSON file.")

# Load the first uploaded file. The encoding is pinned to UTF-8 (the standard
# JSON encoding) so parsing does not depend on the runtime's locale default.
filename = next(iter(uploaded))
with open(filename, 'r', encoding='utf-8') as f:
    data = json.load(f)

print(f"Loaded: {filename}")
print(f"Top-level keys: {list(data.keys())}")

## Step 2: Inspect Metadata

In [None]:
# Print a banner, then one indented "key: value" line per metadata entry
banner = "=" * 50
print(banner, "DATASET METADATA", banner, sep="\n")
metadata = data['metadata']
for field in metadata:
    print(f"  {field}: {metadata[field]}")

## Step 3: Convert Passengers to DataFrame

In [None]:
# Build the working DataFrame from the passenger list in the loaded JSON
passenger_records = data['passengers']
df = pd.DataFrame(passenger_records)

# Report the table dimensions and column names, then preview the first rows
frame_shape = df.shape
print(f"DataFrame shape: {frame_shape}")
column_names = list(df.columns)
print(f"Columns: {column_names}")
df.head()

## Step 4: Descriptive Statistics

In [None]:
# Summarize each numeric column by its mean, median and standard deviation
numeric_cols = df.select_dtypes(include='number').columns.tolist()
numeric_view = df[numeric_cols]

column_means = numeric_view.mean()
column_medians = numeric_view.median()
column_spreads = numeric_view.std()

# One row per numeric column, one column per statistic
stats = pd.DataFrame({
    'Mean': column_means,
    'Median': column_medians,
    'Std Dev': column_spreads,
})

print("--- Descriptive Statistics ---")
stats

## Step 5: Survival Analysis

In [None]:
# Mean of 'survived' within each group, rounded to 3 decimals, for each
# grouping column in turn: gender, class, age group, and is_alone.
# Headings after the first start with a newline to separate the tables.
breakdowns = [
    ("Survival Rate by Gender:", 'sex'),
    ("\nSurvival Rate by Class:", 'pclass'),
    ("\nSurvival Rate by Age Group:", 'age_group'),
    ("\nSurvival Rate by IsAlone:", 'is_alone'),
]

for heading, group_column in breakdowns:
    print(heading)
    group_rates = df.groupby(group_column)['survived'].mean()
    print(group_rates.round(3))

## Step 6: Visualizations

In [None]:
import matplotlib.pyplot as plt

def _plot_survival_rate(rates, ax, title, color, rotation=0):
    """Draw one survival-rate bar chart on the given axes.

    Args:
        rates: Series of mean 'survived' values indexed by group label.
        ax: Matplotlib axes to draw on.
        title: Title shown above the chart.
        color: A single color, or a list with one color per bar.
        rotation: Rotation of the x tick labels, in degrees.
    """
    rates.plot(kind='bar', ax=ax, color=color, rot=rotation)
    ax.set_title(title)
    ax.set_ylabel('Survival Rate')


fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Survival by Gender
_plot_survival_rate(
    df.groupby('sex')['survived'].mean(),
    axes[0, 0], 'Survival Rate by Gender', ['steelblue', 'coral'],
)

# Survival by Class
_plot_survival_rate(
    df.groupby('pclass')['survived'].mean(),
    axes[0, 1], 'Survival Rate by Class', ['gold', 'silver', 'brown'],
)

# Survival by Age Group
# Bars follow the preferred order below; any label present in the data but
# missing from that list is appended rather than silently dropped by reindex.
age_order = ['Child', 'Young Adult', 'Adult', 'Senior', 'Unknown']
age_rates = df.groupby('age_group')['survived'].mean()
unlisted_groups = [label for label in age_rates.index if label not in age_order]
_plot_survival_rate(
    age_rates.reindex(age_order + unlisted_groups),
    axes[1, 0], 'Survival Rate by Age Group', 'teal', rotation=45,
)

# Survival by Family Size (mean survival per family size, not a size distribution)
_plot_survival_rate(
    df.groupby('family_size')['survived'].mean(),
    axes[1, 1], 'Survival Rate by Family Size', 'mediumpurple',
)

plt.tight_layout()
plt.show()

## Step 7: Summary

In [None]:
# Headline figures for the dataset: counts, survival split, and averages
divider = "=" * 50
print(divider, "DATASET SUMMARY", divider, sep="\n")

passenger_count = len(df)
print(f"Total passengers: {passenger_count}")

# Survival counts with their share of all passengers
survived_flags = df['survived']
survival_rate = survived_flags.mean()
print(f"Survived: {survived_flags.sum()} ({survival_rate:.2%})")
not_survived_count = (survived_flags == 0).sum()
print(f"Did not survive: {not_survived_count} ({1 - survival_rate:.2%})")

mean_age = df['age'].mean()
print(f"Average age: {mean_age:.1f}")
mean_fare = df['fare'].mean()
print(f"Average fare: {mean_fare:.2f}")

# Passengers flagged as traveling alone, with their share of the total
alone_flags = df['is_alone']
print(f"Passengers traveling alone: {alone_flags.sum()} ({alone_flags.mean():.2%})")