# 📓 Data Science Homework
Fill in the code blocks and markdown cells as instructed below.

## Descriptive Statistics

In [None]:
# TODO: Load the dataset and compute mean, median, mode, std, IQR

import pandas as pd
from scipy import stats

# Load the accidents dataset; `df` is reused by the later cells.
df = pd.read_csv('/mnt/data/ACCIDENTS_GU_BCN_2013.csv')

# Restrict to numeric columns: the statistics below are not defined for text.
numeric_cols = df.select_dtypes(include='number')

# The loop variable is deliberately NOT named `col`: later cells store the
# column under analysis in a global `col`, and re-running this cell must not
# overwrite it with the last numeric column.
for stat_col, values in numeric_cols.items():
    mean_val = values.mean()
    median_val = values.median()
    # Compute the mode once; the result can be empty (e.g. a column with no
    # non-null values), in which case there is no mode to report.
    modes = values.mode()
    mode_val = modes.iloc[0] if not modes.empty else None
    std_val = values.std()
    # nan_policy='omit' makes scipy skip missing values instead of
    # propagating NaN into the IQR.
    iqr_val = stats.iqr(values, nan_policy='omit')

    print(f"\nColumn: {stat_col}")
    print(f"  Mean: {mean_val:.2f}")
    print(f"  Median: {median_val:.2f}")
    print(f"  Mode: {mode_val}")
    print(f"  Standard Deviation: {std_val:.2f}")
    print(f"  IQR: {iqr_val:.2f}")


## Visualizations (Histogram, KDE, Boxplot, Heatmap)

In [None]:
# TODO: Plot histogram, KDE, boxplot, and correlation heatmap

import matplotlib.pyplot as plt
import seaborn as sns

# Column whose distribution is plotted here; the bootstrap cell further down
# reads this same global.
# NOTE(review): the bootstrap cell's TODO spells this column
# 'Número de víctimes' — confirm the exact header in the CSV.
col = 'Número de víctimas'

# Histogram of raw counts (no KDE overlay; the KDE gets its own figure).
plt.figure(figsize=(6, 4))
sns.histplot(df[col], bins=15, kde=False)
plt.title(f'Histogram of {col}')
plt.show()

# Kernel density estimate. `fill=True` is the current spelling of the
# deprecated `shade=True` keyword.
plt.figure(figsize=(6, 4))
sns.kdeplot(df[col], fill=True)
plt.title(f'KDE of {col}')
plt.show()

# Boxplot of the same column.
plt.figure(figsize=(6, 4))
sns.boxplot(x=df[col])
plt.title(f'Boxplot of {col}')
plt.show()

# Pairwise correlations between all numeric columns, annotated with values.
plt.figure(figsize=(10, 6))
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


## Bootstrap Confidence Interval

In [None]:
# TODO: Bootstrap sample mean and CI (e.g., for 'Número de víctimes')

import numpy as np

# Drop missing values once, outside the resampling loop: the observed data
# does not change between bootstrap replicates.
observed = df[col].dropna()

# Each replicate resamples the observed values with replacement at the
# original sample size (frac=1) and records the replicate's mean.
# No seed is set, so the interval varies slightly from run to run.
boot_means = [
    observed.sample(frac=1, replace=True).mean()
    for _ in range(1000)
]

# Percentile bootstrap: the 2.5th and 97.5th percentiles of the replicate
# means bound the central 95% interval.
ci_lower = np.percentile(boot_means, 2.5)
ci_upper = np.percentile(boot_means, 97.5)

print(f"Bootstrap 95% CI for mean of '{col}': ({ci_lower:.2f}, {ci_upper:.2f})")

# Show the bootstrap distribution with the interval bounds marked.
plt.figure(figsize=(6, 4))
sns.histplot(boot_means, bins=30)
plt.axvline(ci_lower, color='red', linestyle='--', label='2.5%')
plt.axvline(ci_upper, color='green', linestyle='--', label='97.5%')
plt.title('Bootstrap Distribution of the Mean')
plt.legend()
plt.show()


## Hypothesis Testing

In [None]:
# TODO: Perform a t-test (e.g., is mean != 1?) and interpret the result

from scipy.stats import ttest_1samp

# Sample for the test, with missing values removed.
victims = df['Número de víctimas'].dropna()

# Hypothesised mean and significance level are named once so the test call
# and the printed conclusion cannot drift apart.
hypothesized_mean = 1.7
alpha = 0.05

# One-sample t-test of H0: mean == hypothesized_mean.
t_stat, p_val = ttest_1samp(victims, popmean=hypothesized_mean)

print(f"T-statistic: {t_stat:.3f}")
print(f"P-value: {p_val:.4f}")

if p_val < alpha:
    print(f"Reject the null hypothesis: the mean is significantly different from {hypothesized_mean}.")
else:
    print(f"Fail to reject the null hypothesis: no significant difference from {hypothesized_mean}.")
