# In [None]:
# Librerías
import pandas as pd
import numpy as np
import scipy.stats as st

# Challenge 1: Pokémon data
# Read the Pokémon dataset straight from its GitHub-hosted CSV file.
pokemon_csv_url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv"
df = pd.read_csv(pokemon_csv_url)

# Hypothesis 1: compare the mean HP of Pokémon whose primary type ("Type 1")
# is "Dragon" against those whose primary type is "Grass".
dragon_hp = df.loc[df['Type 1'] == 'Dragon', 'HP']
grass_hp = df.loc[df['Type 1'] == 'Grass', 'HP']

# Two-sided, two-sample t-test. equal_var=False selects Welch's variant,
# which does not assume both groups share the same variance.
t_stat, p_val = st.ttest_ind(dragon_hp, grass_hp, equal_var=False)
print("Hypothesis 1: Dragon vs Grass HP")
print("t-statistic:", t_stat)
print("p-value:", p_val)
if np.isnan(p_val):
    # A NaN p-value means the test was undefined (e.g. a group with fewer than
    # two rows, or missing HP values). NaN < 0.05 is False, so without this
    # branch the script would wrongly report "no significant difference".
    print("Test could not be computed (p-value is NaN): no conclusion can be drawn about Dragon vs Grass HP.\n")
elif p_val < 0.05:
    print("Reject the null hypothesis: Dragon-type Pokémon have significantly different HP than Grass-type Pokémon.\n")
else:
    print("Fail to reject the null hypothesis: No significant difference in HP between Dragon and Grass types.\n")

# Hypothesis 2: compare each base stat between Legendary and non-Legendary
# Pokémon.
stat_columns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
legendary_stats = df.loc[df['Legendary'].eq(True), stat_columns]
non_legendary_stats = df.loc[df['Legendary'].eq(False), stat_columns]

# One two-sided Welch t-test (equal_var=False) per stat column.
# NOTE(review): six tests are each judged at the 0.05 level and no
# multiple-comparison correction is applied — confirm this is intended.
print("Hypothesis 2: Legendary vs Non-Legendary Stats Comparison")
for stat in stat_columns:
    t_stat, p_val = st.ttest_ind(legendary_stats[stat], non_legendary_stats[stat], equal_var=False)
    print(f"{stat} - t-statistic: {t_stat}, p-value: {p_val}")
    if np.isnan(p_val):
        # A NaN p-value means the test was undefined (too few rows in a group
        # or missing values); NaN < 0.05 is False, so it must not fall through
        # to the "no significant difference" message.
        print(f"Test could not be computed for {stat} (p-value is NaN): no conclusion can be drawn.\n")
    elif p_val < 0.05:
        print(f"Reject the null hypothesis: Significant difference in {stat} between Legendary and Non-Legendary Pokémon.\n")
    else:
        print(f"Fail to reject the null hypothesis: No significant difference in {stat} between Legendary and Non-Legendary Pokémon.\n")

# Challenge 2: California housing data
# Read the housing dataset from its GitHub-hosted CSV file. This rebinds `df`,
# replacing the Pokémon table loaded for Challenge 1.
housing_csv_url = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv"
df = pd.read_csv(housing_csv_url)

# Euclidean distance helper
def calculate_distance(df, coord):
    """Return the Euclidean distance from each row of *df* to *coord*.

    Args:
        df: Object exposing 'longitude' and 'latitude' entries (the script
            passes a pandas DataFrame).
        coord: Indexable pair read as (longitude, latitude).

    Returns:
        sqrt(dlon**2 + dlat**2) evaluated element-wise, in the same units as
        the inputs. This is a plain planar distance on the raw coordinate
        values; no geodesic correction is applied.
    """
    lon_offset = df['longitude'] - coord[0]
    lat_offset = df['latitude'] - coord[1]
    squared_distance = lon_offset**2 + lat_offset**2
    return np.sqrt(squared_distance)

# Reference coordinates for the school and the hospital, ordered as
# (longitude, latitude) to match how calculate_distance indexes them.
school_coord = (-118, 37)
hospital_coord = (-122, 34)

# Distance from every house to each reference point, in the same units as the
# longitude/latitude columns.
df['dist_to_school'] = calculate_distance(df, school_coord)
df['dist_to_hospital'] = calculate_distance(df, hospital_coord)

# Flag houses (1 = close, 0 = far) lying strictly within the threshold distance
# of either reference point.
proximity_threshold = 0.5
near_school = df['dist_to_school'] < proximity_threshold
near_hospital = df['dist_to_hospital'] < proximity_threshold
df['close_to_school_or_hospital'] = (near_school | near_hospital).astype(int)

# Split the median house values into the "close" and "far" groups.
close_houses = df.loc[df['close_to_school_or_hospital'] == 1, 'median_house_value']
far_houses = df.loc[df['close_to_school_or_hospital'] == 0, 'median_house_value']

# Two-sided, two-sample Welch t-test (equal_var=False: no equal-variance
# assumption between the groups).
t_stat, p_val = st.ttest_ind(close_houses, far_houses, equal_var=False)
print("Hypothesis 3: House Price vs Proximity to School/Hospital")
print("t-statistic:", t_stat)
print("p-value:", p_val)
if np.isnan(p_val):
    # The groups are defined by a fixed radius, so one of them may hold fewer
    # than two houses; the t-test then yields NaN. NaN < 0.05 is False, so
    # without this branch the script would wrongly report "no significant
    # difference" for a test that was never actually computed.
    print("Test could not be computed (p-value is NaN): no conclusion can be drawn about prices and proximity to a school or hospital.\n")
elif p_val < 0.05:
    print("Reject the null hypothesis: Houses close to a school or hospital have significantly different prices.\n")
else:
    print("Fail to reject the null hypothesis: No significant difference in prices based on proximity to a school or hospital.\n")
