# Objective
## Determine whether the average maximum temperature in Mexico City changed significantly between 2024 and 2025.

# Hypotheses
## H₀ (null): The mean maximum temperature in Mexico City is the same in 2024 and 2025.
## H₁ (alternative): The mean maximum temperature in Mexico City differs between 2024 and 2025.

In [18]:
# Import any relevant packages or libraries
import pandas as pd
from scipy import stats

In [19]:
# Read temperature_data_clean.csv into the DataFrame used by the rest of the analysis
df = pd.read_csv(filepath_or_buffer='temperature_data_clean.csv')

In [20]:
# Keep only the rows whose 'state' column equals 'Ciudad de México' (Mexico City)
cdmx = df.loc[df['state'].eq('Ciudad de México')]

In [23]:
# Build one Series of Mexico City maximum temperatures per year (row mask + column in a single .loc)
temp_2024 = cdmx.loc[cdmx['year'] == 2024, 'temperature_max']
temp_2025 = cdmx.loc[cdmx['year'] == 2025, 'temperature_max']

In [24]:
# Welch's two-sample t-test: independent samples, equal variances NOT assumed (equal_var=False)
welch_result = stats.ttest_ind(temp_2024, temp_2025, equal_var=False)
t_stat = welch_result.statistic
p_value = welch_result.pvalue  # reused by the interpretation cell below

print(f"t-statistic: {t_stat:.4f}")
print(f"p-value: {p_value:.4f}")

t-statistic: 1.1373
p-value: 0.2558


In [25]:
# Decision rule: reject H₀ when the p-value falls below the significance level
alpha = 0.05
verdict = (
    "❌ We reject H₀: there is a significant difference in the maximum temperature between 2024 and 2025."
    if p_value < alpha
    else "✅ H₀ is not rejected: there is not enough evidence to conclude that the temperatures differ."
)
print(verdict)

✅ H₀ is not rejected: there is not enough evidence to conclude that the temperatures differ.


# 2. Compare maximum temperatures between two regions (Norte vs. Noroeste)

In [28]:
# Maximum-temperature samples for the two regions being compared: 'Norte' and 'Noroeste'
region_1 = df.loc[df['region'] == 'Norte', 'temperature_max']
region_2 = df.loc[df['region'] == 'Noroeste', 'temperature_max']

In [29]:
# Welch's two-sample t-test between the two regions (equal variances NOT assumed)
welch_result = stats.ttest_ind(region_1, region_2, equal_var=False)
t_stat = welch_result.statistic
p_value = welch_result.pvalue

print(f"t-statistic: {t_stat:.4f}")
# NOTE: the .4f format rounds very small p-values to 0.0000; the underlying value is not exactly zero
print(f"p-value: {p_value:.4f}")

# Decision rule at the 5% significance level
alpha = 0.05
verdict = (
    "❌ We reject H₀: there is a significant difference between the regions."
    if p_value < alpha
    else "✅ H₀ is not rejected: there is not enough evidence to say that they differ."
)
print(verdict)

t-statistic: -12.8265
p-value: 0.0000
❌ We reject H₀: there is a significant difference between the regions.


# 3. Check whether the minimum temperature has increased in winter

In [30]:
# Keep only rows whose 'month' is December (12), January (1) or February (2)
winter = df.loc[df['month'].isin([12, 1, 2])]

In [31]:
# Winter minimum temperatures, one Series per calendar year
# NOTE(review): splitting on 'year' groups Jan-Feb and Dec of the same calendar year together,
# i.e. parts of two different winter seasons - confirm this is the intended comparison.
winter_temp_2024 = winter.loc[winter['year'] == 2024, 'temperature_min']
winter_temp_2025 = winter.loc[winter['year'] == 2025, 'temperature_min']

In [32]:
# Welch's two-sample t-test (equal variances NOT assumed) on winter minimum temperatures,
# 2024 vs 2025. The test is two-sided: it detects a change in either direction.
# NOTE(review): the section heading asks whether the temperature *increased*; a directional
# test would pass alternative='less' to ttest_ind - confirm which question is intended.
t_stat, p_value = stats.ttest_ind(winter_temp_2024, winter_temp_2025, equal_var=False)

print(f"t-statistic: {t_stat:.4f}")
print(f"p-value: {p_value:.4f}")

# Define the significance level here so this cell does not depend on `alpha`
# left over from an earlier cell (running it alone would raise NameError).
alpha = 0.05
if p_value < alpha:
    print("❌ We reject H₀: the minimum temperature in winter has changed significantly.")
else:
    print("✅ H₀ is not rejected: there is no evidence of significant change.")

t-statistic: 1.5211
p-value: 0.1283
✅ H₀ is not rejected: there is no evidence of significant change.
