# Import happiness table from BigQuery

In [1]:
import pandas as pd
from google.cloud import bigquery
from google.colab import auth
# Authenticate the Colab session before any Google Cloud call is made.
auth.authenticate_user()

# Google Cloud project the BigQuery client is created for.
PROJECT_ID = "uk-health-happiness"

# Fully-qualified id of the table to load.
TABLE_ID = "uk-health-happiness.cleaned_data.Happiness_more_factors_31"

# BigQuery client bound to the project above.
client = bigquery.Client(project=PROJECT_ID)

# Select every column and every row of the table.
query = f"SELECT * FROM `{TABLE_ID}`"

# Submit the query, then materialise its result as a pandas DataFrame.
query_job = client.query(query)
df = query_job.to_dataframe()

# Preview the first rows (shown as the cell output in a notebook).
df.head()

Unnamed: 0,Region,avg_min_temp_c1,avg_max_temp_c,total_rain_mm,avg_temp_c,avg_wind_speed_m_s,avg_wind_gust_speed_m_s,avg_dominant_wind_direction,Happiness_Latitude,Happiness_Longitude,...,avg_pop_per_park,median_pop_per_park,nb_flats,nb_flats_with_gardens,avg_garden_size_flats,nb_houses,nb_houses_with_gardens,avg_garden_size_houses,new_low,new_high
0,Norfolk,7.216164,13.756986,644.3,10.529589,25.846849,45.65589,202.2,52.67,1.0,...,,,,,,,,,24.3,75.7
1,North Yorkshire,6.044384,12.832877,1403.9,9.521918,23.295616,44.159589,211.776712,54.13,-1.5,...,,,,,,,,,25.8,74.21
2,Kent,7.642192,13.818904,637.6,10.793151,22.991507,45.469589,199.961644,51.21,0.72,...,,,,,,,,,27.37,72.63
3,Lancashire,6.130959,11.881644,1301.4,9.098356,22.877808,47.453151,208.09863,53.86,-2.57,...,,,,,,,,,26.4,73.6
4,Worcestershire,6.752192,13.706575,1281.0,10.340137,22.044932,44.469315,206.010959,52.21,-2.16,...,,,,,,,,,25.42,74.58


# ANOVA tests for average happiness

In [2]:
import scipy.stats as stats

# Outcome variable for this cell: keep only rows that have an average mean rating.
df = df.dropna(subset=['average_mean_rating'])

# Each numeric factor is cut into three quantile-based bins (Low / Medium / High)
# so it can be used as the grouping variable of a one-way ANOVA.
# Mapping: new category column -> numeric source column it is binned from.
# NOTE(review): 'Temperature_Category' and 'AvgTemp_Category' are both built from
# 'avg_temp_c', so they always hold identical bins.
tercile_sources = {
    'Rainfall_Category': 'total_rain_mm',
    'Temperature_Category': 'avg_temp_c',
    'WindSpeed_Category': 'avg_wind_speed_m_s',
    'MinTemp_Category': 'avg_min_temp_c1',
    'MaxTemp_Category': 'avg_max_temp_c',
    'AvgTemp_Category': 'avg_temp_c',
    'WindGustSpeed_Category': 'avg_wind_gust_speed_m_s',
    'AvgWindDirection_Category': 'avg_dominant_wind_direction',
    'Latitude_Category': 'Happiness_Latitude',
    'Longitude_Category': 'Happiness_Longitude',
    'AvgDistPark_Category': 'avg_dist_to_park',
    'AvgSizePark_Category': 'avg_size_park',
    'AvgPopPark_Category': 'avg_pop_per_park',
    'AirQuality_Category': 'AirQuality',
    'WaterPollution_Category': 'WaterPollution',
    'AvgDogs_Category': 'average_dogs_per_household',
    'AvgCats_Category': 'average_cats_per_household',
    'MedianDistToPark_Category': 'median_dist_to_park',
    'MedianSizePark_Category': 'median_size_park',
    'NbFlats_Category': 'nb_flats',
    'NbFlatsWithGardens_Category': 'nb_flats_with_gardens',
    'AvgGardenSizeFlats_Category': 'avg_garden_size_flats',
    'NbHouses_Category': 'nb_houses',
    'NbHousesWithGardens_Category': 'nb_houses_with_gardens',
    'AvgGardenSizeHouses_Category': 'avg_garden_size_houses',
    'AvgNbParksWithin1000m_Category': 'avg_nb_parks_within_1000m',
}
tercile_labels = ['Low', 'Medium', 'High']

for category_col, source_col in tercile_sources.items():
    df[category_col] = pd.qcut(df[source_col], q=3, labels=tercile_labels)


# One-way ANOVA of the average happiness rating against every factor.
# Each `<factor>_groups` list holds one Series of ratings per group, and
# `<factor>_anova` is the matching scipy.stats.f_oneway result.
anova_target = 'average_mean_rating'


def _tercile_groups(frame, category_col, target_col):
    """Return the target values of the Low, Medium and High rows of category_col."""
    return [
        frame[frame[category_col] == level][target_col]
        for level in ['Low', 'Medium', 'High']
    ]


def _named_groups(frame, name_col, target_col):
    """Return the target values for each distinct, non-missing value of name_col.

    Missing names are skipped on purpose: Series.unique() lists NaN/None as a
    value, but an equality test against it matches no row, so it would add an
    empty sample and make f_oneway return NaN for the whole test.
    """
    return [
        frame[frame[name_col] == name][target_col]
        for name in frame[name_col].dropna().unique()
    ]


# --- Weather factors ---------------------------------------------------------

# Rainfall
rainfall_groups = _tercile_groups(df, 'Rainfall_Category', anova_target)
rainfall_anova = stats.f_oneway(*rainfall_groups)

# Temperature
temperature_groups = _tercile_groups(df, 'Temperature_Category', anova_target)
temperature_anova = stats.f_oneway(*temperature_groups)

# Wind speed
windspeed_groups = _tercile_groups(df, 'WindSpeed_Category', anova_target)
windspeed_anova = stats.f_oneway(*windspeed_groups)

# Minimum temperature
mintemp_groups = _tercile_groups(df, 'MinTemp_Category', anova_target)
mintemp_anova = stats.f_oneway(*mintemp_groups)

# Maximum temperature
maxtemp_groups = _tercile_groups(df, 'MaxTemp_Category', anova_target)
maxtemp_anova = stats.f_oneway(*maxtemp_groups)

# Average temperature
avgtemp_groups = _tercile_groups(df, 'AvgTemp_Category', anova_target)
avgtemp_anova = stats.f_oneway(*avgtemp_groups)

# Average wind gust speed
windgustspeed_groups = _tercile_groups(df, 'WindGustSpeed_Category', anova_target)
windgustspeed_anova = stats.f_oneway(*windgustspeed_groups)

# Average dominant wind direction
winddirection_groups = _tercile_groups(df, 'AvgWindDirection_Category', anova_target)
winddirection_anova = stats.f_oneway(*winddirection_groups)

# --- Location ----------------------------------------------------------------

# Latitude
latitude_groups = _tercile_groups(df, 'Latitude_Category', anova_target)
latitude_anova = stats.f_oneway(*latitude_groups)

# Longitude
longitude_groups = _tercile_groups(df, 'Longitude_Category', anova_target)
longitude_anova = stats.f_oneway(*longitude_groups)

# --- Parks -------------------------------------------------------------------

# Average distance to park
avgdistpark_groups = _tercile_groups(df, 'AvgDistPark_Category', anova_target)
avgdistpark_anova = stats.f_oneway(*avgdistpark_groups)

# Average park size
avgparksize_groups = _tercile_groups(df, 'AvgSizePark_Category', anova_target)
avgparksize_anova = stats.f_oneway(*avgparksize_groups)

# Average population per park
avgpoppark_groups = _tercile_groups(df, 'AvgPopPark_Category', anova_target)
avgpoppark_anova = stats.f_oneway(*avgpoppark_groups)

# Median park size
median_size_park_groups = _tercile_groups(df, 'MedianSizePark_Category', anova_target)
median_size_park_anova = stats.f_oneway(*median_size_park_groups)

# --- Environment and pets ----------------------------------------------------

# Air quality
airquality_groups = _tercile_groups(df, 'AirQuality_Category', anova_target)
airquality_anova = stats.f_oneway(*airquality_groups)

# Water pollution
waterpollution_groups = _tercile_groups(df, 'WaterPollution_Category', anova_target)
waterpollution_anova = stats.f_oneway(*waterpollution_groups)

# Average dogs per household
avgdogs_groups = _tercile_groups(df, 'AvgDogs_Category', anova_target)
avgdogs_anova = stats.f_oneway(*avgdogs_groups)

# Average cats per household
avgcats_groups = _tercile_groups(df, 'AvgCats_Category', anova_target)
avgcats_anova = stats.f_oneway(*avgcats_groups)

# Median distance to park
median_distance_park_groups = _tercile_groups(df, 'MedianDistToPark_Category', anova_target)
median_distance_park_anova = stats.f_oneway(*median_distance_park_groups)

# --- Housing -----------------------------------------------------------------

# Number of flats
NbFlats_groups = _tercile_groups(df, 'NbFlats_Category', anova_target)
NbFlats_anova = stats.f_oneway(*NbFlats_groups)

# Number of flats with gardens
NbFlatsWithGardens_groups = _tercile_groups(df, 'NbFlatsWithGardens_Category', anova_target)
NbFlatsWithGardens_anova = stats.f_oneway(*NbFlatsWithGardens_groups)

# Average garden size (flats)
AvgGardenSizeFlats_groups = _tercile_groups(df, 'AvgGardenSizeFlats_Category', anova_target)
AvgGardenSizeFlats_anova = stats.f_oneway(*AvgGardenSizeFlats_groups)

# Number of houses
NbHouses_groups = _tercile_groups(df, 'NbHouses_Category', anova_target)
NbHouses_anova = stats.f_oneway(*NbHouses_groups)

# Number of houses with gardens
NbHousesWithGardens_groups = _tercile_groups(df, 'NbHousesWithGardens_Category', anova_target)
NbHousesWithGardens_anova = stats.f_oneway(*NbHousesWithGardens_groups)

# Average garden size (houses)
AvgGardenSizeHouses_groups = _tercile_groups(df, 'AvgGardenSizeHouses_Category', anova_target)
AvgGardenSizeHouses_anova = stats.f_oneway(*AvgGardenSizeHouses_groups)

# --- Administrative areas (one group per distinct name) ----------------------

# Country
country_groups = _named_groups(df, 'country_name', anova_target)
country_anova = stats.f_oneway(*country_groups)

# Region
region_groups = _named_groups(df, 'region_name', anova_target)
region_anova = stats.f_oneway(*region_groups)

# County
county_groups = _named_groups(df, 'county_name', anova_target)
county_anova = stats.f_oneway(*county_groups)

# Average number of parks within 1000m
AvgNbParksWithin1000m_groups = _tercile_groups(df, 'AvgNbParksWithin1000m_Category', anova_target)
AvgNbParksWithin1000m_anova = stats.f_oneway(*AvgNbParksWithin1000m_groups)



# Report the F-statistic and p-value of every ANOVA, one factor per entry.
# Each pair is (label shown in the heading, f_oneway result).
anova_report = [
    ("Rainfall", rainfall_anova),
    ("Temperature", temperature_anova),
    ("Wind Speed", windspeed_anova),
    ("Min Temperature", mintemp_anova),
    ("Max Temperature", maxtemp_anova),
    ("Avg Temperature", avgtemp_anova),
    ("Avg Wind Gust Speed", windgustspeed_anova),
    ("Avg Wind Direction", winddirection_anova),
    ("Latitude", latitude_anova),
    ("Longitude", longitude_anova),
    ("Avg Distance to Park", avgdistpark_anova),
    ("Avg Park Size", avgparksize_anova),
    ("Avg Population per Park", avgpoppark_anova),
    ("Air Quality", airquality_anova),
    ("Water Pollution", waterpollution_anova),
    ("Avg Dog Population per Household", avgdogs_anova),
    ("Avg Cat Population per Household", avgcats_anova),
    ("median distance to park", median_distance_park_anova),
    ("median size of park", median_size_park_anova),
    ("average number of parks within 1000m to park", AvgNbParksWithin1000m_anova),
    ("Number of Flats", NbFlats_anova),
    ("Number of Flats with Gardens", NbFlatsWithGardens_anova),
    ("Average Garden Size (Flats)", AvgGardenSizeFlats_anova),
    ("Number of Houses", NbHouses_anova),
    ("Number of Houses with Gardens", NbHousesWithGardens_anova),
    ("Average Garden Size (Houses)", AvgGardenSizeHouses_anova),
    ("Country", country_anova),
    ("Region", region_anova),
]

for factor_label, outcome in anova_report:
    print(f"ANOVA Results for {factor_label}:\nF-Statistic: {outcome.statistic:.4f}, p-value: {outcome.pvalue:.4f}")

# County is printed last; its trailing "\n" leaves a blank line after the results.
print(f"ANOVA Results for County:\nF-Statistic: {county_anova.statistic:.4f}, p-value: {county_anova.pvalue:.4f}\n")



# Plain-language verdict for every ANOVA, in the same order as before.
# Significance level shared by all interpretations.
ALPHA = 0.05

# Each entry is (subject of the sentence, f_oneway result, sentence ending).
# The ending reproduces the existing output exactly: the first 17 messages end
# with a period, the remaining ones do not.
interpretations = [
    ("Rainfall", rainfall_anova, "."),
    ("Temperature", temperature_anova, "."),
    ("Wind speed", windspeed_anova, "."),
    ("Min Temperature", mintemp_anova, "."),
    ("Max Temperature", maxtemp_anova, "."),
    ("Avg Temperature", avgtemp_anova, "."),
    ("Avg Wind Gust Speed", windgustspeed_anova, "."),
    ("Avg Wind Direction", winddirection_anova, "."),
    ("Latitude", latitude_anova, "."),
    ("Longitude", longitude_anova, "."),
    ("Avg Distance to Park", avgdistpark_anova, "."),
    ("Avg Park Size", avgparksize_anova, "."),
    ("Avg Population per Park", avgpoppark_anova, "."),
    ("Air Quality", airquality_anova, "."),
    ("Water Pollution", waterpollution_anova, "."),
    ("Avg Dog Population per Household", avgdogs_anova, "."),
    ("Avg Cat Population per Household", avgcats_anova, "."),
    ("Median distance to park", median_distance_park_anova, ""),
    ("Median size of park", median_size_park_anova, ""),
    ("Average number of parks within 1000m", AvgNbParksWithin1000m_anova, ""),
    ("Number of flats", NbFlats_anova, ""),
    ("Number of flats with gardens", NbFlatsWithGardens_anova, ""),
    ("Average garden size for flats", AvgGardenSizeFlats_anova, ""),
    ("Number of houses", NbHouses_anova, ""),
    ("Number of houses with gardens", NbHousesWithGardens_anova, ""),
    ("Average garden size for houses", AvgGardenSizeHouses_anova, ""),
    ("Country", country_anova, ""),
    ("Region", region_anova, ""),
    ("County", county_anova, ""),
]

for subject, anova_result, ending in interpretations:
    p_value = anova_result.pvalue
    if pd.isna(p_value):
        # A NaN p-value means the test could not be evaluated. Because
        # `NaN < ALPHA` is False, it would otherwise be reported as
        # "not significant", which is a conclusion the data does not support.
        print(f"⚠️ {subject}: ANOVA could not be computed (p-value is NaN); no conclusion{ending}")
    elif p_value < ALPHA:
        print(f"✅ {subject} has a significant effect on happiness (reject H₀){ending}")
    else:
        print(f"❌ {subject} does not significantly affect happiness (fail to reject H₀){ending}")



  if _f_oneway_is_too_small(samples):


ANOVA Results for Rainfall:
F-Statistic: 2.4870, p-value: 0.0844
ANOVA Results for Temperature:
F-Statistic: 0.5902, p-value: 0.5547
ANOVA Results for Wind Speed:
F-Statistic: 0.9569, p-value: 0.3849
ANOVA Results for Min Temperature:
F-Statistic: 1.1806, p-value: 0.3081
ANOVA Results for Max Temperature:
F-Statistic: 2.2407, p-value: 0.1077
ANOVA Results for Avg Temperature:
F-Statistic: 0.5902, p-value: 0.5547
ANOVA Results for Avg Wind Gust Speed:
F-Statistic: 1.4760, p-value: 0.2298
ANOVA Results for Avg Wind Direction:
F-Statistic: 1.5343, p-value: 0.2168
ANOVA Results for Latitude:
F-Statistic: 0.2805, p-value: 0.7556
ANOVA Results for Longitude:
F-Statistic: 0.9626, p-value: 0.3828
ANOVA Results for Avg Distance to Park:
F-Statistic: 40.7499, p-value: 0.0000
ANOVA Results for Avg Park Size:
F-Statistic: 7.1882, p-value: 0.0009
ANOVA Results for Avg Population per Park:
F-Statistic: 12.2796, p-value: 0.0000
ANOVA Results for Air Quality:
F-Statistic: 1.1736, p-value: 0.3135
ANOVA

# ANOVA tests for low happiness

In [3]:
# Outcome variable for this cell: keep only rows that have a 'low' value.
# NOTE(review): `df` is filtered in place, so if the average-rating cell above
# was run first, its dropna has already removed rows from `df` — confirm that
# the cumulative filtering is intended.
df = df.dropna(subset=['low'])

# Re-bin every numeric factor into three quantile-based groups on the rows
# that remain. Mapping: category column -> numeric source column.
# NOTE(review): 'MedianSizePark_Category' and 'AvgNbParksWithin1000m_Category'
# are not recomputed here, unlike in the average-rating cell.
tercile_sources = {
    'Rainfall_Category': 'total_rain_mm',
    'Temperature_Category': 'avg_temp_c',
    'WindSpeed_Category': 'avg_wind_speed_m_s',
    'MinTemp_Category': 'avg_min_temp_c1',
    'MaxTemp_Category': 'avg_max_temp_c',
    'AvgTemp_Category': 'avg_temp_c',
    'WindGustSpeed_Category': 'avg_wind_gust_speed_m_s',
    'AvgWindDirection_Category': 'avg_dominant_wind_direction',
    'Latitude_Category': 'Happiness_Latitude',
    'Longitude_Category': 'Happiness_Longitude',
    'AvgDistPark_Category': 'avg_dist_to_park',
    'AvgSizePark_Category': 'avg_size_park',
    'AvgPopPark_Category': 'avg_pop_per_park',
    'AirQuality_Category': 'AirQuality',
    'WaterPollution_Category': 'WaterPollution',
    'AvgDogs_Category': 'average_dogs_per_household',
    'AvgCats_Category': 'average_cats_per_household',
    'MedianDistToPark_Category': 'median_dist_to_park',
    'NbFlats_Category': 'nb_flats',
    'NbFlatsWithGardens_Category': 'nb_flats_with_gardens',
    'AvgGardenSizeFlats_Category': 'avg_garden_size_flats',
    'NbHouses_Category': 'nb_houses',
    'NbHousesWithGardens_Category': 'nb_houses_with_gardens',
    'AvgGardenSizeHouses_Category': 'avg_garden_size_houses',
}
tercile_labels = ['Low', 'Medium', 'High']

for category_col, source_col in tercile_sources.items():
    df[category_col] = pd.qcut(df[source_col], q=3, labels=tercile_labels)


# One-way ANOVA of the 'low' column against every factor.
# Each `<factor>_groups` list holds one Series of 'low' values per group, and
# `<factor>_anova` is the matching scipy.stats.f_oneway result.
# NOTE(review): unlike the average-rating cell, median park size and average
# number of parks within 1000m are not tested here — confirm that is intended.
anova_target = 'low'


def _tercile_groups(frame, category_col, target_col):
    """Return the target values of the Low, Medium and High rows of category_col."""
    return [
        frame[frame[category_col] == level][target_col]
        for level in ['Low', 'Medium', 'High']
    ]


def _named_groups(frame, name_col, target_col):
    """Return the target values for each distinct, non-missing value of name_col.

    Missing names are skipped on purpose: Series.unique() lists NaN/None as a
    value, but an equality test against it matches no row, so it would add an
    empty sample and make f_oneway return NaN for the whole test.
    """
    return [
        frame[frame[name_col] == name][target_col]
        for name in frame[name_col].dropna().unique()
    ]


# --- Weather factors ---------------------------------------------------------

# Rainfall
rainfall_groups = _tercile_groups(df, 'Rainfall_Category', anova_target)
rainfall_anova = stats.f_oneway(*rainfall_groups)

# Temperature
temperature_groups = _tercile_groups(df, 'Temperature_Category', anova_target)
temperature_anova = stats.f_oneway(*temperature_groups)

# Wind speed
windspeed_groups = _tercile_groups(df, 'WindSpeed_Category', anova_target)
windspeed_anova = stats.f_oneway(*windspeed_groups)

# Minimum temperature
mintemp_groups = _tercile_groups(df, 'MinTemp_Category', anova_target)
mintemp_anova = stats.f_oneway(*mintemp_groups)

# Maximum temperature
maxtemp_groups = _tercile_groups(df, 'MaxTemp_Category', anova_target)
maxtemp_anova = stats.f_oneway(*maxtemp_groups)

# Average temperature
avgtemp_groups = _tercile_groups(df, 'AvgTemp_Category', anova_target)
avgtemp_anova = stats.f_oneway(*avgtemp_groups)

# Average wind gust speed
windgustspeed_groups = _tercile_groups(df, 'WindGustSpeed_Category', anova_target)
windgustspeed_anova = stats.f_oneway(*windgustspeed_groups)

# Average dominant wind direction
winddirection_groups = _tercile_groups(df, 'AvgWindDirection_Category', anova_target)
winddirection_anova = stats.f_oneway(*winddirection_groups)

# --- Location ----------------------------------------------------------------

# Latitude
latitude_groups = _tercile_groups(df, 'Latitude_Category', anova_target)
latitude_anova = stats.f_oneway(*latitude_groups)

# Longitude
longitude_groups = _tercile_groups(df, 'Longitude_Category', anova_target)
longitude_anova = stats.f_oneway(*longitude_groups)

# --- Parks -------------------------------------------------------------------

# Average distance to park
avgdistpark_groups = _tercile_groups(df, 'AvgDistPark_Category', anova_target)
avgdistpark_anova = stats.f_oneway(*avgdistpark_groups)

# Average park size
avgparksize_groups = _tercile_groups(df, 'AvgSizePark_Category', anova_target)
avgparksize_anova = stats.f_oneway(*avgparksize_groups)

# Average population per park
avgpoppark_groups = _tercile_groups(df, 'AvgPopPark_Category', anova_target)
avgpoppark_anova = stats.f_oneway(*avgpoppark_groups)

# --- Environment and pets ----------------------------------------------------

# Air quality
airquality_groups = _tercile_groups(df, 'AirQuality_Category', anova_target)
airquality_anova = stats.f_oneway(*airquality_groups)

# Water pollution
waterpollution_groups = _tercile_groups(df, 'WaterPollution_Category', anova_target)
waterpollution_anova = stats.f_oneway(*waterpollution_groups)

# Average dogs per household
avgdogs_groups = _tercile_groups(df, 'AvgDogs_Category', anova_target)
avgdogs_anova = stats.f_oneway(*avgdogs_groups)

# Average cats per household
avgcats_groups = _tercile_groups(df, 'AvgCats_Category', anova_target)
avgcats_anova = stats.f_oneway(*avgcats_groups)

# Median distance to park
median_distance_park_groups = _tercile_groups(df, 'MedianDistToPark_Category', anova_target)
median_distance_park_anova = stats.f_oneway(*median_distance_park_groups)

# --- Housing -----------------------------------------------------------------

# Number of flats
NbFlats_groups = _tercile_groups(df, 'NbFlats_Category', anova_target)
NbFlats_anova = stats.f_oneway(*NbFlats_groups)

# Number of flats with gardens
NbFlatsWithGardens_groups = _tercile_groups(df, 'NbFlatsWithGardens_Category', anova_target)
NbFlatsWithGardens_anova = stats.f_oneway(*NbFlatsWithGardens_groups)

# Average garden size (flats)
AvgGardenSizeFlats_groups = _tercile_groups(df, 'AvgGardenSizeFlats_Category', anova_target)
AvgGardenSizeFlats_anova = stats.f_oneway(*AvgGardenSizeFlats_groups)

# Number of houses
NbHouses_groups = _tercile_groups(df, 'NbHouses_Category', anova_target)
NbHouses_anova = stats.f_oneway(*NbHouses_groups)

# Number of houses with gardens
NbHousesWithGardens_groups = _tercile_groups(df, 'NbHousesWithGardens_Category', anova_target)
NbHousesWithGardens_anova = stats.f_oneway(*NbHousesWithGardens_groups)

# Average garden size (houses)
AvgGardenSizeHouses_groups = _tercile_groups(df, 'AvgGardenSizeHouses_Category', anova_target)
AvgGardenSizeHouses_anova = stats.f_oneway(*AvgGardenSizeHouses_groups)

# --- Administrative areas (one group per distinct name) ----------------------

# Country
country_groups = _named_groups(df, 'country_name', anova_target)
country_anova = stats.f_oneway(*country_groups)

# Region
region_groups = _named_groups(df, 'region_name', anova_target)
region_anova = stats.f_oneway(*region_groups)

# County
county_groups = _named_groups(df, 'county_name', anova_target)
county_anova = stats.f_oneway(*county_groups)



# Report the F statistic and p-value of every ANOVA (four decimals each),
# in the same order in which the tests were run.
_anova_report = (
    ("Rainfall", rainfall_anova),
    ("Temperature", temperature_anova),
    ("Wind Speed", windspeed_anova),
    ("Min Temperature", mintemp_anova),
    ("Max Temperature", maxtemp_anova),
    ("Avg Temperature", avgtemp_anova),
    ("Avg Wind Gust Speed", windgustspeed_anova),
    ("Avg Wind Direction", winddirection_anova),
    ("Latitude", latitude_anova),
    ("Longitude", longitude_anova),
    ("Avg Distance to Park", avgdistpark_anova),
    ("Avg Park Size", avgparksize_anova),
    ("Avg Population per Park", avgpoppark_anova),
    ("Air Quality", airquality_anova),
    ("Water Pollution", waterpollution_anova),
    ("Avg Dog Population per Household", avgdogs_anova),
    ("Avg Cat Population per Household", avgcats_anova),
    ("median distance to park", median_distance_park_anova),
    ("median size of park", median_size_park_anova),
    ("average number of parks within 1000m to park", AvgNbParksWithin1000m_anova),
    ("Number of Flats", NbFlats_anova),
    ("Number of Flats with Gardens", NbFlatsWithGardens_anova),
    ("Average Garden Size (Flats)", AvgGardenSizeFlats_anova),
    ("Number of Houses", NbHouses_anova),
    ("Number of Houses with Gardens", NbHousesWithGardens_anova),
    ("Average Garden Size (Houses)", AvgGardenSizeHouses_anova),
    ("Country", country_anova),
    ("Region", region_anova),
    ("County", county_anova),
)
for _title, _result in _anova_report:
    print(f"ANOVA Results for {_title}:\nF-Statistic: {_result.statistic:.4f}, p-value: {_result.pvalue:.4f}")
# Blank line after the last entry, separating the statistics from the
# interpretation messages printed next.
print()



# Interpret every ANOVA at the 5% significance level.
#
# Each entry is (label, f_oneway result, trailing punctuation). The first 17
# messages end with a full stop and the remaining ones do not; that difference
# is kept so the text printed for computable tests is unchanged.
#
# A NaN p-value means the test could not be computed. `nan < 0.05` is False, so
# without the explicit check such a factor would be reported as "does not
# significantly affect happiness", which is a conclusion the data never gave.
_ALPHA = 0.05
_anova_interpretations = (
    ("Rainfall", rainfall_anova, "."),
    ("Temperature", temperature_anova, "."),
    ("Wind speed", windspeed_anova, "."),
    ("Min Temperature", mintemp_anova, "."),
    ("Max Temperature", maxtemp_anova, "."),
    ("Avg Temperature", avgtemp_anova, "."),
    ("Avg Wind Gust Speed", windgustspeed_anova, "."),
    ("Avg Wind Direction", winddirection_anova, "."),
    ("Latitude", latitude_anova, "."),
    ("Longitude", longitude_anova, "."),
    ("Avg Distance to Park", avgdistpark_anova, "."),
    ("Avg Park Size", avgparksize_anova, "."),
    ("Avg Population per Park", avgpoppark_anova, "."),
    ("Air Quality", airquality_anova, "."),
    ("Water Pollution", waterpollution_anova, "."),
    ("Avg Dog Population per Household", avgdogs_anova, "."),
    ("Avg Cat Population per Household", avgcats_anova, "."),
    ("Median distance to park", median_distance_park_anova, ""),
    ("Median size of park", median_size_park_anova, ""),
    ("Average number of parks within 1000m", AvgNbParksWithin1000m_anova, ""),
    ("Number of flats", NbFlats_anova, ""),
    ("Number of flats with gardens", NbFlatsWithGardens_anova, ""),
    ("Average garden size for flats", AvgGardenSizeFlats_anova, ""),
    ("Number of houses", NbHouses_anova, ""),
    ("Number of houses with gardens", NbHousesWithGardens_anova, ""),
    ("Average garden size for houses", AvgGardenSizeHouses_anova, ""),
    ("Country", country_anova, ""),
    ("Region", region_anova, ""),
    ("County", county_anova, ""),
)
for _label, _result, _stop in _anova_interpretations:
    if pd.isna(_result.pvalue):
        # Undefined test: say so instead of claiming "no significant effect".
        print(f"⚠️ {_label} could not be tested (ANOVA p-value is NaN){_stop}")
    elif _result.pvalue < _ALPHA:
        print(f"✅ {_label} has a significant effect on happiness (reject H₀){_stop}")
    else:
        print(f"❌ {_label} does not significantly affect happiness (fail to reject H₀){_stop}")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Rainfall_Category'] = pd.qcut(df['total_rain_mm'], q=3, labels=['Low', 'Medium', 'High'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Temperature_Category'] = pd.qcut(df['avg_temp_c'], q=3, labels=['Low', 'Medium', 'High'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['WindSpeed_Cat

ANOVA Results for Rainfall:
F-Statistic: 2.0342, p-value: 0.1327
ANOVA Results for Temperature:
F-Statistic: 3.6193, p-value: 0.0280
ANOVA Results for Wind Speed:
F-Statistic: 1.3496, p-value: 0.2610
ANOVA Results for Min Temperature:
F-Statistic: 2.2823, p-value: 0.1039
ANOVA Results for Max Temperature:
F-Statistic: 8.1147, p-value: 0.0004
ANOVA Results for Avg Temperature:
F-Statistic: 3.6193, p-value: 0.0280
ANOVA Results for Avg Wind Gust Speed:
F-Statistic: 4.8052, p-value: 0.0089
ANOVA Results for Avg Wind Direction:
F-Statistic: 6.5267, p-value: 0.0017
ANOVA Results for Latitude:
F-Statistic: 8.0461, p-value: 0.0004
ANOVA Results for Longitude:
F-Statistic: 2.8519, p-value: 0.0594
ANOVA Results for Avg Distance to Park:
F-Statistic: 6.6386, p-value: 0.0016
ANOVA Results for Avg Park Size:
F-Statistic: 1.6486, p-value: 0.1944
ANOVA Results for Avg Population per Park:
F-Statistic: 0.5397, p-value: 0.5836
ANOVA Results for Air Quality:
F-Statistic: 1.4523, p-value: 0.2401
ANOVA R

  if _f_oneway_is_too_small(samples):


# ANOVA tests for medium happiness

In [4]:
# Medium happiness measure: keep only the rows that have a 'medium' value.
# The explicit .copy() makes df an independent frame, so the column assignments
# below no longer trigger pandas' SettingWithCopyWarning (visible in the
# captured output of the earlier cell that uses the same dropna-then-assign
# pattern).
# NOTE: df is rebound to the reduced frame, so later cells only see rows that
# survived this (and every earlier) dropna.
df = df.dropna(subset=['medium']).copy()

# Create 3 equal sized groups (terciles) for ANOVA.
# Maps each category column to the numeric column it is binned from.
# Temperature_Category and AvgTemp_Category are both built from 'avg_temp_c',
# so the two tests that use them give identical results.
# NOTE(review): MedianSizePark_Category and AvgNbParksWithin1000m_Category are
# read by the ANOVA tests further down but are not rebuilt here; presumably they
# come from an earlier cell and were binned on a different set of rows --
# confirm and add their source columns to this mapping.
_tercile_sources = {
    'Rainfall_Category': 'total_rain_mm',
    'Temperature_Category': 'avg_temp_c',
    'WindSpeed_Category': 'avg_wind_speed_m_s',
    'MinTemp_Category': 'avg_min_temp_c1',
    'MaxTemp_Category': 'avg_max_temp_c',
    'AvgTemp_Category': 'avg_temp_c',
    'WindGustSpeed_Category': 'avg_wind_gust_speed_m_s',
    'AvgWindDirection_Category': 'avg_dominant_wind_direction',
    'Latitude_Category': 'Happiness_Latitude',
    'Longitude_Category': 'Happiness_Longitude',
    'AvgDistPark_Category': 'avg_dist_to_park',
    'AvgSizePark_Category': 'avg_size_park',
    'AvgPopPark_Category': 'avg_pop_per_park',
    'AirQuality_Category': 'AirQuality',
    'WaterPollution_Category': 'WaterPollution',
    'AvgDogs_Category': 'average_dogs_per_household',
    'AvgCats_Category': 'average_cats_per_household',
    'MedianDistToPark_Category': 'median_dist_to_park',
    'NbFlats_Category': 'nb_flats',
    'NbFlatsWithGardens_Category': 'nb_flats_with_gardens',
    'AvgGardenSizeFlats_Category': 'avg_garden_size_flats',
    'NbHouses_Category': 'nb_houses',
    'NbHousesWithGardens_Category': 'nb_houses_with_gardens',
    'AvgGardenSizeHouses_Category': 'avg_garden_size_houses',
}
for _category_column, _source_column in _tercile_sources.items():
    df[_category_column] = pd.qcut(df[_source_column], q=3, labels=['Low', 'Medium', 'High'])


# One-way ANOVAs of the 'medium' happiness measure against every factor.
# Every *_groups list holds one Series of 'medium' values per level, and every
# *_anova is the matching stats.f_oneway result.
_MEDIUM_TERCILE_LEVELS = ('Low', 'Medium', 'High')


def _medium_by_tercile(category_column):
    """Split the 'medium' values of df by the Low/Medium/High levels of category_column."""
    return [df.loc[df[category_column] == level, 'medium'] for level in _MEDIUM_TERCILE_LEVELS]


def _medium_by_label(label_column):
    """Split the 'medium' values of df by each distinct value found in label_column."""
    return [df.loc[df[label_column] == value, 'medium'] for value in df[label_column].unique()]


# --- Weather ---------------------------------------------------------------

# Rainfall
rainfall_groups = _medium_by_tercile('Rainfall_Category')
rainfall_anova = stats.f_oneway(*rainfall_groups)

# Temperature
temperature_groups = _medium_by_tercile('Temperature_Category')
temperature_anova = stats.f_oneway(*temperature_groups)

# Wind speed
windspeed_groups = _medium_by_tercile('WindSpeed_Category')
windspeed_anova = stats.f_oneway(*windspeed_groups)

# Min temperature
mintemp_groups = _medium_by_tercile('MinTemp_Category')
mintemp_anova = stats.f_oneway(*mintemp_groups)

# Max temperature
maxtemp_groups = _medium_by_tercile('MaxTemp_Category')
maxtemp_anova = stats.f_oneway(*maxtemp_groups)

# Avg temperature
avgtemp_groups = _medium_by_tercile('AvgTemp_Category')
avgtemp_anova = stats.f_oneway(*avgtemp_groups)

# Avg wind gust speed
windgustspeed_groups = _medium_by_tercile('WindGustSpeed_Category')
windgustspeed_anova = stats.f_oneway(*windgustspeed_groups)

# Avg wind direction
winddirection_groups = _medium_by_tercile('AvgWindDirection_Category')
winddirection_anova = stats.f_oneway(*winddirection_groups)

# --- Location --------------------------------------------------------------

# Latitude
latitude_groups = _medium_by_tercile('Latitude_Category')
latitude_anova = stats.f_oneway(*latitude_groups)

# Longitude
longitude_groups = _medium_by_tercile('Longitude_Category')
longitude_anova = stats.f_oneway(*longitude_groups)

# --- Parks, environment, pets ----------------------------------------------

# Avg distance to park
avgdistpark_groups = _medium_by_tercile('AvgDistPark_Category')
avgdistpark_anova = stats.f_oneway(*avgdistpark_groups)

# Avg park size
avgparksize_groups = _medium_by_tercile('AvgSizePark_Category')
avgparksize_anova = stats.f_oneway(*avgparksize_groups)

# Avg population per park
avgpoppark_groups = _medium_by_tercile('AvgPopPark_Category')
avgpoppark_anova = stats.f_oneway(*avgpoppark_groups)

# Air quality
airquality_groups = _medium_by_tercile('AirQuality_Category')
airquality_anova = stats.f_oneway(*airquality_groups)

# Water pollution
waterpollution_groups = _medium_by_tercile('WaterPollution_Category')
waterpollution_anova = stats.f_oneway(*waterpollution_groups)

# Avg dog population per household
avgdogs_groups = _medium_by_tercile('AvgDogs_Category')
avgdogs_anova = stats.f_oneway(*avgdogs_groups)

# Avg cat population per household
avgcats_groups = _medium_by_tercile('AvgCats_Category')
avgcats_anova = stats.f_oneway(*avgcats_groups)

# Median distance to park
median_distance_park_groups = _medium_by_tercile('MedianDistToPark_Category')
median_distance_park_anova = stats.f_oneway(*median_distance_park_groups)

# Median size of park
# NOTE(review): MedianSizePark_Category is not rebuilt by this cell's binning
# step; presumably it is left over from an earlier cell -- confirm.
median_size_park_groups = _medium_by_tercile('MedianSizePark_Category')
median_size_park_anova = stats.f_oneway(*median_size_park_groups)

# Average number of parks within 1000m
# NOTE(review): AvgNbParksWithin1000m_Category is likewise not rebuilt by this
# cell's binning step -- confirm where it comes from.
AvgNbParksWithin1000m_groups = _medium_by_tercile('AvgNbParksWithin1000m_Category')
AvgNbParksWithin1000m_anova = stats.f_oneway(*AvgNbParksWithin1000m_groups)

# --- Housing ---------------------------------------------------------------

# Number of flats
NbFlats_groups = _medium_by_tercile('NbFlats_Category')
NbFlats_anova = stats.f_oneway(*NbFlats_groups)

# Number of flats with gardens
NbFlatsWithGardens_groups = _medium_by_tercile('NbFlatsWithGardens_Category')
NbFlatsWithGardens_anova = stats.f_oneway(*NbFlatsWithGardens_groups)

# Average garden size of flats
AvgGardenSizeFlats_groups = _medium_by_tercile('AvgGardenSizeFlats_Category')
AvgGardenSizeFlats_anova = stats.f_oneway(*AvgGardenSizeFlats_groups)

# Number of houses
NbHouses_groups = _medium_by_tercile('NbHouses_Category')
NbHouses_anova = stats.f_oneway(*NbHouses_groups)

# Number of houses with gardens
NbHousesWithGardens_groups = _medium_by_tercile('NbHousesWithGardens_Category')
NbHousesWithGardens_anova = stats.f_oneway(*NbHousesWithGardens_groups)

# Average garden size of houses
AvgGardenSizeHouses_groups = _medium_by_tercile('AvgGardenSizeHouses_Category')
AvgGardenSizeHouses_anova = stats.f_oneway(*AvgGardenSizeHouses_groups)

# --- Geography (one group per distinct label) --------------------------------

# Country
country_groups = _medium_by_label('country_name')
country_anova = stats.f_oneway(*country_groups)

# Region
region_groups = _medium_by_label('region_name')
region_anova = stats.f_oneway(*region_groups)

# County
# NOTE(review): if each county occurs only once, every group has a single value
# and f_oneway cannot produce a statistic; the captured output shows scipy's
# too-small-sample warning, which may come from this call -- confirm.
county_groups = _medium_by_label('county_name')
county_anova = stats.f_oneway(*county_groups)



# Report the F statistic and p-value of every ANOVA (four decimals each),
# in the same order in which the tests were run.
_anova_report = (
    ("Rainfall", rainfall_anova),
    ("Temperature", temperature_anova),
    ("Wind Speed", windspeed_anova),
    ("Min Temperature", mintemp_anova),
    ("Max Temperature", maxtemp_anova),
    ("Avg Temperature", avgtemp_anova),
    ("Avg Wind Gust Speed", windgustspeed_anova),
    ("Avg Wind Direction", winddirection_anova),
    ("Latitude", latitude_anova),
    ("Longitude", longitude_anova),
    ("Avg Distance to Park", avgdistpark_anova),
    ("Avg Park Size", avgparksize_anova),
    ("Avg Population per Park", avgpoppark_anova),
    ("Air Quality", airquality_anova),
    ("Water Pollution", waterpollution_anova),
    ("Avg Dog Population per Household", avgdogs_anova),
    ("Avg Cat Population per Household", avgcats_anova),
    ("median distance to park", median_distance_park_anova),
    ("median size of park", median_size_park_anova),
    ("average number of parks within 1000m to park", AvgNbParksWithin1000m_anova),
    ("Number of Flats", NbFlats_anova),
    ("Number of Flats with Gardens", NbFlatsWithGardens_anova),
    ("Average Garden Size (Flats)", AvgGardenSizeFlats_anova),
    ("Number of Houses", NbHouses_anova),
    ("Number of Houses with Gardens", NbHousesWithGardens_anova),
    ("Average Garden Size (Houses)", AvgGardenSizeHouses_anova),
    ("Country", country_anova),
    ("Region", region_anova),
    ("County", county_anova),
)
for _title, _result in _anova_report:
    print(f"ANOVA Results for {_title}:\nF-Statistic: {_result.statistic:.4f}, p-value: {_result.pvalue:.4f}")
# Blank line after the last entry, separating the statistics from the
# interpretation messages printed next.
print()



# Interpret every ANOVA at the 5% significance level.
#
# Each entry is (label, f_oneway result, trailing punctuation). The first 17
# messages end with a full stop and the remaining ones do not; that difference
# is kept so the text printed for computable tests is unchanged.
#
# A NaN p-value means the test could not be computed. `nan < 0.05` is False, so
# without the explicit check such a factor would be reported as "does not
# significantly affect happiness", which is a conclusion the data never gave.
_ALPHA = 0.05
_anova_interpretations = (
    ("Rainfall", rainfall_anova, "."),
    ("Temperature", temperature_anova, "."),
    ("Wind speed", windspeed_anova, "."),
    ("Min Temperature", mintemp_anova, "."),
    ("Max Temperature", maxtemp_anova, "."),
    ("Avg Temperature", avgtemp_anova, "."),
    ("Avg Wind Gust Speed", windgustspeed_anova, "."),
    ("Avg Wind Direction", winddirection_anova, "."),
    ("Latitude", latitude_anova, "."),
    ("Longitude", longitude_anova, "."),
    ("Avg Distance to Park", avgdistpark_anova, "."),
    ("Avg Park Size", avgparksize_anova, "."),
    ("Avg Population per Park", avgpoppark_anova, "."),
    ("Air Quality", airquality_anova, "."),
    ("Water Pollution", waterpollution_anova, "."),
    ("Avg Dog Population per Household", avgdogs_anova, "."),
    ("Avg Cat Population per Household", avgcats_anova, "."),
    ("Median distance to park", median_distance_park_anova, ""),
    ("Median size of park", median_size_park_anova, ""),
    ("Average number of parks within 1000m", AvgNbParksWithin1000m_anova, ""),
    ("Number of flats", NbFlats_anova, ""),
    ("Number of flats with gardens", NbFlatsWithGardens_anova, ""),
    ("Average garden size for flats", AvgGardenSizeFlats_anova, ""),
    ("Number of houses", NbHouses_anova, ""),
    ("Number of houses with gardens", NbHousesWithGardens_anova, ""),
    ("Average garden size for houses", AvgGardenSizeHouses_anova, ""),
    ("Country", country_anova, ""),
    ("Region", region_anova, ""),
    ("County", county_anova, ""),
)
for _label, _result, _stop in _anova_interpretations:
    if pd.isna(_result.pvalue):
        # Undefined test: say so instead of claiming "no significant effect".
        print(f"⚠️ {_label} could not be tested (ANOVA p-value is NaN){_stop}")
    elif _result.pvalue < _ALPHA:
        print(f"✅ {_label} has a significant effect on happiness (reject H₀){_stop}")
    else:
        print(f"❌ {_label} does not significantly affect happiness (fail to reject H₀){_stop}")



  if _f_oneway_is_too_small(samples):


ANOVA Results for Rainfall:
F-Statistic: 6.5909, p-value: 0.0016
ANOVA Results for Temperature:
F-Statistic: 0.5561, p-value: 0.5740
ANOVA Results for Wind Speed:
F-Statistic: 2.2928, p-value: 0.1029
ANOVA Results for Min Temperature:
F-Statistic: 0.5994, p-value: 0.5499
ANOVA Results for Max Temperature:
F-Statistic: 1.9836, p-value: 0.1395
ANOVA Results for Avg Temperature:
F-Statistic: 0.5561, p-value: 0.5740
ANOVA Results for Avg Wind Gust Speed:
F-Statistic: 3.6018, p-value: 0.0285
ANOVA Results for Avg Wind Direction:
F-Statistic: 3.1610, p-value: 0.0439
ANOVA Results for Latitude:
F-Statistic: 0.5478, p-value: 0.5788
ANOVA Results for Longitude:
F-Statistic: 4.5627, p-value: 0.0112
ANOVA Results for Avg Distance to Park:
F-Statistic: 32.3357, p-value: 0.0000
ANOVA Results for Avg Park Size:
F-Statistic: 0.7753, p-value: 0.4617
ANOVA Results for Avg Population per Park:
F-Statistic: 8.8792, p-value: 0.0002
ANOVA Results for Air Quality:
F-Statistic: 1.4777, p-value: 0.2343
ANOVA 

# ANOVA tests for high happiness

In [5]:
# Response measure for this set of ANOVAs: keep only rows where 'high' is present.
# NOTE(review): `df` is reassigned in place, so rows dropped by earlier cells
# stay dropped here and rows dropped here stay dropped in later cells.
df = df.dropna(subset=['high'])

# Bin every numeric factor into three quantile-based groups (Low/Medium/High).
# Keys are the category columns to create, values the numeric columns they are
# binned from.
# NOTE(review): 'Temperature_Category' and 'AvgTemp_Category' are both binned
# from 'avg_temp_c', so their ANOVAs are the same test reported twice.
# NOTE(review): 'MedianSizePark_Category' and 'AvgNbParksWithin1000m_Category'
# are used by the tests in this cell but are not rebuilt here -- presumably
# they are left over from an earlier cell; confirm they are still valid for
# the rows kept above.
_tertile_sources = {
    'Rainfall_Category': 'total_rain_mm',
    'Temperature_Category': 'avg_temp_c',
    'WindSpeed_Category': 'avg_wind_speed_m_s',
    'MinTemp_Category': 'avg_min_temp_c1',
    'MaxTemp_Category': 'avg_max_temp_c',
    'AvgTemp_Category': 'avg_temp_c',
    'WindGustSpeed_Category': 'avg_wind_gust_speed_m_s',
    'AvgWindDirection_Category': 'avg_dominant_wind_direction',
    'Latitude_Category': 'Happiness_Latitude',
    'Longitude_Category': 'Happiness_Longitude',
    'AvgDistPark_Category': 'avg_dist_to_park',
    'AvgSizePark_Category': 'avg_size_park',
    'AvgPopPark_Category': 'avg_pop_per_park',
    'AirQuality_Category': 'AirQuality',
    'WaterPollution_Category': 'WaterPollution',
    'AvgDogs_Category': 'average_dogs_per_household',
    'AvgCats_Category': 'average_cats_per_household',
    'MedianDistToPark_Category': 'median_dist_to_park',
    'NbFlats_Category': 'nb_flats',
    'NbFlatsWithGardens_Category': 'nb_flats_with_gardens',
    'AvgGardenSizeFlats_Category': 'avg_garden_size_flats',
    'NbHouses_Category': 'nb_houses',
    'NbHousesWithGardens_Category': 'nb_houses_with_gardens',
    'AvgGardenSizeHouses_Category': 'avg_garden_size_houses',
}
_tertile_labels = ['Low', 'Medium', 'High']
for _category_col, _source_col in _tertile_sources.items():
    df[_category_col] = pd.qcut(df[_source_col], q=3, labels=_tertile_labels)


# One-way ANOVA of the 'high' happiness measure against every factor.
# Each `*_groups` list holds one Series of 'high' values per factor level and
# each `*_anova` holds the matching scipy.stats.f_oneway result.


def _tertile_groups(category_col, measure='high'):
    """Return `measure` split by the Low/Medium/High labels of `category_col`."""
    return [df[df[category_col] == cat][measure] for cat in ['Low', 'Medium', 'High']]


def _label_groups(label_col, measure='high'):
    """Return `measure` split by each distinct non-missing value of `label_col`.

    Missing labels are skipped on purpose: `unique()` would otherwise yield
    NaN, `== NaN` matches no row, and the resulting empty group makes
    f_oneway warn and return NaN for both the statistic and the p-value.
    """
    return [df[df[label_col] == label][measure] for label in df[label_col].dropna().unique()]


# --- Weather factors ---
rainfall_groups = _tertile_groups('Rainfall_Category')
rainfall_anova = stats.f_oneway(*rainfall_groups)

temperature_groups = _tertile_groups('Temperature_Category')
temperature_anova = stats.f_oneway(*temperature_groups)

windspeed_groups = _tertile_groups('WindSpeed_Category')
windspeed_anova = stats.f_oneway(*windspeed_groups)

mintemp_groups = _tertile_groups('MinTemp_Category')
mintemp_anova = stats.f_oneway(*mintemp_groups)

maxtemp_groups = _tertile_groups('MaxTemp_Category')
maxtemp_anova = stats.f_oneway(*maxtemp_groups)

avgtemp_groups = _tertile_groups('AvgTemp_Category')
avgtemp_anova = stats.f_oneway(*avgtemp_groups)

windgustspeed_groups = _tertile_groups('WindGustSpeed_Category')
windgustspeed_anova = stats.f_oneway(*windgustspeed_groups)

winddirection_groups = _tertile_groups('AvgWindDirection_Category')
winddirection_anova = stats.f_oneway(*winddirection_groups)

# --- Location (coordinates) ---
latitude_groups = _tertile_groups('Latitude_Category')
latitude_anova = stats.f_oneway(*latitude_groups)

longitude_groups = _tertile_groups('Longitude_Category')
longitude_anova = stats.f_oneway(*longitude_groups)

# --- Parks ---
avgdistpark_groups = _tertile_groups('AvgDistPark_Category')
avgdistpark_anova = stats.f_oneway(*avgdistpark_groups)

avgparksize_groups = _tertile_groups('AvgSizePark_Category')
avgparksize_anova = stats.f_oneway(*avgparksize_groups)

avgpoppark_groups = _tertile_groups('AvgPopPark_Category')
avgpoppark_anova = stats.f_oneway(*avgpoppark_groups)

# --- Environment ---
airquality_groups = _tertile_groups('AirQuality_Category')
airquality_anova = stats.f_oneway(*airquality_groups)

waterpollution_groups = _tertile_groups('WaterPollution_Category')
waterpollution_anova = stats.f_oneway(*waterpollution_groups)

# --- Pets ---
avgdogs_groups = _tertile_groups('AvgDogs_Category')
avgdogs_anova = stats.f_oneway(*avgdogs_groups)

avgcats_groups = _tertile_groups('AvgCats_Category')
avgcats_anova = stats.f_oneway(*avgcats_groups)

# --- Parks (median distance, median size, number of parks within 1000m) ---
median_distance_park_groups = _tertile_groups('MedianDistToPark_Category')
median_distance_park_anova = stats.f_oneway(*median_distance_park_groups)

# NOTE(review): 'MedianSizePark_Category' and 'AvgNbParksWithin1000m_Category'
# are not created in this cell -- presumably they come from an earlier cell.
median_size_park_groups = _tertile_groups('MedianSizePark_Category')
median_size_park_anova = stats.f_oneway(*median_size_park_groups)

AvgNbParksWithin1000m_groups = _tertile_groups('AvgNbParksWithin1000m_Category')
AvgNbParksWithin1000m_anova = stats.f_oneway(*AvgNbParksWithin1000m_groups)

# --- Housing and gardens ---
NbFlats_groups = _tertile_groups('NbFlats_Category')
NbFlats_anova = stats.f_oneway(*NbFlats_groups)

NbFlatsWithGardens_groups = _tertile_groups('NbFlatsWithGardens_Category')
NbFlatsWithGardens_anova = stats.f_oneway(*NbFlatsWithGardens_groups)

AvgGardenSizeFlats_groups = _tertile_groups('AvgGardenSizeFlats_Category')
AvgGardenSizeFlats_anova = stats.f_oneway(*AvgGardenSizeFlats_groups)

NbHouses_groups = _tertile_groups('NbHouses_Category')
NbHouses_anova = stats.f_oneway(*NbHouses_groups)

NbHousesWithGardens_groups = _tertile_groups('NbHousesWithGardens_Category')
NbHousesWithGardens_anova = stats.f_oneway(*NbHousesWithGardens_groups)

AvgGardenSizeHouses_groups = _tertile_groups('AvgGardenSizeHouses_Category')
AvgGardenSizeHouses_anova = stats.f_oneway(*AvgGardenSizeHouses_groups)

# --- Geographic labels: one group per distinct country / region / county ---
country_groups = _label_groups('country_name')
country_anova = stats.f_oneway(*country_groups)

region_groups = _label_groups('region_name')
region_anova = stats.f_oneway(*region_groups)

county_groups = _label_groups('county_name')
county_anova = stats.f_oneway(*county_groups)



# Print the F-statistic and p-value of every ANOVA, in the order the tests ran.
_anova_report = [
    ("Rainfall", rainfall_anova),
    ("Temperature", temperature_anova),
    ("Wind Speed", windspeed_anova),
    ("Min Temperature", mintemp_anova),
    ("Max Temperature", maxtemp_anova),
    ("Avg Temperature", avgtemp_anova),
    ("Avg Wind Gust Speed", windgustspeed_anova),
    ("Avg Wind Direction", winddirection_anova),
    ("Latitude", latitude_anova),
    ("Longitude", longitude_anova),
    ("Avg Distance to Park", avgdistpark_anova),
    ("Avg Park Size", avgparksize_anova),
    ("Avg Population per Park", avgpoppark_anova),
    ("Air Quality", airquality_anova),
    ("Water Pollution", waterpollution_anova),
    ("Avg Dog Population per Household", avgdogs_anova),
    ("Avg Cat Population per Household", avgcats_anova),
    ("median distance to park", median_distance_park_anova),
    ("median size of park", median_size_park_anova),
    ("average number of parks within 1000m to park", AvgNbParksWithin1000m_anova),
    ("Number of Flats", NbFlats_anova),
    ("Number of Flats with Gardens", NbFlatsWithGardens_anova),
    ("Average Garden Size (Flats)", AvgGardenSizeFlats_anova),
    ("Number of Houses", NbHouses_anova),
    ("Number of Houses with Gardens", NbHousesWithGardens_anova),
    ("Average Garden Size (Houses)", AvgGardenSizeHouses_anova),
    ("Country", country_anova),
    ("Region", region_anova),
    ("County", county_anova),
]
for _label, _result in _anova_report:
    print(f"ANOVA Results for {_label}:\nF-Statistic: {_result.statistic:.4f}, p-value: {_result.pvalue:.4f}")
# Blank line after the last result, separating it from the output that follows.
print()



# Interpretation of every ANOVA at the 5% significance level.
#
# A NaN p-value (f_oneway returns NaN for degenerate input such as an empty
# group) is reported separately: `nan < 0.05` is False, so without this check
# an uncomputable test would be printed as "does not significantly affect".
#
# The two lists differ only in whether the message ends with a full stop;
# this keeps the printed text identical to the previous output.
_interpret_with_period = [
    ("Rainfall", rainfall_anova),
    ("Temperature", temperature_anova),
    ("Wind speed", windspeed_anova),
    ("Min Temperature", mintemp_anova),
    ("Max Temperature", maxtemp_anova),
    ("Avg Temperature", avgtemp_anova),
    ("Avg Wind Gust Speed", windgustspeed_anova),
    ("Avg Wind Direction", winddirection_anova),
    ("Latitude", latitude_anova),
    ("Longitude", longitude_anova),
    ("Avg Distance to Park", avgdistpark_anova),
    ("Avg Park Size", avgparksize_anova),
    ("Avg Population per Park", avgpoppark_anova),
    ("Air Quality", airquality_anova),
    ("Water Pollution", waterpollution_anova),
    ("Avg Dog Population per Household", avgdogs_anova),
    ("Avg Cat Population per Household", avgcats_anova),
]
_interpret_without_period = [
    ("Median distance to park", median_distance_park_anova),
    ("Median size of park", median_size_park_anova),
    ("Average number of parks within 1000m", AvgNbParksWithin1000m_anova),
    ("Number of flats", NbFlats_anova),
    ("Number of flats with gardens", NbFlatsWithGardens_anova),
    ("Average garden size for flats", AvgGardenSizeFlats_anova),
    ("Number of houses", NbHouses_anova),
    ("Number of houses with gardens", NbHousesWithGardens_anova),
    ("Average garden size for houses", AvgGardenSizeHouses_anova),
    ("Country", country_anova),
    ("Region", region_anova),
    ("County", county_anova),
]

for _entries, _end in ((_interpret_with_period, "."), (_interpret_without_period, "")):
    for _label, _result in _entries:
        if pd.isna(_result.pvalue):
            # Test could not be computed; do not present it as a null result.
            print(f"⚠️ {_label}: ANOVA could not be computed (p-value is NaN){_end}")
        elif _result.pvalue < 0.05:
            print(f"✅ {_label} has a significant effect on happiness (reject H₀){_end}")
        else:
            print(f"❌ {_label} does not significantly affect happiness (fail to reject H₀){_end}")



ANOVA Results for Rainfall:
F-Statistic: 1.7331, p-value: 0.1786
ANOVA Results for Temperature:
F-Statistic: 4.7462, p-value: 0.0094
ANOVA Results for Wind Speed:
F-Statistic: 7.7699, p-value: 0.0005
ANOVA Results for Min Temperature:
F-Statistic: 3.3562, p-value: 0.0363
ANOVA Results for Max Temperature:
F-Statistic: 7.0845, p-value: 0.0010
ANOVA Results for Avg Temperature:
F-Statistic: 4.7462, p-value: 0.0094
ANOVA Results for Avg Wind Gust Speed:
F-Statistic: 7.2377, p-value: 0.0009
ANOVA Results for Avg Wind Direction:
F-Statistic: 3.7154, p-value: 0.0255
ANOVA Results for Latitude:
F-Statistic: 6.8358, p-value: 0.0013
ANOVA Results for Longitude:
F-Statistic: 2.5448, p-value: 0.0803
ANOVA Results for Avg Distance to Park:
F-Statistic: 0.9724, p-value: 0.3796
ANOVA Results for Avg Park Size:
F-Statistic: 0.8329, p-value: 0.4360
ANOVA Results for Avg Population per Park:
F-Statistic: 0.5111, p-value: 0.6005
ANOVA Results for Air Quality:
F-Statistic: 0.8026, p-value: 0.4517
ANOVA R

  if _f_oneway_is_too_small(samples):


# ANOVA tests for very high happiness

In [6]:
# Response measure for this set of ANOVAs: keep only rows where 'very_high' is present.
# NOTE(review): `df` is reassigned in place, so rows dropped by earlier cells
# (e.g. the dropna on 'high') stay dropped here.
df = df.dropna(subset=['very_high'])

# Bin every numeric factor into three quantile-based groups (Low/Medium/High).
# Keys are the category columns to create, values the numeric columns they are
# binned from.
# NOTE(review): 'Temperature_Category' and 'AvgTemp_Category' are both binned
# from 'avg_temp_c', so their ANOVAs are the same test reported twice.
# NOTE(review): 'MedianSizePark_Category' and 'AvgNbParksWithin1000m_Category'
# are used by the tests in this cell but are not rebuilt here -- presumably
# they are left over from an earlier cell; confirm they are still valid for
# the rows kept above.
_tertile_sources = {
    'Rainfall_Category': 'total_rain_mm',
    'Temperature_Category': 'avg_temp_c',
    'WindSpeed_Category': 'avg_wind_speed_m_s',
    'MinTemp_Category': 'avg_min_temp_c1',
    'MaxTemp_Category': 'avg_max_temp_c',
    'AvgTemp_Category': 'avg_temp_c',
    'WindGustSpeed_Category': 'avg_wind_gust_speed_m_s',
    'AvgWindDirection_Category': 'avg_dominant_wind_direction',
    'Latitude_Category': 'Happiness_Latitude',
    'Longitude_Category': 'Happiness_Longitude',
    'AvgDistPark_Category': 'avg_dist_to_park',
    'AvgSizePark_Category': 'avg_size_park',
    'AvgPopPark_Category': 'avg_pop_per_park',
    'AirQuality_Category': 'AirQuality',
    'WaterPollution_Category': 'WaterPollution',
    'AvgDogs_Category': 'average_dogs_per_household',
    'AvgCats_Category': 'average_cats_per_household',
    'MedianDistToPark_Category': 'median_dist_to_park',
    'NbFlats_Category': 'nb_flats',
    'NbFlatsWithGardens_Category': 'nb_flats_with_gardens',
    'AvgGardenSizeFlats_Category': 'avg_garden_size_flats',
    'NbHouses_Category': 'nb_houses',
    'NbHousesWithGardens_Category': 'nb_houses_with_gardens',
    'AvgGardenSizeHouses_Category': 'avg_garden_size_houses',
}
_tertile_labels = ['Low', 'Medium', 'High']
for _category_col, _source_col in _tertile_sources.items():
    df[_category_col] = pd.qcut(df[_source_col], q=3, labels=_tertile_labels)


# One-way ANOVA of the 'very_high' happiness measure against every factor.
# Each `*_groups` list holds one Series of 'very_high' values per factor level
# and each `*_anova` holds the matching scipy.stats.f_oneway result.


def _tertile_groups(category_col, measure='very_high'):
    """Return `measure` split by the Low/Medium/High labels of `category_col`."""
    return [df[df[category_col] == cat][measure] for cat in ['Low', 'Medium', 'High']]


def _label_groups(label_col, measure='very_high'):
    """Return `measure` split by each distinct non-missing value of `label_col`.

    Missing labels are skipped on purpose: `unique()` would otherwise yield
    NaN, `== NaN` matches no row, and the resulting empty group makes
    f_oneway warn and return NaN for both the statistic and the p-value.
    """
    return [df[df[label_col] == label][measure] for label in df[label_col].dropna().unique()]


# --- Weather factors ---
rainfall_groups = _tertile_groups('Rainfall_Category')
rainfall_anova = stats.f_oneway(*rainfall_groups)

temperature_groups = _tertile_groups('Temperature_Category')
temperature_anova = stats.f_oneway(*temperature_groups)

windspeed_groups = _tertile_groups('WindSpeed_Category')
windspeed_anova = stats.f_oneway(*windspeed_groups)

mintemp_groups = _tertile_groups('MinTemp_Category')
mintemp_anova = stats.f_oneway(*mintemp_groups)

maxtemp_groups = _tertile_groups('MaxTemp_Category')
maxtemp_anova = stats.f_oneway(*maxtemp_groups)

avgtemp_groups = _tertile_groups('AvgTemp_Category')
avgtemp_anova = stats.f_oneway(*avgtemp_groups)

windgustspeed_groups = _tertile_groups('WindGustSpeed_Category')
windgustspeed_anova = stats.f_oneway(*windgustspeed_groups)

winddirection_groups = _tertile_groups('AvgWindDirection_Category')
winddirection_anova = stats.f_oneway(*winddirection_groups)

# --- Location (coordinates) ---
latitude_groups = _tertile_groups('Latitude_Category')
latitude_anova = stats.f_oneway(*latitude_groups)

longitude_groups = _tertile_groups('Longitude_Category')
longitude_anova = stats.f_oneway(*longitude_groups)

# --- Parks ---
avgdistpark_groups = _tertile_groups('AvgDistPark_Category')
avgdistpark_anova = stats.f_oneway(*avgdistpark_groups)

avgparksize_groups = _tertile_groups('AvgSizePark_Category')
avgparksize_anova = stats.f_oneway(*avgparksize_groups)

avgpoppark_groups = _tertile_groups('AvgPopPark_Category')
avgpoppark_anova = stats.f_oneway(*avgpoppark_groups)

# --- Environment ---
airquality_groups = _tertile_groups('AirQuality_Category')
airquality_anova = stats.f_oneway(*airquality_groups)

waterpollution_groups = _tertile_groups('WaterPollution_Category')
waterpollution_anova = stats.f_oneway(*waterpollution_groups)

# --- Pets ---
avgdogs_groups = _tertile_groups('AvgDogs_Category')
avgdogs_anova = stats.f_oneway(*avgdogs_groups)

avgcats_groups = _tertile_groups('AvgCats_Category')
avgcats_anova = stats.f_oneway(*avgcats_groups)

# --- Parks (median distance, median size, number of parks within 1000m) ---
median_distance_park_groups = _tertile_groups('MedianDistToPark_Category')
median_distance_park_anova = stats.f_oneway(*median_distance_park_groups)

# NOTE(review): 'MedianSizePark_Category' and 'AvgNbParksWithin1000m_Category'
# are not created in this cell -- presumably they come from an earlier cell.
median_size_park_groups = _tertile_groups('MedianSizePark_Category')
median_size_park_anova = stats.f_oneway(*median_size_park_groups)

AvgNbParksWithin1000m_groups = _tertile_groups('AvgNbParksWithin1000m_Category')
AvgNbParksWithin1000m_anova = stats.f_oneway(*AvgNbParksWithin1000m_groups)

# --- Housing and gardens ---
NbFlats_groups = _tertile_groups('NbFlats_Category')
NbFlats_anova = stats.f_oneway(*NbFlats_groups)

NbFlatsWithGardens_groups = _tertile_groups('NbFlatsWithGardens_Category')
NbFlatsWithGardens_anova = stats.f_oneway(*NbFlatsWithGardens_groups)

AvgGardenSizeFlats_groups = _tertile_groups('AvgGardenSizeFlats_Category')
AvgGardenSizeFlats_anova = stats.f_oneway(*AvgGardenSizeFlats_groups)

NbHouses_groups = _tertile_groups('NbHouses_Category')
NbHouses_anova = stats.f_oneway(*NbHouses_groups)

NbHousesWithGardens_groups = _tertile_groups('NbHousesWithGardens_Category')
NbHousesWithGardens_anova = stats.f_oneway(*NbHousesWithGardens_groups)

AvgGardenSizeHouses_groups = _tertile_groups('AvgGardenSizeHouses_Category')
AvgGardenSizeHouses_anova = stats.f_oneway(*AvgGardenSizeHouses_groups)

# --- Geographic labels: one group per distinct country / region / county ---
country_groups = _label_groups('country_name')
country_anova = stats.f_oneway(*country_groups)

region_groups = _label_groups('region_name')
region_anova = stats.f_oneway(*region_groups)

county_groups = _label_groups('county_name')
county_anova = stats.f_oneway(*county_groups)



# Report every ANOVA result, then state a verdict per factor.
ALPHA = 0.05  # significance level for rejecting H₀

# One entry per test, in report order:
# (label in the results list, label in the verdict sentence, sentence ending, result).
# The label and ending variations only reproduce the text printed by earlier
# runs of this notebook.
anova_report = [
    ("Rainfall", "Rainfall", ".", rainfall_anova),
    ("Temperature", "Temperature", ".", temperature_anova),
    ("Wind Speed", "Wind speed", ".", windspeed_anova),
    ("Min Temperature", "Min Temperature", ".", mintemp_anova),
    ("Max Temperature", "Max Temperature", ".", maxtemp_anova),
    ("Avg Temperature", "Avg Temperature", ".", avgtemp_anova),
    ("Avg Wind Gust Speed", "Avg Wind Gust Speed", ".", windgustspeed_anova),
    ("Avg Wind Direction", "Avg Wind Direction", ".", winddirection_anova),
    ("Latitude", "Latitude", ".", latitude_anova),
    ("Longitude", "Longitude", ".", longitude_anova),
    ("Avg Distance to Park", "Avg Distance to Park", ".", avgdistpark_anova),
    ("Avg Park Size", "Avg Park Size", ".", avgparksize_anova),
    ("Avg Population per Park", "Avg Population per Park", ".", avgpoppark_anova),
    ("Air Quality", "Air Quality", ".", airquality_anova),
    ("Water Pollution", "Water Pollution", ".", waterpollution_anova),
    ("Avg Dog Population per Household", "Avg Dog Population per Household", ".", avgdogs_anova),
    ("Avg Cat Population per Household", "Avg Cat Population per Household", ".", avgcats_anova),
    ("median distance to park", "Median distance to park", "", median_distance_park_anova),
    ("median size of park", "Median size of park", "", median_size_park_anova),
    ("average number of parks within 1000m to park", "Average number of parks within 1000m", "", AvgNbParksWithin1000m_anova),
    ("Number of Flats", "Number of flats", "", NbFlats_anova),
    ("Number of Flats with Gardens", "Number of flats with gardens", "", NbFlatsWithGardens_anova),
    ("Average Garden Size (Flats)", "Average garden size for flats", "", AvgGardenSizeFlats_anova),
    ("Number of Houses", "Number of houses", "", NbHouses_anova),
    ("Number of Houses with Gardens", "Number of houses with gardens", "", NbHousesWithGardens_anova),
    ("Average Garden Size (Houses)", "Average garden size for houses", "", AvgGardenSizeHouses_anova),
    ("Country", "Country", "", country_anova),
    ("Region", "Region", "", region_anova),
    ("County", "County", "", county_anova),
]

# F-statistic and p-value of every test.
for results_label, _, _, anova in anova_report:
    print(f"ANOVA Results for {results_label}:\nF-Statistic: {anova.statistic:.4f}, p-value: {anova.pvalue:.4f}")
print()  # blank line between the results list and the verdicts

# Verdict per factor.
for _, verdict_label, ending, anova in anova_report:
    if pd.isna(anova.pvalue):
        # f_oneway warns and returns NaN for degenerate input (an empty group,
        # or every group holding a single value). `NaN < ALPHA` is False, so
        # without this branch such a test would be reported as "not significant".
        print(f"⚠️ {verdict_label}: ANOVA is undefined for these groups (p-value is NaN); no conclusion drawn")
    elif anova.pvalue < ALPHA:
        print(f"✅ {verdict_label} has a significant effect on happiness (reject H₀){ending}")
    else:
        print(f"❌ {verdict_label} does not significantly affect happiness (fail to reject H₀){ending}")



ANOVA Results for Rainfall:
F-Statistic: 6.5744, p-value: 0.0016
ANOVA Results for Temperature:
F-Statistic: 3.9036, p-value: 0.0213
ANOVA Results for Wind Speed:
F-Statistic: 12.6037, p-value: 0.0000
ANOVA Results for Min Temperature:
F-Statistic: 1.8173, p-value: 0.1644
ANOVA Results for Max Temperature:
F-Statistic: 6.9335, p-value: 0.0011
ANOVA Results for Avg Temperature:
F-Statistic: 3.9036, p-value: 0.0213
ANOVA Results for Avg Wind Gust Speed:
F-Statistic: 9.3587, p-value: 0.0001
ANOVA Results for Avg Wind Direction:
F-Statistic: 3.7928, p-value: 0.0237
ANOVA Results for Latitude:
F-Statistic: 3.9072, p-value: 0.0212
ANOVA Results for Longitude:
F-Statistic: 3.7838, p-value: 0.0239
ANOVA Results for Avg Distance to Park:
F-Statistic: 35.7922, p-value: 0.0000
ANOVA Results for Avg Park Size:
F-Statistic: 3.5055, p-value: 0.0315
ANOVA Results for Avg Population per Park:
F-Statistic: 5.2433, p-value: 0.0059
ANOVA Results for Air Quality:
F-Statistic: 0.2574, p-value: 0.7737
ANOVA

  if _f_oneway_is_too_small(samples):


# ANOVA tests for the new high happiness category we created

In [10]:
# One-way ANOVA of the `new_high` happiness measure against every factor:
# continuous factors are split into terciles, nominal factors are used as-is.
MEASURE = 'new_high'  # Measure for the ANOVA
ALPHA = 0.05  # significance level for rejecting H₀
TERCILE_LABELS = ['Low', 'Medium', 'High']

# NOTE(review): these two source column names are guesses based on the naming
# of the other park columns; the original cell reported both factors without
# ever building their inputs. Confirm the names against the table schema.
UNCONFIRMED_COLUMNS = {'median_size_park', 'avg_nb_parks_within_1000m'}

# NOTE(review): this rebinds the notebook-wide `df`, so rows without a value
# for the measure stay dropped for every cell executed afterwards.
# .copy() makes the category columns below land on an independent frame.
df = df.dropna(subset=[MEASURE]).copy()

# Continuous factors, each split into 3 equal sized groups:
# (result variable, source column, category column,
#  label in the results list, label in the verdict sentence, sentence ending).
# The label and ending variations only reproduce the text printed by earlier
# runs of this notebook.
TERCILE_FACTORS = [
    ('rainfall_anova', 'total_rain_mm', 'Rainfall_Category', 'Rainfall', 'Rainfall', '.'),
    # 'Temperature' and 'Avg Temperature' both bin avg_temp_c, so they give the same result.
    ('temperature_anova', 'avg_temp_c', 'Temperature_Category', 'Temperature', 'Temperature', '.'),
    ('windspeed_anova', 'avg_wind_speed_m_s', 'WindSpeed_Category', 'Wind Speed', 'Wind speed', '.'),
    ('mintemp_anova', 'avg_min_temp_c1', 'MinTemp_Category', 'Min Temperature', 'Min Temperature', '.'),
    ('maxtemp_anova', 'avg_max_temp_c', 'MaxTemp_Category', 'Max Temperature', 'Max Temperature', '.'),
    ('avgtemp_anova', 'avg_temp_c', 'AvgTemp_Category', 'Avg Temperature', 'Avg Temperature', '.'),
    ('windgustspeed_anova', 'avg_wind_gust_speed_m_s', 'WindGustSpeed_Category', 'Avg Wind Gust Speed', 'Avg Wind Gust Speed', '.'),
    ('winddirection_anova', 'avg_dominant_wind_direction', 'AvgWindDirection_Category', 'Avg Wind Direction', 'Avg Wind Direction', '.'),
    ('latitude_anova', 'Happiness_Latitude', 'Latitude_Category', 'Latitude', 'Latitude', '.'),
    ('longitude_anova', 'Happiness_Longitude', 'Longitude_Category', 'Longitude', 'Longitude', '.'),
    ('avgdistpark_anova', 'avg_dist_to_park', 'AvgDistPark_Category', 'Avg Distance to Park', 'Avg Distance to Park', '.'),
    ('avgparksize_anova', 'avg_size_park', 'AvgSizePark_Category', 'Avg Park Size', 'Avg Park Size', '.'),
    ('avgpoppark_anova', 'avg_pop_per_park', 'AvgPopPark_Category', 'Avg Population per Park', 'Avg Population per Park', '.'),
    ('airquality_anova', 'AirQuality', 'AirQuality_Category', 'Air Quality', 'Air Quality', '.'),
    ('waterpollution_anova', 'WaterPollution', 'WaterPollution_Category', 'Water Pollution', 'Water Pollution', '.'),
    ('avgdogs_anova', 'average_dogs_per_household', 'AvgDogs_Category', 'Avg Dog Population per Household', 'Avg Dog Population per Household', '.'),
    ('avgcats_anova', 'average_cats_per_household', 'AvgCats_Category', 'Avg Cat Population per Household', 'Avg Cat Population per Household', '.'),
    ('median_distance_park_anova', 'median_dist_to_park', 'MedianDistToPark_Category', 'median distance to park', 'Median distance to park', ''),
    ('median_size_park_anova', 'median_size_park', 'MedianSizePark_Category', 'median size of park', 'Median size of park', ''),
    ('AvgNbParksWithin1000m_anova', 'avg_nb_parks_within_1000m', 'AvgNbParksWithin1000m_Category', 'average number of parks within 1000m to park', 'Average number of parks within 1000m', ''),
    ('NbFlats_anova', 'nb_flats', 'NbFlats_Category', 'Number of Flats', 'Number of flats', ''),
    ('NbFlatsWithGardens_anova', 'nb_flats_with_gardens', 'NbFlatsWithGardens_Category', 'Number of Flats with Gardens', 'Number of flats with gardens', ''),
    ('AvgGardenSizeFlats_anova', 'avg_garden_size_flats', 'AvgGardenSizeFlats_Category', 'Average Garden Size (Flats)', 'Average garden size for flats', ''),
    ('NbHouses_anova', 'nb_houses', 'NbHouses_Category', 'Number of Houses', 'Number of houses', ''),
    ('NbHousesWithGardens_anova', 'nb_houses_with_gardens', 'NbHousesWithGardens_Category', 'Number of Houses with Gardens', 'Number of houses with gardens', ''),
    ('AvgGardenSizeHouses_anova', 'avg_garden_size_houses', 'AvgGardenSizeHouses_Category', 'Average Garden Size (Houses)', 'Average garden size for houses', ''),
]

# Nominal factors, grouped by their own values: (result variable, grouping column, label).
NOMINAL_FACTORS = [
    ('country_anova', 'country_name', 'Country'),
    ('region_anova', 'region_name', 'Region'),
    ('county_anova', 'county_name', 'County'),
]


def _build_terciles(frame, source_col, category_col):
    """Bin `source_col` into 3 equal sized groups stored in `category_col`.

    Returns True when `category_col` can be used afterwards. Only columns
    listed in UNCONFIRMED_COLUMNS are allowed to be absent; any other missing
    source column raises KeyError.
    """
    if source_col in frame.columns or source_col not in UNCONFIRMED_COLUMNS:
        frame[category_col] = pd.qcut(frame[source_col], q=3, labels=TERCILE_LABELS)
        return True
    if category_col in frame.columns:
        # Fall back to the column the original cell relied on; its bins were
        # not computed from the rows currently in the frame.
        print(f"⚠️ '{source_col}' not found; reusing the existing '{category_col}' column")
        return True
    print(f"⚠️ '{source_col}' not found; this factor is skipped")
    return False


def _anova_across_levels(frame, level_col, levels):
    """Run a one-way ANOVA of MEASURE across the given `levels` of `level_col`."""
    samples = [frame.loc[frame[level_col] == level, MEASURE] for level in levels]
    return stats.f_oneway(*samples)


# Run every test; each entry is
# (result variable, results label, verdict label, sentence ending, result or None).
anova_report = []
for result_name, source_col, category_col, results_label, verdict_label, ending in TERCILE_FACTORS:
    anova = None
    if _build_terciles(df, source_col, category_col):
        anova = _anova_across_levels(df, category_col, TERCILE_LABELS)
    anova_report.append((result_name, results_label, verdict_label, ending, anova))

for result_name, level_col, label in NOMINAL_FACTORS:
    # dropna(): a missing label never compares equal to anything, so it would
    # only contribute an empty sample, and f_oneway returns NaN as soon as one
    # sample is empty.
    levels = df[level_col].dropna().unique()
    anova_report.append((result_name, label, label, '', _anova_across_levels(df, level_col, levels)))

# Keep the historical `<factor>_anova` names bound for any cell that reads them.
for result_name, _, _, _, anova in anova_report:
    if anova is not None:
        globals()[result_name] = anova

# F-statistic and p-value of every test.
for _, results_label, _, _, anova in anova_report:
    if anova is None:
        print(f"ANOVA Results for {results_label}:\nnot computed (source column not found)")
    else:
        print(f"ANOVA Results for {results_label}:\nF-Statistic: {anova.statistic:.4f}, p-value: {anova.pvalue:.4f}")
print()  # blank line between the results list and the verdicts

# Verdict per factor.
# NOTE(review): the tests are each judged at ALPHA with no multiple-comparison
# correction, as in the original cell.
for _, _, verdict_label, ending, anova in anova_report:
    if anova is None:
        continue
    if pd.isna(anova.pvalue):
        # f_oneway warns and returns NaN for degenerate input (an empty group,
        # or every group holding a single value). `NaN < ALPHA` is False, so
        # without this branch such a test would be reported as "not significant".
        print(f"⚠️ {verdict_label}: ANOVA is undefined for these groups (p-value is NaN); no conclusion drawn")
    elif anova.pvalue < ALPHA:
        print(f"✅ {verdict_label} has a significant effect on happiness (reject H₀){ending}")
    else:
        print(f"❌ {verdict_label} does not significantly affect happiness (fail to reject H₀){ending}")



ANOVA Results for Rainfall:
F-Statistic: 1.2718, p-value: 0.2819
ANOVA Results for Temperature:
F-Statistic: 0.7650, p-value: 0.4663
ANOVA Results for Wind Speed:
F-Statistic: 0.3388, p-value: 0.7129
ANOVA Results for Min Temperature:
F-Statistic: 1.0312, p-value: 0.3579
ANOVA Results for Max Temperature:
F-Statistic: 0.8377, p-value: 0.4338
ANOVA Results for Avg Temperature:
F-Statistic: 0.7650, p-value: 0.4663
ANOVA Results for Avg Wind Gust Speed:
F-Statistic: 0.0196, p-value: 0.9806
ANOVA Results for Avg Wind Direction:
F-Statistic: 3.9646, p-value: 0.0200
ANOVA Results for Latitude:
F-Statistic: 2.0230, p-value: 0.1342
ANOVA Results for Longitude:
F-Statistic: 0.6965, p-value: 0.4992
ANOVA Results for Avg Distance to Park:
F-Statistic: 29.2583, p-value: 0.0000
ANOVA Results for Avg Park Size:
F-Statistic: 1.6837, p-value: 0.1878
ANOVA Results for Avg Population per Park:
F-Statistic: 2.1529, p-value: 0.1183
ANOVA Results for Air Quality:
F-Statistic: 2.2116, p-value: 0.1162
ANOVA 

  if _f_oneway_is_too_small(samples):


# ANOVA tests for the new low happiness category we created

In [8]:
# Response variable for this round of ANOVA tests: rows without a
# 'new_low' value are discarded before anything else is computed.
df = df.dropna(subset=['new_low'])

# Each numeric predictor is cut into three quantile-based bins (terciles)
# so it can serve as a categorical factor in a one-way ANOVA.
# Maps: new category column -> numeric source column.
# Note that 'Temperature_Category' and 'AvgTemp_Category' are both derived
# from 'avg_temp_c', so they always hold the same bins.
tercile_labels = ['Low', 'Medium', 'High']
binned_columns = {
    'Rainfall_Category': 'total_rain_mm',
    'Temperature_Category': 'avg_temp_c',
    'WindSpeed_Category': 'avg_wind_speed_m_s',
    'MinTemp_Category': 'avg_min_temp_c1',
    'MaxTemp_Category': 'avg_max_temp_c',
    'AvgTemp_Category': 'avg_temp_c',
    'WindGustSpeed_Category': 'avg_wind_gust_speed_m_s',
    'AvgWindDirection_Category': 'avg_dominant_wind_direction',
    'Latitude_Category': 'Happiness_Latitude',
    'Longitude_Category': 'Happiness_Longitude',
    'AvgDistPark_Category': 'avg_dist_to_park',
    'AvgSizePark_Category': 'avg_size_park',
    'AvgPopPark_Category': 'avg_pop_per_park',
    'AirQuality_Category': 'AirQuality',
    'WaterPollution_Category': 'WaterPollution',
    'AvgDogs_Category': 'average_dogs_per_household',
    'AvgCats_Category': 'average_cats_per_household',
    'MedianDistToPark_Category': 'median_dist_to_park',
    'NbFlats_Category': 'nb_flats',
    'NbFlatsWithGardens_Category': 'nb_flats_with_gardens',
    'AvgGardenSizeFlats_Category': 'avg_garden_size_flats',
    'NbHouses_Category': 'nb_houses',
    'NbHousesWithGardens_Category': 'nb_houses_with_gardens',
    'AvgGardenSizeHouses_Category': 'avg_garden_size_houses',
}

# Dicts keep insertion order, so the columns are created in the order listed.
for category_col, source_col in binned_columns.items():
    df[category_col] = pd.qcut(df[source_col], q=3, labels=tercile_labels)


def _new_low_split(column, levels=('Low', 'Medium', 'High')):
    """Return one 'new_low' Series per level of `column`, in the order given.

    Reads the notebook-level `df` at call time. A level that matches no
    row yields an empty Series.
    """
    return [df.loc[df[column] == level, 'new_low'] for level in levels]


# One-way ANOVA of 'new_low' across the Low/Medium/High bins of each
# predictor. Every `<name>_groups` / `<name>_anova` pair is kept under its
# own name so later steps can refer to the individual results.

# --- Weather ---------------------------------------------------------------
rainfall_groups = _new_low_split('Rainfall_Category')
rainfall_anova = stats.f_oneway(*rainfall_groups)

temperature_groups = _new_low_split('Temperature_Category')
temperature_anova = stats.f_oneway(*temperature_groups)

windspeed_groups = _new_low_split('WindSpeed_Category')
windspeed_anova = stats.f_oneway(*windspeed_groups)

mintemp_groups = _new_low_split('MinTemp_Category')
mintemp_anova = stats.f_oneway(*mintemp_groups)

maxtemp_groups = _new_low_split('MaxTemp_Category')
maxtemp_anova = stats.f_oneway(*maxtemp_groups)

avgtemp_groups = _new_low_split('AvgTemp_Category')
avgtemp_anova = stats.f_oneway(*avgtemp_groups)

windgustspeed_groups = _new_low_split('WindGustSpeed_Category')
windgustspeed_anova = stats.f_oneway(*windgustspeed_groups)

winddirection_groups = _new_low_split('AvgWindDirection_Category')
winddirection_anova = stats.f_oneway(*winddirection_groups)

# --- Position --------------------------------------------------------------
latitude_groups = _new_low_split('Latitude_Category')
latitude_anova = stats.f_oneway(*latitude_groups)

longitude_groups = _new_low_split('Longitude_Category')
longitude_anova = stats.f_oneway(*longitude_groups)

# --- Parks (averages) ------------------------------------------------------
avgdistpark_groups = _new_low_split('AvgDistPark_Category')
avgdistpark_anova = stats.f_oneway(*avgdistpark_groups)

avgparksize_groups = _new_low_split('AvgSizePark_Category')
avgparksize_anova = stats.f_oneway(*avgparksize_groups)

avgpoppark_groups = _new_low_split('AvgPopPark_Category')
avgpoppark_anova = stats.f_oneway(*avgpoppark_groups)

# --- Environment -----------------------------------------------------------
airquality_groups = _new_low_split('AirQuality_Category')
airquality_anova = stats.f_oneway(*airquality_groups)

waterpollution_groups = _new_low_split('WaterPollution_Category')
waterpollution_anova = stats.f_oneway(*waterpollution_groups)

# --- Pets ------------------------------------------------------------------
avgdogs_groups = _new_low_split('AvgDogs_Category')
avgdogs_anova = stats.f_oneway(*avgdogs_groups)

avgcats_groups = _new_low_split('AvgCats_Category')
avgcats_anova = stats.f_oneway(*avgcats_groups)

# --- Parks (medians / counts) ----------------------------------------------
median_distance_park_groups = _new_low_split('MedianDistToPark_Category')
median_distance_park_anova = stats.f_oneway(*median_distance_park_groups)

# NOTE(review): 'MedianSizePark_Category' and
# 'AvgNbParksWithin1000m_Category' are not among the columns binned at the
# top of this cell; presumably they were created by an earlier cell —
# confirm they are still valid after the dropna on 'new_low'.
# Median park size
median_size_park_groups = _new_low_split('MedianSizePark_Category')
median_size_park_anova = stats.f_oneway(*median_size_park_groups)

# Average number of parks within 1000m
AvgNbParksWithin1000m_groups = _new_low_split('AvgNbParksWithin1000m_Category')
AvgNbParksWithin1000m_anova = stats.f_oneway(*AvgNbParksWithin1000m_groups)

# --- Housing and gardens ---------------------------------------------------
NbFlats_groups = _new_low_split('NbFlats_Category')
NbFlats_anova = stats.f_oneway(*NbFlats_groups)

NbFlatsWithGardens_groups = _new_low_split('NbFlatsWithGardens_Category')
NbFlatsWithGardens_anova = stats.f_oneway(*NbFlatsWithGardens_groups)

AvgGardenSizeFlats_groups = _new_low_split('AvgGardenSizeFlats_Category')
AvgGardenSizeFlats_anova = stats.f_oneway(*AvgGardenSizeFlats_groups)

NbHouses_groups = _new_low_split('NbHouses_Category')
NbHouses_anova = stats.f_oneway(*NbHouses_groups)

NbHousesWithGardens_groups = _new_low_split('NbHousesWithGardens_Category')
NbHousesWithGardens_anova = stats.f_oneway(*NbHousesWithGardens_groups)

AvgGardenSizeHouses_groups = _new_low_split('AvgGardenSizeHouses_Category')
AvgGardenSizeHouses_anova = stats.f_oneway(*AvgGardenSizeHouses_groups)

# --- Geography: one group per distinct value found in the column -----------
# Country
country_groups = _new_low_split('country_name', df['country_name'].unique())
country_anova = stats.f_oneway(*country_groups)

# Region
region_groups = _new_low_split('region_name', df['region_name'].unique())
region_anova = stats.f_oneway(*region_groups)

# County
county_groups = _new_low_split('county_name', df['county_name'].unique())
county_anova = stats.f_oneway(*county_groups)



# Report the F-statistic and p-value of every test, one title line and one
# numbers line per factor, both rounded to four decimals.
anova_report = [
    ("Rainfall", rainfall_anova),
    ("Temperature", temperature_anova),
    ("Wind Speed", windspeed_anova),
    ("Min Temperature", mintemp_anova),
    ("Max Temperature", maxtemp_anova),
    ("Avg Temperature", avgtemp_anova),
    ("Avg Wind Gust Speed", windgustspeed_anova),
    ("Avg Wind Direction", winddirection_anova),
    ("Latitude", latitude_anova),
    ("Longitude", longitude_anova),
    ("Avg Distance to Park", avgdistpark_anova),
    ("Avg Park Size", avgparksize_anova),
    ("Avg Population per Park", avgpoppark_anova),
    ("Air Quality", airquality_anova),
    ("Water Pollution", waterpollution_anova),
    ("Avg Dog Population per Household", avgdogs_anova),
    ("Avg Cat Population per Household", avgcats_anova),
    ("median distance to park", median_distance_park_anova),
    ("median size of park", median_size_park_anova),
    ("average number of parks within 1000m to park", AvgNbParksWithin1000m_anova),
    ("Number of Flats", NbFlats_anova),
    ("Number of Flats with Gardens", NbFlatsWithGardens_anova),
    ("Average Garden Size (Flats)", AvgGardenSizeFlats_anova),
    ("Number of Houses", NbHouses_anova),
    ("Number of Houses with Gardens", NbHousesWithGardens_anova),
    ("Average Garden Size (Houses)", AvgGardenSizeHouses_anova),
    ("Country", country_anova),
    ("Region", region_anova),
    ("County", county_anova),
]

for factor_title, factor_result in anova_report:
    print(
        f"ANOVA Results for {factor_title}:\n"
        f"F-Statistic: {factor_result.statistic:.4f}, p-value: {factor_result.pvalue:.4f}"
    )

# Blank line separating the numeric report from whatever is printed next.
print()



# Plain-language verdict for every test at the 5% significance level.
# A p-value below the threshold prints the "reject H₀" message; anything
# else (including a NaN p-value) prints the "fail to reject H₀" message.
significance_level = 0.05

# The first batch of messages ends with a full stop and the second batch
# does not; the two batches are kept separate so the printed text is
# unchanged.
verdict_batches = (
    (".", [
        ("Rainfall", rainfall_anova),
        ("Temperature", temperature_anova),
        ("Wind speed", windspeed_anova),
        ("Min Temperature", mintemp_anova),
        ("Max Temperature", maxtemp_anova),
        ("Avg Temperature", avgtemp_anova),
        ("Avg Wind Gust Speed", windgustspeed_anova),
        ("Avg Wind Direction", winddirection_anova),
        ("Latitude", latitude_anova),
        ("Longitude", longitude_anova),
        ("Avg Distance to Park", avgdistpark_anova),
        ("Avg Park Size", avgparksize_anova),
        ("Avg Population per Park", avgpoppark_anova),
        ("Air Quality", airquality_anova),
        ("Water Pollution", waterpollution_anova),
        ("Avg Dog Population per Household", avgdogs_anova),
        ("Avg Cat Population per Household", avgcats_anova),
    ]),
    ("", [
        ("Median distance to park", median_distance_park_anova),
        ("Median size of park", median_size_park_anova),
        ("Average number of parks within 1000m", AvgNbParksWithin1000m_anova),
        ("Number of flats", NbFlats_anova),
        ("Number of flats with gardens", NbFlatsWithGardens_anova),
        ("Average garden size for flats", AvgGardenSizeFlats_anova),
        ("Number of houses", NbHouses_anova),
        ("Number of houses with gardens", NbHousesWithGardens_anova),
        ("Average garden size for houses", AvgGardenSizeHouses_anova),
        ("Country", country_anova),
        ("Region", region_anova),
        ("County", county_anova),
    ]),
)

for closing_mark, batch in verdict_batches:
    for factor_label, factor_result in batch:
        if factor_result.pvalue < significance_level:
            print(f"✅ {factor_label} has a significant effect on happiness (reject H₀){closing_mark}")
        else:
            print(f"❌ {factor_label} does not significantly affect happiness (fail to reject H₀){closing_mark}")



ANOVA Results for Rainfall:
F-Statistic: 1.2671, p-value: 0.2832
ANOVA Results for Temperature:
F-Statistic: 0.7678, p-value: 0.4650
ANOVA Results for Wind Speed:
F-Statistic: 0.3401, p-value: 0.7120
ANOVA Results for Min Temperature:
F-Statistic: 1.0321, p-value: 0.3576
ANOVA Results for Max Temperature:
F-Statistic: 0.8396, p-value: 0.4330
ANOVA Results for Avg Temperature:
F-Statistic: 0.7678, p-value: 0.4650
ANOVA Results for Avg Wind Gust Speed:
F-Statistic: 0.0198, p-value: 0.9804
ANOVA Results for Avg Wind Direction:
F-Statistic: 3.9705, p-value: 0.0199
ANOVA Results for Latitude:
F-Statistic: 2.0333, p-value: 0.1328
ANOVA Results for Longitude:
F-Statistic: 0.6895, p-value: 0.5027
ANOVA Results for Avg Distance to Park:
F-Statistic: 29.2727, p-value: 0.0000
ANOVA Results for Avg Park Size:
F-Statistic: 1.6829, p-value: 0.1880
ANOVA Results for Avg Population per Park:
F-Statistic: 2.1536, p-value: 0.1182
ANOVA Results for Air Quality:
F-Statistic: 2.2101, p-value: 0.1163
ANOVA 

  if _f_oneway_is_too_small(samples):
