In [None]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind

In [None]:
# Read the semicolon-delimited nomads_all_merged CSV into a DataFrame.
df = pd.read_csv(
    "../data/clean/nomads_all_merged.csv",
    sep=";",
)

In [None]:
# Preview only the first rows; displaying the whole frame bloats the saved
# notebook output without adding information.
df.head()

## Hypothesis testing 1
#### Null Hypothesis (H₀):
##### There is no significant difference in the number of digital nomads between low-cost cities (cost-of-living category "Very Low" or "Low") and high-cost cities (category "High" or "Very High").


#### Alternative Hypothesis (H₁):
##### Low-cost-of-living cities have significantly more digital nomads than high-cost-of-living cities.

In [None]:
# Total digital_nomads_count per cost_of_living_category, rounded and
# ordered from the largest total to the smallest.
df_cat = (
    df.groupby("cost_of_living_category")["digital_nomads_count"]
    .sum()
    .round()
    .reset_index()
    .sort_values(by="digital_nomads_count", ascending=False)
)
df_cat

In [None]:
# Build the two samples from the individual rows of `df`, not from the
# per-category totals in `df_cat`. The aggregated table holds a single summed
# value per category, so each group would contain at most two numbers -- too
# few observations for a meaningful t-test, and a comparison of totals rather
# than of cities.
# NOTE(review): assumes each row of `df` is one city -- confirm.
is_low_cost = df["cost_of_living_category"].isin(["Very Low", "Low"])
is_high_cost = df["cost_of_living_category"].isin(["High", "Very High"])

# Drop missing counts so a single NaN does not turn the test result into NaN.
low_cost = df.loc[is_low_cost, "digital_nomads_count"].dropna()
high_cost = df.loc[is_high_cost, "digital_nomads_count"].dropna()

In [None]:
# Welch's t-test (equal_var=False: the two groups are not assumed to share a
# variance). H1 is directional -- low-cost cities have MORE nomads -- so the
# test is one-sided: alternative="greater" tests mean(low_cost) > mean(high_cost).
# The default two-sided test would also reject when high-cost cities have more.
# (`alternative` requires SciPy >= 1.6.)
t_stat, p_value = ttest_ind(
    low_cost,
    high_cost,
    equal_var=False,
    alternative="greater",
)

# One-row summary table for display.
results = pd.DataFrame(
    {
        "Test": ["Independent t-test"],
        "T-Statistic": [t_stat],
        "P-Value": [p_value],
    }
)

In [None]:
alpha = 0.05  # Significance level

# H1 is directional, so a small p-value alone does not support it: the
# t-statistic (computed as low_cost relative to high_cost) must also be
# positive before concluding that low-cost cities attract more nomads.
if p_value < alpha and t_stat > 0:
    interpretation = "Reject H₀: There is a significant difference, and low-cost cities attract significantly more digital nomads."
elif p_value < alpha:
    # Significant, but the difference points the other way.
    interpretation = "Reject H₀: There is a significant difference, but in the opposite direction: high-cost cities attract more digital nomads, so H₁ is not supported."
else:
    interpretation = "Fail to reject H₀: There is no significant difference in digital nomads between low-cost and high-cost cities."

interpretation

In [None]:
# Show the one-row summary table (test name, t-statistic, p-value) for hypothesis 1.
results

## Hypothesis testing 2
#### Null Hypothesis (H₀): 
##### There is no significant difference in the number of digital nomads between low-quality-of-life cities (quality-of-life category "Very Low" or "Low") and high-quality-of-life cities (category "High" or "Very High").
#### Alternative Hypothesis (H₁): 
##### High-quality-of-life cities attract significantly more digital nomads than low-quality-of-life cities.

In [None]:
# Total digital_nomads_count per quality_of_life_category, rounded and
# ordered from the largest total to the smallest.
df_cat = (
    df.groupby("quality_of_life_category")["digital_nomads_count"]
    .sum()
    .round()
    .reset_index()
    .sort_values(by="digital_nomads_count", ascending=False)
)
df_cat

In [None]:
# Build the two samples from the individual rows of `df`, not from the
# per-category totals in `df_cat`. The aggregated table holds a single summed
# value per category, so each group would contain at most two numbers -- too
# few observations for a meaningful t-test.
# NOTE(review): assumes each row of `df` is one city -- confirm.
is_low_qol = df["quality_of_life_category"].isin(["Very Low", "Low"])
is_high_qol = df["quality_of_life_category"].isin(["High", "Very High"])

# Drop missing counts so a single NaN does not turn the test result into NaN.
low_qol = df.loc[is_low_qol, "digital_nomads_count"].dropna()
high_qol = df.loc[is_high_qol, "digital_nomads_count"].dropna()

# Welch's t-test (equal_var=False). H1 is directional -- high-quality-of-life
# cities have MORE nomads -- so the test is one-sided: with low_qol as the
# first sample, alternative="less" tests mean(low_qol) < mean(high_qol).
# (`alternative` requires SciPy >= 1.6.)
t_stat_qol, p_value_qol = ttest_ind(
    low_qol,
    high_qol,
    equal_var=False,
    alternative="less",
)

# One-row summary table for display.
results_qol = pd.DataFrame(
    {
        "Test": ["Independent t-test"],
        "T-Statistic": [t_stat_qol],
        "P-Value": [p_value_qol],
    }
)

In [None]:
alpha = 0.05  # Significance level

# H1 is directional, so a small p-value alone does not support it: the
# t-statistic (computed as low_qol relative to high_qol) must also be
# negative before concluding that high-quality-of-life cities attract more.
if p_value_qol < alpha and t_stat_qol < 0:
    interpretation_qol = "Reject H₀: There is a significant difference, and high-quality-of-life cities attract significantly more digital nomads."
elif p_value_qol < alpha:
    # Significant, but the difference points the other way.
    interpretation_qol = "Reject H₀: There is a significant difference, but in the opposite direction: low-quality-of-life cities attract more digital nomads, so H₁ is not supported."
else:
    interpretation_qol = "Fail to reject H₀: There is no significant difference in digital nomads between low-quality and high-quality cities."

interpretation_qol

In [None]:
# Show the one-row summary table (test name, t-statistic, p-value) for hypothesis 2.
results_qol