In [16]:
### Étape 1 : Chargement des bibliothèques
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm



In [17]:
### Step 2: Load the data
# Input CSV files, given relative to the notebook's working directory.
gdp_file = "gdp_per_capita.csv"
urban_file = "urbanisation.csv"
edu_file = "enseignement.csv"

# Fail fast, before any frame is loaded, and report every missing input at once
# together with the directory that was searched. Without this check the cell
# stops at the first absent file and leaves only some of the frames defined.
missing_files = [name for name in (gdp_file, urban_file, edu_file) if not Path(name).is_file()]
if missing_files:
    raise FileNotFoundError(
        f"Missing input file(s): {', '.join(missing_files)} (searched in {Path.cwd()})"
    )

# skiprows=3 makes pandas skip the first three lines of each file before it
# reads the header row.
# NOTE(review): presumably a metadata preamble in the downloaded CSVs - confirm.
df_gdp = pd.read_csv(gdp_file, skiprows=3)
df_urban = pd.read_csv(urban_file, skiprows=3)
df_edu = pd.read_csv(edu_file, skiprows=3)

FileNotFoundError: [Errno 2] No such file or directory: 'enseignement.csv'

In [None]:

### Step 3: Explore the data
# Print the first rows of each raw frame, in load order, as a quick sanity check.
for raw_frame in (df_gdp, df_urban, df_edu):
    print(raw_frame.head())


In [None]:
### Step 4: Reshape to long format
def _to_long(wide_frame, value_name):
    """Unpivot a wide frame into one row per (country, original column) pair.

    Every column other than "Country Name" and "Country Code" is melted: its
    label is stored in "Year" and its cell value in `value_name`.
    """
    return pd.melt(
        wide_frame,
        id_vars=["Country Name", "Country Code"],
        var_name="Year",
        value_name=value_name,
    )


# NOTE(review): any column besides the two id columns ends up in "Year", whether
# or not its label is a year - confirm the CSVs carry no other metadata columns.
df_gdp_melted = _to_long(df_gdp, "GDP_per_capita")
df_urban_melted = _to_long(df_urban, "Urbanization_rate")
df_edu_melted = _to_long(df_edu, "Secondary_Education_Rate")

In [None]:

### Step 5: Merge the datasets
# Convert the melted column labels to numbers; labels that are not numeric
# (i.e. anything that is not a year) become NaN.
df_gdp_melted["Year"] = pd.to_numeric(df_gdp_melted["Year"], errors='coerce')
df_urban_melted["Year"] = pd.to_numeric(df_urban_melted["Year"], errors='coerce')
df_edu_melted["Year"] = pd.to_numeric(df_edu_melted["Year"], errors='coerce')


def _numeric_observations(long_frame, value_column):
    """Return the rows of `long_frame` that have a numeric Year, with
    `value_column` coerced to a numeric dtype.

    pandas.merge matches NaN keys against each other, so rows whose Year could
    not be parsed would be cross-joined per country across the three frames
    before being thrown away by dropna(). Removing them first avoids that
    wasted work. Coercing the value column turns any non-numeric cell into NaN
    (dropped after the merge) and guarantees a numeric dtype for the
    regressions and log transforms that follow.
    """
    with_year = long_frame.dropna(subset=["Year"])
    return with_year.assign(
        **{value_column: pd.to_numeric(with_year[value_column], errors="coerce")}
    )


merge_keys = ["Country Name", "Country Code", "Year"]

# Inner joins: keep only (country, year) pairs present in all three indicators.
df_merged = pd.merge(
    _numeric_observations(df_gdp_melted, "GDP_per_capita"),
    _numeric_observations(df_urban_melted, "Urbanization_rate"),
    on=merge_keys,
    how="inner",
)
df_merged = pd.merge(
    df_merged,
    _numeric_observations(df_edu_melted, "Secondary_Education_Rate"),
    on=merge_keys,
    how="inner",
)

# Drop observations with any missing value; reassignment instead of
# inplace=True keeps the step explicit and safe to re-run.
df_merged = df_merged.dropna()

In [None]:

### Step 6: Initial visualisation
# Scatter plot of GDP per capita against the urbanisation rate, drawn on an
# explicitly created 10x6 figure.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df_merged,
    x="Urbanization_rate",
    y="GDP_per_capita",
    alpha=0.5,
    ax=ax,
)
ax.set_xlabel("Taux d'urbanisation (%)")
ax.set_ylabel("PIB par habitant ($US)")
ax.set_title("Nuage de points : PIB vs Urbanisation")
plt.show()

In [None]:

### Step 7: Linear regression
# Ordinary least squares of GDP per capita on the urbanisation rate, with an
# intercept column added to the regressor.
y = df_merged["GDP_per_capita"]
X = sm.add_constant(df_merged["Urbanization_rate"])
ols_levels = sm.OLS(y, X)
model = ols_levels.fit()
print(model.summary())

In [None]:

### Étape 8 : Transformation logarithmique
# np.log needs strictly positive inputs, so keep only the rows where all three
# variables are greater than zero.
positive_rows = (
    (df_merged["GDP_per_capita"] > 0)
    & (df_merged["Urbanization_rate"] > 0)
    & (df_merged["Secondary_Education_Rate"] > 0)
)
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignments below do not trigger pandas' SettingWithCopyWarning.
df_merged = df_merged.loc[positive_rows].copy()
df_merged["log_GDP_per_capita"] = np.log(df_merged["GDP_per_capita"])
df_merged["log_Urbanization_rate"] = np.log(df_merged["Urbanization_rate"])
df_merged["log_Secondary_Education"] = np.log(df_merged["Secondary_Education_Rate"])

In [None]:

### Step 9: Log-log regression
# OLS of log GDP per capita on the two log-transformed regressors plus an
# intercept column.
log_regressors = ["log_Urbanization_rate", "log_Secondary_Education"]
y_log = df_merged["log_GDP_per_capita"]
X_log = sm.add_constant(df_merged[log_regressors])
ols_logs = sm.OLS(y_log, X_log)
model_log = ols_logs.fit()
print(model_log.summary())

In [None]:

### Step 10: Final visualisation
# Log urbanisation against log GDP per capita, with each point coloured by the
# log of the secondary-education rate.
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    df_merged["log_Urbanization_rate"],
    df_merged["log_GDP_per_capita"],
    c=df_merged["log_Secondary_Education"],
    cmap="coolwarm",
    alpha=0.5,
)
fig.colorbar(scatter, ax=ax, label="Log de l'accès à l'éducation secondaire")
ax.set_xlabel("Log du Taux d'urbanisation (%)")
ax.set_ylabel("Log du PIB par habitant")
ax.set_title("Relation entre Log(Taux d'Urbanisation), Log(PIB) et Éducation")
plt.show()

In [None]:

### Step 11: Conclusion
# NOTE(review): this sentence is a fixed string; nothing in this cell derives
# it from the fitted models, so re-check it against the regression summaries.
conclusion = "L'urbanisation est positivement corrélée à la croissance économique, et l'éducation renforce cette relation."
print(conclusion)
