### Chargement des packages

In [2]:
import csv
import pandas as pd

# Données annuelles

## Chargement des données

In [119]:
# Read the yearly GDP table from the CSV file at the relative path '../Data/GDP_yearly.csv'
GDP_yearly = pd.read_csv('../Data/GDP_yearly.csv')

In [120]:
# Name the first column 'Year' (rebinding the result rather than mutating in place)
GDP_yearly = GDP_yearly.rename(columns={GDP_yearly.columns[0]: 'Year'})

In [121]:
# Discard the first row of the loaded table (index label 0 of the default integer index).
# NOTE(review): why this row is excluded is not visible in this notebook — confirm
# against the source CSV.
# Dropping by label with errors="ignore" (instead of positional `iloc[1:]`) keeps the
# cell idempotent: re-running it no longer removes one additional year each time.
GDP_yearly = GDP_yearly.drop(index=0, errors="ignore")

In [122]:
# Preview the first rows of the table
GDP_yearly.head()

Unnamed: 0,Year,Albania,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bahrain,Bosnia and Herzegovina,...,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Ukraine,Uruguay,United States,World (WBG members),South Africa
1,1995.0,,31811706.0,263941.7,721440.7,300536.7,364273.3,36020.68,,,...,337452.5,199826.3,,425519.3,220503.9,,,10161774.0,40541300.0,246081.0
2,1996.0,,32739377.0,278378.9,750107.6,306832.4,369087.2,38456.56,,,...,343263.6,211192.2,,456775.6,234123.5,,,10545166.0,41920772.0,256662.6
3,1997.0,,33848979.0,300949.2,785460.1,313689.7,383089.1,32924.43,,,...,354558.9,205451.0,,490623.4,248291.3,,32837.01,11014212.0,43643455.0,263335.7
4,1998.0,,34748425.0,312429.1,822151.8,324494.7,390604.3,34214.57,,,...,369272.7,189673.3,,492792.6,258731.5,,34324.81,11507833.0,44732271.0,264652.4
5,1999.0,,35919081.0,301986.4,858627.3,336360.4,404442.0,31358.14,,,...,383804.5,198465.0,,476598.7,276149.2,,33669.94,12058971.0,46236350.0,271004.1


## Traitement des valeurs manquantes

In [123]:
# Per-column count of missing entries, for every column except the first one
missing_values = GDP_yearly.iloc[:, 1:].isna().sum(axis=0)

In [124]:
# Display the number of missing values per column
missing_values

Albania                13
Advanced Economies      0
Argentina               0
Australia               0
Austria                 0
                       ..
Ukraine                 6
Uruguay                 2
United States           0
World (WBG members)     0
South Africa            0
Length: 100, dtype: int64

#### Première solution : on supprime tous les pays avec NA
On garde une première base de données qui ne contient que les pays sans aucune valeur manquante : 62 pays au total, sur une période totale de 30 ans.

In [125]:
# How many columns have no missing value at all
num_cols_no_missing = missing_values.eq(0).sum()
print(f"\nNumber of countries without missing values : {num_cols_no_missing}")


Number of countries without missing values : 62


In [126]:
# First column of the table (the dates)
dates_column = GDP_yearly.iloc[:, 0]

# Entries of missing_values equal to zero, i.e. the fully observed columns
cols_with_no_missing = missing_values.loc[missing_values.eq(0)]

# Put the dates next to the fully observed columns, then use the dates as the index
GDP_yearly_30 = pd.concat(
    [dates_column, GDP_yearly.loc[:, cols_with_no_missing.index]],
    axis="columns",
).set_index(dates_column.name)

In [127]:
# Display the resulting frame
GDP_yearly_30

Unnamed: 0_level_0,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,...,Serbia,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",United States,World (WBG members),South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1995.0,31811706.0,263941.7,721440.7,300536.7,364273.3,36020.68,10811.53,1301438.0,7336.007,1088585.0,...,25017.32,50097.91,31363.95,337452.5,199826.3,425519.3,220503.9,10161774.0,40541300.0,246081.0
1996.0,32739377.0,278378.9,750107.6,306832.4,369087.2,38456.56,11279.47,1326118.0,7511.312,1106971.0,...,26498.96,53138.26,32358.35,343263.6,211192.2,456775.6,234123.5,10545166.0,41920772.0,256662.6
1997.0,33848979.0,300949.2,785460.1,313689.7,383089.1,32924.43,11834.54,1375538.0,8048.923,1154351.0,...,28833.51,56014.24,33952.29,354558.9,205451.0,490623.4,248291.3,11014212.0,43643455.0,263335.7
1998.0,34748425.0,312429.1,822151.8,324494.7,390604.3,34214.57,12428.37,1380438.0,7920.324,1199289.0,...,30453.7,58152.23,34956.07,369272.7,189673.3,492792.6,258731.5,11507833.0,44732271.0,264652.4
1999.0,35919081.0,301986.4,858627.3,336360.4,404442.0,31358.14,12477.49,1386899.0,8569.005,1260944.0,...,27298.09,57874.93,36732.19,383804.5,198465.0,476598.7,276149.2,12058971.0,46236350.0,271004.1
2000.0,37382444.0,299678.3,885234.2,347857.2,419474.7,32680.04,12788.76,1446778.0,8911.75,1325739.0,...,29012.44,58299.67,38191.01,402314.2,207304.0,509954.1,293586.0,12550632.0,48292596.0,282386.3
2001.0,37949845.0,286415.2,908255.9,352327.8,424087.2,33950.6,13008.93,1468145.0,9128.795,1350597.0,...,30892.08,60012.34,39314.18,408516.8,214429.0,480103.2,289470.4,12670565.0,49199238.0,290010.7
2002.0,38509270.0,255431.4,946033.3,357533.9,431325.9,35904.33,13338.36,1512544.0,9798.019,1391105.0,...,32901.86,62656.91,40563.49,417733.3,227605.0,510852.0,305344.0,12886034.0,50312988.0,300742.2
2003.0,39254192.0,278220.0,974561.1,361492.9,435802.0,37787.7,13699.69,1530224.0,10328.23,1416234.0,...,34418.0,65743.52,41830.33,426081.5,243894.5,541334.1,318243.4,13246277.0,51694933.0,309611.2
2004.0,40443966.0,291156.7,1014647.0,370404.1,451365.7,40285.19,14270.79,1613263.0,10468.58,1460029.0,...,36687.66,69291.49,43519.64,441408.8,259197.4,592996.6,340367.4,13756013.0,53726238.0,323712.8


In [128]:
# Write the frame as CSV to a file named 'GDP_yearly_30' (no extension), index included
GDP_yearly_30.to_csv('GDP_yearly_30', index = True)

#### Deuxième solution : on supprime les premières années qui présentent beaucoup de valeurs manquantes
On note que les valeurs manquantes se trouvent dans les années les plus anciennes de la série. Supprimer les 3 ou 5 premières années permet donc de conserver un certain nombre de pays qui ne présentent que quelques valeurs manquantes. On garde à chaque fois les pays ne présentant plus de valeurs manquantes sur les années suivantes.
Cela permet d'obtenir :
- En supprimant les 3 premières années => T=27, N=69
- En supprimant les 5 premières années => T=25, N=80

In [129]:
# Columns whose number of missing values lies between 1 and 3 (both bounds included)
cols_with_some_missing = missing_values.loc[missing_values.between(1, 3)]
print("Colonnes avec entre 1 et 3 valeurs manquantes :")
print(cols_with_some_missing)

Colonnes avec entre 1 et 3 valeurs manquantes :
Chile                         1
Czech Republic                1
EMDE Europe & Central Asia    1
India                         1
Morocco                       3
EMDE South Asia               2
Uruguay                       2
dtype: int64


In [130]:
# Extract the columns listed in cols_with_some_missing and display them
df_with_some_missing = GDP_yearly.loc[:, cols_with_some_missing.index]
print("Colonnes avec entre 1 et 3 valeurs manquantes :")
df_with_some_missing

Colonnes avec entre 1 et 3 valeurs manquantes :


Unnamed: 0,Chile,Czech Republic,EMDE Europe & Central Asia,India,Morocco,EMDE South Asia,Uruguay
1,,,,,,,
2,114806.8,152175.2,2092116.0,453034.8,,,
3,123456.3,151233.5,2153779.0,625751.3,,648783.8,32837.01
4,128618.9,150648.8,2125326.0,663433.6,63222.64,687853.0,34324.81
5,128030.8,152523.4,2174681.0,709414.3,63557.42,735526.1,33669.94
6,134890.4,159059.1,2340646.0,749633.6,64569.77,777225.9,33019.32
7,139207.4,163680.6,2390282.0,778794.3,69445.98,807459.9,31750.05
8,143363.9,166057.1,2507818.0,814702.6,71748.84,844689.9,29308.31
9,149940.7,171536.3,2660937.0,870759.3,76280.92,902809.9,29494.37
10,159578.0,179424.8,2865048.0,938138.4,79943.38,972669.1,30972.33


In [131]:
# Columns with at most 3 missing values (fully observed columns included).
# Note: this rebinds cols_with_some_missing, which previously held the 1-to-3 selection.
cols_with_some_missing = missing_values.loc[missing_values.le(3)]
print("Colonnes avec au maximum 3 valeurs manquantes :")
print(cols_with_some_missing)

Colonnes avec au maximum 3 valeurs manquantes :
Advanced Economies     0
Argentina              0
Australia              0
Austria                0
Belgium                0
                      ..
Taiwan, China          0
Uruguay                2
United States          0
World (WBG members)    0
South Africa           0
Length: 69, dtype: int64


In [132]:
# Drop the first 3 rows (years), use the first column (the dates) as the index, then
# keep only the columns that are complete over the remaining rows.
# Filtering with dropna AFTER slicing guarantees a NaN-free panel; selecting columns on
# their overall missing count (<= 3) would also keep a column whose gaps fall outside
# the 3 dropped rows. This mirrors how the 25-year panel is built.
GDP_yearly_27 = (
    GDP_yearly.iloc[3:]
    .set_index(GDP_yearly.columns[0])
    .dropna(axis="columns")
)

In [133]:
# Display the panel obtained after dropping the first 3 rows
GDP_yearly_27

Unnamed: 0_level_0,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",Uruguay,United States,World (WBG members),South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1998.0,34748425.0,312429.1,822151.8,324494.7,390604.3,34214.57,12428.37,1380438.0,7920.324,1199289.0,...,58152.23,34956.07,369272.7,189673.3,492792.6,258731.5,34324.81,11507833.0,44732271.0,264652.4
1999.0,35919081.0,301986.4,858627.3,336360.4,404442.0,31358.14,12477.49,1386899.0,8569.005,1260944.0,...,57874.93,36732.19,383804.5,198465.0,476598.7,276149.2,33669.94,12058971.0,46236350.0,271004.1
2000.0,37382444.0,299678.3,885234.2,347857.2,419474.7,32680.04,12788.76,1446778.0,8911.75,1325739.0,...,58299.67,38191.01,402314.2,207304.0,509954.1,293586.0,33019.32,12550632.0,48292596.0,282386.3
2001.0,37949845.0,286415.2,908255.9,352327.8,424087.2,33950.6,13008.93,1468145.0,9128.795,1350597.0,...,60012.34,39314.18,408516.8,214429.0,480103.2,289470.4,31750.05,12670565.0,49199238.0,290010.7
2002.0,38509270.0,255431.4,946033.3,357533.9,431325.9,35904.33,13338.36,1512544.0,9798.019,1391105.0,...,62656.91,40563.49,417733.3,227605.0,510852.0,305344.0,29308.31,12886034.0,50312988.0,300742.2
2003.0,39254192.0,278220.0,974561.1,361492.9,435802.0,37787.7,13699.69,1530224.0,10328.23,1416234.0,...,65743.52,41830.33,426081.5,243894.5,541334.1,318243.4,29494.37,13246277.0,51694933.0,309611.2
2004.0,40443966.0,291156.7,1014647.0,370404.1,451365.7,40285.19,14270.79,1613263.0,10468.58,1460029.0,...,69291.49,43519.64,441408.8,259197.4,592996.6,340367.4,30972.33,13756013.0,53726238.0,323712.8
2005.0,41561230.0,316928.9,1045384.0,379367.9,461846.0,43088.03,14893.26,1672095.0,10998.16,1506903.0,...,73766.11,45334.12,453702.3,270086.4,646793.2,358690.1,33287.42,14235209.0,55723540.0,340795.1
2006.0,42792887.0,342432.6,1072775.0,392373.2,473632.5,45994.01,15603.13,1738082.0,11679.03,1546654.0,...,80336.62,48212.15,476097.4,283525.1,692367.3,379388.1,34671.64,14631690.0,58041845.0,359892.7
2007.0,43905934.0,373277.8,1119325.0,406933.3,491048.4,49190.47,16308.41,1841111.0,12353.82,1578359.0,...,89011.38,51715.25,492230.6,299042.6,727963.1,405376.5,36926.32,14924710.0,60409465.0,379184.6


In [134]:
# Write the frame as CSV to a file named 'GDP_yearly_27' (no extension), index included
GDP_yearly_27.to_csv('GDP_yearly_27', index = True)

In [135]:
# Skip the first 5 rows, discard every column that still contains a missing value,
# and index the result by 'Year'
GDP_yearly_25 = (
    GDP_yearly.iloc[5:]
    .dropna(axis="columns")
    .set_index('Year')
)
# Export, index included
GDP_yearly_25.to_csv('GDP_yearly_25', index=True)

In [136]:
# Display the panel obtained after dropping the first 5 rows
GDP_yearly_25

Unnamed: 0_level_0,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,...,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,World (WBG members),South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000.0,37382444.0,299678.3,885234.2,347857.2,419474.7,32680.04,12601.0,12788.76,1446778.0,8911.75,...,38191.01,402314.2,207304.0,31693.32,509954.1,293586.0,33019.32,12550632.0,48292596.0,282386.3
2001.0,37949845.0,286415.2,908255.9,352327.8,424087.2,33950.6,12905.98,13008.93,1468145.0,9128.795,...,39314.18,408516.8,214429.0,32896.48,480103.2,289470.4,31750.05,12670565.0,49199238.0,290010.7
2002.0,38509270.0,255431.4,946033.3,357533.9,431325.9,35904.33,13549.23,13338.36,1512544.0,9798.019,...,40563.49,417733.3,227605.0,33331.59,510852.0,305344.0,29308.31,12886034.0,50312988.0,300742.2
2003.0,39254192.0,278220.0,974561.1,361492.9,435802.0,37787.7,14078.13,13699.69,1530224.0,10328.23,...,41830.33,426081.5,243894.5,34899.0,541334.1,318243.4,29494.37,13246277.0,51694933.0,309611.2
2004.0,40443966.0,291156.7,1014647.0,370404.1,451365.7,40285.19,14968.98,14270.79,1613263.0,10468.58,...,43519.64,441408.8,259197.4,37075.23,592996.6,340367.4,30972.33,13756013.0,53726238.0,323712.8
2005.0,41561230.0,316928.9,1045384.0,379367.9,461846.0,43088.03,15564.75,14893.26,1672095.0,10998.16,...,45334.12,453702.3,270086.4,38367.86,646793.2,358690.1,33287.42,14235209.0,55723540.0,340795.1
2006.0,42792887.0,342432.6,1072775.0,392373.2,473632.5,45994.01,16415.21,15603.13,1738082.0,11679.03,...,48212.15,476097.4,283525.1,40379.91,692367.3,379388.1,34671.64,14631690.0,58041845.0,359892.7
2007.0,43905934.0,373277.8,1119325.0,406933.3,491048.4,49190.47,17398.84,16308.41,1841111.0,12353.82,...,51715.25,492230.6,299042.6,43089.2,727963.1,405376.5,36926.32,14924710.0,60409465.0,379184.6
2008.0,43999974.0,388422.5,1148363.0,412528.5,493241.9,52171.01,18340.22,17314.73,1929176.0,12768.34,...,53287.25,486826.1,304364.9,44915.21,732893.8,408614.4,39584.6,14941805.0,61426740.0,391284.6
2009.0,42471539.0,365433.6,1170924.0,397776.9,483839.8,50362.19,17778.52,17897.43,1932479.0,10936.32,...,49054.6,466537.7,302251.4,46282.18,697411.3,402020.8,41241.3,14556810.0,60154422.0,385266.3


## Normalisation des données

### Normalisation des données entre 0 et 1, avec calcul du min et du max pour l'ensemble des pays

In [137]:
def _normalize_global(frame: pd.DataFrame) -> pd.DataFrame:
    """Min-max rescale a frame using ONE minimum and ONE maximum taken over all cells.

    The smallest cell of the whole frame maps to 0 and the largest to 1; every column
    is shifted and scaled by the same two constants.

    Parameters
    ----------
    frame : pd.DataFrame
        Numeric frame to rescale.

    Returns
    -------
    pd.DataFrame
        New frame with the same index and columns as ``frame``.
    """
    lowest = frame.min().min()
    highest = frame.max().max()
    return (frame - lowest) / (highest - lowest)


# Global min-max normalization of the three panels.
# The index is written to the files (index=True), like the other exports of this
# notebook: with index=False the saved rows lose their 'Year' labels.
GDP_yearly_30_normalized = _normalize_global(GDP_yearly_30)
GDP_yearly_30_normalized.to_csv('GDP_yearly_30_global_normalized', index=True)

GDP_yearly_27_normalized = _normalize_global(GDP_yearly_27)
GDP_yearly_27_normalized.to_csv('GDP_yearly_27_global_normalized', index=True)

GDP_yearly_25_normalized = _normalize_global(GDP_yearly_25)
GDP_yearly_25_normalized.to_csv('GDP_yearly_25_global_normalized', index=True)

In [138]:
# Display the globally normalized 25-row panel
GDP_yearly_25_normalized

Unnamed: 0_level_0,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,...,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,World (WBG members),South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000.0,0.426292,0.003358,0.010036,0.003907,0.004724,0.000313,8.4e-05,8.6e-05,0.01644,4.1e-05,...,0.000375,0.004528,0.002304,0.000301,0.005756,0.003288,0.000316,0.143082,0.550724,0.00316
2001.0,0.432763,0.003206,0.010299,0.003958,0.004777,0.000327,8.7e-05,8.8e-05,0.016684,4.4e-05,...,0.000388,0.004599,0.002385,0.000315,0.005415,0.003241,0.000302,0.144449,0.561064,0.003247
2002.0,0.439144,0.002853,0.010729,0.004018,0.004859,0.000349,9.4e-05,9.2e-05,0.017191,5.2e-05,...,0.000402,0.004704,0.002536,0.00032,0.005766,0.003422,0.000274,0.146907,0.573767,0.00337
2003.0,0.447639,0.003113,0.011055,0.004063,0.00491,0.000371,0.0001,9.6e-05,0.017392,5.8e-05,...,0.000417,0.004799,0.002721,0.000338,0.006114,0.003569,0.000276,0.151015,0.589528,0.003471
2004.0,0.461209,0.00326,0.011512,0.004164,0.005088,0.000399,0.000111,0.000103,0.018339,5.9e-05,...,0.000436,0.004974,0.002896,0.000363,0.006703,0.003822,0.000293,0.156829,0.612695,0.003632
2005.0,0.473952,0.003554,0.011863,0.004267,0.005207,0.000431,0.000117,0.00011,0.01901,6.5e-05,...,0.000457,0.005114,0.00302,0.000377,0.007317,0.004031,0.000319,0.162294,0.635475,0.003827
2006.0,0.487999,0.003845,0.012175,0.004415,0.005342,0.000464,0.000127,0.000118,0.019763,7.3e-05,...,0.00049,0.00537,0.003173,0.0004,0.007836,0.004267,0.000335,0.166816,0.661915,0.004044
2007.0,0.500693,0.004197,0.012706,0.004581,0.00554,0.000501,0.000138,0.000126,0.020938,8.1e-05,...,0.00053,0.005554,0.00335,0.000431,0.008242,0.004563,0.000361,0.170158,0.688918,0.004264
2008.0,0.501766,0.00437,0.013037,0.004645,0.005565,0.000535,0.000149,0.000137,0.021942,8.5e-05,...,0.000548,0.005492,0.003411,0.000452,0.008299,0.0046,0.000391,0.170353,0.700521,0.004402
2009.0,0.484334,0.004108,0.013294,0.004476,0.005458,0.000514,0.000143,0.000144,0.02198,6.5e-05,...,0.000499,0.005261,0.003387,0.000468,0.007894,0.004525,0.00041,0.165962,0.68601,0.004334


### Normalisation des données entre 0 et 1, avec calcul du min et du max pour chaque colonne 

In [139]:
# Per-column min-max scaling: each column is shifted by its own minimum and divided
# by its own range (the column-wise min/max Series broadcast across the rows)
GDP_yearly_30_colnormalized = (GDP_yearly_30 - GDP_yearly_30.min()) / (
    GDP_yearly_30.max() - GDP_yearly_30.min()
)

# Export, index included
GDP_yearly_30_colnormalized.to_csv('GDP_yearly_30_colnormalized', index=True)

# Show the normalized frame
GDP_yearly_30_colnormalized

Unnamed: 0_level_0,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,...,Serbia,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",United States,World (WBG members),South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1995.0,0.0,0.2551,0.0,0.0,0.0,0.127484,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.042127,0.0,0.0,0.0,0.0,0.0
1996.0,0.040282,0.317617,0.030497,0.034659,0.020306,0.194087,0.025494,0.022544,0.014954,0.018966,...,0.04345,0.040198,0.029625,0.020084,0.089287,0.027935,0.031108,0.038248,0.029261,0.048832
1997.0,0.088464,0.415351,0.068106,0.07241,0.079368,0.042826,0.055735,0.067687,0.060813,0.06784,...,0.111913,0.078223,0.077113,0.059123,0.065465,0.058186,0.063469,0.08504,0.065802,0.079628
1998.0,0.12752,0.465062,0.10714,0.131894,0.111068,0.078101,0.088088,0.072163,0.049843,0.114196,...,0.159426,0.106491,0.107018,0.109977,0.0,0.060125,0.087315,0.134284,0.088898,0.085704
1999.0,0.178353,0.419842,0.145945,0.197218,0.169437,0.0,0.090764,0.078065,0.105176,0.177795,...,0.066885,0.102824,0.159932,0.160203,0.036479,0.045652,0.127098,0.189267,0.120802,0.115016
2000.0,0.241896,0.409848,0.17425,0.26051,0.232847,0.036144,0.107723,0.132761,0.134413,0.244634,...,0.11716,0.10844,0.203394,0.224176,0.073154,0.075463,0.166925,0.238315,0.164418,0.167543
2001.0,0.266534,0.352416,0.198741,0.285122,0.252303,0.070884,0.119718,0.152279,0.152927,0.270276,...,0.172282,0.131084,0.236856,0.245614,0.102717,0.048784,0.157525,0.25028,0.18365,0.202728
2002.0,0.290826,0.218249,0.238931,0.313783,0.282837,0.124303,0.137666,0.192835,0.210013,0.312062,...,0.23122,0.16605,0.274076,0.277468,0.157387,0.076266,0.193782,0.271775,0.207274,0.252252
2003.0,0.323172,0.316929,0.26928,0.335578,0.301718,0.175799,0.157352,0.208985,0.25524,0.337983,...,0.275682,0.20686,0.311818,0.306321,0.224976,0.103509,0.223245,0.307713,0.236588,0.293181
2004.0,0.374835,0.372947,0.311924,0.384636,0.367368,0.244086,0.188467,0.284838,0.267212,0.383159,...,0.342241,0.253769,0.362146,0.359295,0.288471,0.149682,0.273778,0.358565,0.279675,0.358258


In [140]:
# Per-column min-max normalization: each column is rescaled with its own min and max.
# The result is bound to GDP_yearly_27_colnormalized, the name that is exported and
# displayed below. It used to be assigned to GDP_yearly_27_colnormalized_2 only, which
# raised a NameError on a fresh kernel (or silently exported a stale variable).
GDP_yearly_27_colnormalized = GDP_yearly_27.apply(
    lambda x: (x - x.min()) / (x.max() - x.min())
)
# Former name kept as an alias in case other cells still refer to it
GDP_yearly_27_colnormalized_2 = GDP_yearly_27_colnormalized

GDP_yearly_27_colnormalized.to_csv('GDP_yearly_27_colnormalized', index=True)

# Show the normalized frame
GDP_yearly_27_colnormalized

Unnamed: 0_level_0,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",Uruguay,United States,World (WBG members),South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1998.0,0.0,0.465062,0.0,0.0,0.0,0.078101,0.0,0.0,0.0,0.0,...,0.004087,0.0,0.0,0.0,0.015166,0.0,0.193577,0.0,0.0,0.0
1999.0,0.058263,0.419842,0.04346,0.075248,0.065662,0.0,0.002935,0.006361,0.058236,0.071799,...,0.0,0.059256,0.056431,0.036479,0.0,0.043589,0.169978,0.063511,0.035017,0.03206
2000.0,0.131093,0.409848,0.075163,0.148157,0.136995,0.036144,0.021531,0.065312,0.089006,0.147254,...,0.006259,0.107926,0.12831,0.073154,0.031237,0.087227,0.146532,0.120167,0.082889,0.08951
2001.0,0.159332,0.352416,0.102593,0.176508,0.158882,0.070884,0.034685,0.086347,0.108492,0.176202,...,0.031499,0.145398,0.152396,0.102717,0.003282,0.076927,0.100792,0.133988,0.103997,0.127994
2002.0,0.187174,0.218249,0.147604,0.209524,0.193231,0.124303,0.054367,0.130058,0.168572,0.223374,...,0.070471,0.187079,0.188187,0.157387,0.032078,0.116652,0.012801,0.158818,0.129927,0.18216
2003.0,0.224248,0.316929,0.181595,0.23463,0.214471,0.175799,0.075954,0.147464,0.216172,0.252638,...,0.115958,0.229344,0.220605,0.224976,0.060624,0.148934,0.019506,0.20033,0.1621,0.226926
2004.0,0.283462,0.372947,0.229357,0.291142,0.288323,0.244086,0.110075,0.229216,0.228772,0.303638,...,0.168244,0.285704,0.280125,0.288471,0.109006,0.204301,0.072766,0.25907,0.209392,0.298102
2005.0,0.339068,0.484547,0.265981,0.347987,0.338054,0.320722,0.147264,0.287135,0.276316,0.358224,...,0.234186,0.34624,0.327865,0.333652,0.159386,0.250156,0.156194,0.31429,0.255892,0.384324
2006.0,0.400366,0.594983,0.298617,0.430463,0.393983,0.400178,0.189675,0.352099,0.337441,0.404515,...,0.331015,0.442259,0.414831,0.389413,0.202066,0.301954,0.206076,0.359979,0.309865,0.480717
2007.0,0.455762,0.72855,0.354081,0.522798,0.476625,0.487576,0.231811,0.453531,0.398021,0.441436,...,0.458854,0.559132,0.477481,0.453798,0.235402,0.366993,0.287326,0.393745,0.364987,0.578092


In [141]:
# Per-column min-max scaling: each column is shifted by its own minimum and divided
# by its own range (the column-wise min/max Series broadcast across the rows)
GDP_yearly_25_colnormalized = (GDP_yearly_25 - GDP_yearly_25.min()) / (
    GDP_yearly_25.max() - GDP_yearly_25.min()
)

# Export, index included
GDP_yearly_25_colnormalized.to_csv('GDP_yearly_25_colnormalized', index=True)

# Show the normalized frame
GDP_yearly_25_colnormalized

Unnamed: 0_level_0,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,...,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,World (WBG members),South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000.0,0.0,0.409848,0.0,0.0,0.0,0.0,0.016303,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.028047,0.011158,0.146532,0.0,0.0,0.0
2001.0,0.0325,0.352416,0.02966,0.033282,0.025362,0.036043,0.041835,0.013443,0.022506,0.021389,...,0.042005,0.027632,0.031897,0.049216,0.0,0.0,0.100792,0.015708,0.023016,0.042267
2002.0,0.064542,0.218249,0.078329,0.07204,0.065163,0.091465,0.095685,0.033558,0.069271,0.087339,...,0.088728,0.06869,0.090882,0.067015,0.028891,0.043036,0.012801,0.043929,0.051289,0.101758
2003.0,0.10721,0.316929,0.115083,0.101513,0.089775,0.144892,0.139962,0.055621,0.087893,0.13959,...,0.136107,0.105881,0.163805,0.131131,0.057531,0.078008,0.019506,0.091112,0.08637,0.150925
2004.0,0.175358,0.372947,0.166727,0.167854,0.175351,0.21574,0.214541,0.090492,0.175357,0.153421,...,0.199286,0.174162,0.232312,0.220152,0.106072,0.13799,0.072766,0.157874,0.137936,0.229099
2005.0,0.239352,0.484547,0.206326,0.234586,0.232976,0.295249,0.264416,0.128499,0.237324,0.20561,...,0.267145,0.228929,0.281059,0.273029,0.156619,0.187665,0.156194,0.220636,0.188639,0.323797
2006.0,0.309899,0.594983,0.241615,0.331406,0.297783,0.377685,0.335613,0.171843,0.306827,0.272708,...,0.374781,0.328697,0.34122,0.355333,0.199439,0.243781,0.206076,0.272565,0.24749,0.429667
2007.0,0.373652,0.72855,0.301587,0.4398,0.393543,0.468361,0.417959,0.214907,0.415347,0.339206,...,0.505794,0.400568,0.410688,0.46616,0.232884,0.314239,0.287326,0.310943,0.307594,0.536614
2008.0,0.379038,0.79413,0.338997,0.481455,0.405604,0.552912,0.496767,0.276352,0.508105,0.380056,...,0.564585,0.376492,0.434514,0.540854,0.237517,0.323018,0.383121,0.313182,0.333418,0.603692
2009.0,0.291493,0.694583,0.368063,0.371634,0.353907,0.5016,0.449744,0.311932,0.511584,0.199516,...,0.406288,0.286109,0.425053,0.596771,0.204178,0.305141,0.442822,0.262758,0.301119,0.570328
