### Chargement des packages

In [2]:
import csv
import pandas as pd

# Données annuelles

## Chargement des données

In [119]:
# Load the raw yearly GDP table from the project's data folder.
GDP_yearly = pd.read_csv(filepath_or_buffer='../Data/GDP_yearly.csv')

In [120]:
# Label the first column 'Year', whatever name it received when the file was read.
first_label = GDP_yearly.columns[0]
GDP_yearly.rename(columns={first_label: 'Year'}, inplace=True)

In [121]:
# Discard the first row of the raw table.
# The row is removed by its index label (0) rather than with iloc[1:], so the cell
# is idempotent: re-running it no longer strips one more year from the data.
# NOTE(review): assumes the frame still carries the default 0..n-1 integer index
# produced by read_csv — confirm if the loading cell ever sets an index.
GDP_yearly = GDP_yearly.drop(index=0, errors='ignore')

In [122]:
# Preview the first five rows of the cleaned table.
GDP_yearly.head(n=5)

Unnamed: 0,Year,Albania,Advanced Economies,Argentina,Australia,Austria,Belgium,Bulgaria,Bahrain,Bosnia and Herzegovina,...,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Ukraine,Uruguay,United States,World (WBG members),South Africa
1,1995.0,,31811706.0,263941.7,721440.7,300536.7,364273.3,36020.68,,,...,337452.5,199826.3,,425519.3,220503.9,,,10161774.0,40541300.0,246081.0
2,1996.0,,32739377.0,278378.9,750107.6,306832.4,369087.2,38456.56,,,...,343263.6,211192.2,,456775.6,234123.5,,,10545166.0,41920772.0,256662.6
3,1997.0,,33848979.0,300949.2,785460.1,313689.7,383089.1,32924.43,,,...,354558.9,205451.0,,490623.4,248291.3,,32837.01,11014212.0,43643455.0,263335.7
4,1998.0,,34748425.0,312429.1,822151.8,324494.7,390604.3,34214.57,,,...,369272.7,189673.3,,492792.6,258731.5,,34324.81,11507833.0,44732271.0,264652.4
5,1999.0,,35919081.0,301986.4,858627.3,336360.4,404442.0,31358.14,,,...,383804.5,198465.0,,476598.7,276149.2,,33669.94,12058971.0,46236350.0,271004.1


## Tri des colonnes pour ne conserver que les pays

In [144]:
# List every column label so that aggregates (regions, income groups) can be spotted.
print(GDP_yearly.columns.tolist())

['Year', 'Albania', 'Advanced Economies', 'Argentina', 'Australia', 'Austria', 'Belgium', 'Bulgaria', 'Bahrain', 'Bosnia and Herzegovina', 'Belarus', 'Bolivia', 'Brazil', 'Botswana', 'Canada', 'Switzerland', 'Chile', 'China', 'Cameroon', 'Colombia', 'Costa Rica', 'Cyprus', 'Czech Republic', 'Germany', 'Denmark', 'EMDE East Asia & Pacific', 'EMDE Europe & Central Asia', 'Ecuador', 'Egypt, Arab Rep.', 'Emerging Market and Developing Economies (EMDEs)', 'Spain', 'Estonia', 'Finland', 'France', 'United Kingdom', 'Georgia', 'Ghana', 'Greece', 'Guatemala', 'High Income Countries', 'Hong Kong SAR, China', 'Honduras', 'Croatia', 'Hungary', 'Indonesia', 'India', 'Ireland', 'Iceland', 'Israel', 'Italy', 'Jordan', 'Japan', 'Kazakhstan', 'Kenya', 'Korea, Rep.', 'Kuwait', 'EMDE Latin America & Caribbean', 'Low-Income Countries (LIC)', 'Sri Lanka', 'Lithuania', 'Luxembourg', 'Latvia', 'Morocco', 'Mexico', 'Middle-Income Countries (MIC)', 'North Macedonia', 'Malta', 'Mongolia', 'EMDE Middle East & N.

In [145]:
# Regional and income-group aggregates are not individual countries: remove them.
aggregate_columns = [
    'Advanced Economies',
    'EMDE East Asia & Pacific',
    'EMDE Europe & Central Asia',
    'Emerging Market and Developing Economies (EMDEs)',
    'High Income Countries',
    'EMDE Latin America & Caribbean',
    'Low-Income Countries (LIC)',
    'Middle-Income Countries (MIC)',
    'EMDE Middle East & N. Africa',
    'EMDE South Asia',
    'EMDE Sub-Saharan Africa',
    'World (WBG members)',
]
GDP_yearly = GDP_yearly.drop(columns=aggregate_columns)

## Traitement des valeurs manquantes

In [146]:
# Count the missing observations of each country; the first column (Year) is left out.
country_values = GDP_yearly.iloc[:, 1:]
missing_values = country_values.isna().sum()

In [147]:
# Display the number of missing values per country column.
missing_values

Albania          13
Argentina         0
Australia         0
Austria           0
Belgium           0
                 ..
Taiwan, China     0
Ukraine           6
Uruguay           2
United States     0
South Africa      0
Length: 88, dtype: int64

#### Première solution : on supprime tous les pays avec NA
On garde une première base de données qui ne contient que les pays sans aucune valeur manquante : 55 pays au total, sur une durée totale de 30 ans.

In [148]:
# Number of countries whose series is complete (zero missing values).
num_cols_no_missing = missing_values.eq(0).sum()
print(f"\nNumber of countries without missing values : {num_cols_no_missing}")


Number of countries without missing values : 55


In [149]:
# Countries with a complete series (zero missing values).
cols_with_no_missing = missing_values.loc[missing_values.eq(0)]

# First column of the table: the dates.
dates_column = GDP_yearly.iloc[:, 0]

# Index the table by the date column, then keep only the complete countries.
GDP_yearly_30 = (
    GDP_yearly
    .set_index(dates_column.name)
    .loc[:, cols_with_no_missing.index]
)

In [150]:
# Display the resulting table (complete countries only, indexed by Year).
GDP_yearly_30

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,Switzerland,...,El Salvador,Serbia,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1995.0,263941.7,721440.7,300536.7,364273.3,36020.68,10811.53,1301438.0,7336.007,1088585.0,439298.5,...,15787.41,25017.32,50097.91,31363.95,337452.5,199826.3,425519.3,220503.9,10161774.0,246081.0
1996.0,278378.9,750107.6,306832.4,369087.2,38456.56,11279.47,1326118.0,7511.312,1106971.0,441216.2,...,15920.5,26498.96,53138.26,32358.35,343263.6,211192.2,456775.6,234123.5,10545166.0,256662.6
1997.0,300949.2,785460.1,313689.7,383089.1,32924.43,11834.54,1375538.0,8048.923,1154351.0,451075.8,...,16414.93,28833.51,56014.24,33952.29,354558.9,205451.0,490623.4,248291.3,11014212.0,263335.7
1998.0,312429.1,822151.8,324494.7,390604.3,34214.57,12428.37,1380438.0,7920.324,1199289.0,464321.8,...,16843.97,30453.7,58152.23,34956.07,369272.7,189673.3,492792.6,258731.5,11507833.0,264652.4
1999.0,301986.4,858627.3,336360.4,404442.0,31358.14,12477.49,1386899.0,8569.005,1260944.0,471884.1,...,17205.24,27298.09,57874.93,36732.19,383804.5,198465.0,476598.7,276149.2,12058971.0,271004.1
2000.0,299678.3,885234.2,347857.2,419474.7,32680.04,12788.76,1446778.0,8911.75,1325739.0,491290.6,...,17396.42,29012.44,58299.67,38191.01,402314.2,207304.0,509954.1,293586.0,12550632.0,282386.3
2001.0,286415.2,908255.9,352327.8,424087.2,33950.6,13008.93,1468145.0,9128.795,1350597.0,499214.8,...,17546.41,30892.08,60012.34,39314.18,408516.8,214429.0,480103.2,289470.4,12670565.0,290010.7
2002.0,255431.4,946033.3,357533.9,431325.9,35904.33,13338.36,1512544.0,9798.019,1391105.0,498937.8,...,17813.6,32901.86,62656.91,40563.49,417733.3,227605.0,510852.0,305344.0,12886034.0,300742.2
2003.0,278220.0,974561.1,361492.9,435802.0,37787.7,13699.69,1530224.0,10328.23,1416234.0,498390.9,...,18093.41,34418.0,65743.52,41830.33,426081.5,243894.5,541334.1,318243.4,13246277.0,309611.2
2004.0,291156.7,1014647.0,370404.1,451365.7,40285.19,14270.79,1613263.0,10468.58,1460029.0,511170.9,...,18254.31,36687.66,69291.49,43519.64,441408.8,259197.4,592996.6,340367.4,13756013.0,323712.8


In [151]:
# Save the table; the Year index is written as the first column (to_csv default).
GDP_yearly_30.to_csv('GDP_yearly_30')

#### Deuxième solution : on supprime les premières années qui présentent beaucoup de valeurs manquantes
On note que les valeurs manquantes se trouvent dans les années les plus anciennes de la série. Supprimer les 3 ou 5 premières années permet donc de conserver un certain nombre de pays qui ne présentent que quelques valeurs manquantes. On garde à chaque fois les pays ne présentant plus de valeurs manquantes sur les années suivantes.
Cela permet d'obtenir :
- En supprimant les 3 premières années => T=27, N=60
- En supprimant les 5 premières années => T=25, N=71

In [152]:
# Countries with between 1 and 3 missing values (both bounds included).
has_few_missing = missing_values.between(1, 3)
cols_with_some_missing = missing_values[has_few_missing]
print("Colonnes avec entre 1 et 3 valeurs manquantes :")
print(cols_with_some_missing)

Colonnes avec entre 1 et 3 valeurs manquantes :
Chile             1
Czech Republic    1
India             1
Morocco           3
Uruguay           2
dtype: int64


In [153]:
# Show the data of the countries selected in cols_with_some_missing.
df_with_some_missing = GDP_yearly.loc[:, cols_with_some_missing.index]
print("Colonnes avec entre 1 et 3 valeurs manquantes :")
df_with_some_missing

Colonnes avec entre 1 et 3 valeurs manquantes :


Unnamed: 0,Chile,Czech Republic,India,Morocco,Uruguay
1,,,,,
2,114806.8,152175.2,453034.8,,
3,123456.3,151233.5,625751.3,,32837.01
4,128618.9,150648.8,663433.6,63222.64,34324.81
5,128030.8,152523.4,709414.3,63557.42,33669.94
6,134890.4,159059.1,749633.6,64569.77,33019.32
7,139207.4,163680.6,778794.3,69445.98,31750.05
8,143363.9,166057.1,814702.6,71748.84,29308.31
9,149940.7,171536.3,870759.3,76280.92,29494.37
10,159578.0,179424.8,938138.4,79943.38,30972.33


In [154]:
# Countries with at most 3 missing values (bound included).
at_most_three = missing_values.le(3)
cols_with_some_missing = missing_values[at_most_three]
print("Colonnes avec au maximum 3 valeurs manquantes :")
print(cols_with_some_missing)

Colonnes avec au maximum 3 valeurs manquantes :
Argentina               0
Australia               0
Austria                 0
Belgium                 0
Bulgaria                0
Bolivia                 0
Brazil                  0
Botswana                0
Canada                  0
Switzerland             0
Chile                   1
China                   0
Costa Rica              0
Cyprus                  0
Czech Republic          1
Germany                 0
Denmark                 0
Spain                   0
Estonia                 0
Finland                 0
France                  0
United Kingdom          0
Greece                  0
Hong Kong SAR, China    0
Hungary                 0
Indonesia               0
India                   1
Ireland                 0
Iceland                 0
Israel                  0
Italy                   0
Jordan                  0
Japan                   0
Korea, Rep.             0
Lithuania               0
Luxembourg              0
Latvia          

In [155]:
# Drop the first 3 years, then keep only the countries that have no missing value
# left over the remaining period, and index the table by Year.
#
# This mirrors the construction of GDP_yearly_25 and is derived from GDP_yearly
# alone. The previous version relied on `cols_with_some_missing` (a name that is
# rebound by two different cells) and on `dates_column`, and it kept every country
# with <= 3 missing values without checking that those gaps actually fall in the
# 3 dropped years. Filtering after the slice guarantees a NaN-free result; when all
# gaps are in the first 3 years the selected countries are the same as before.
GDP_yearly_27 = (
    GDP_yearly.iloc[3:]
    .dropna(axis=1)
    .set_index('Year')
)

In [156]:
# Display the table obtained after dropping the first 3 years.
GDP_yearly_27

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,Switzerland,...,Serbia,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1998.0,312429.1,822151.8,324494.7,390604.3,34214.57,12428.37,1380438.0,7920.324,1199289.0,464321.8,...,30453.7,58152.23,34956.07,369272.7,189673.3,492792.6,258731.5,34324.81,11507833.0,264652.4
1999.0,301986.4,858627.3,336360.4,404442.0,31358.14,12477.49,1386899.0,8569.005,1260944.0,471884.1,...,27298.09,57874.93,36732.19,383804.5,198465.0,476598.7,276149.2,33669.94,12058971.0,271004.1
2000.0,299678.3,885234.2,347857.2,419474.7,32680.04,12788.76,1446778.0,8911.75,1325739.0,491290.6,...,29012.44,58299.67,38191.01,402314.2,207304.0,509954.1,293586.0,33019.32,12550632.0,282386.3
2001.0,286415.2,908255.9,352327.8,424087.2,33950.6,13008.93,1468145.0,9128.795,1350597.0,499214.8,...,30892.08,60012.34,39314.18,408516.8,214429.0,480103.2,289470.4,31750.05,12670565.0,290010.7
2002.0,255431.4,946033.3,357533.9,431325.9,35904.33,13338.36,1512544.0,9798.019,1391105.0,498937.8,...,32901.86,62656.91,40563.49,417733.3,227605.0,510852.0,305344.0,29308.31,12886034.0,300742.2
2003.0,278220.0,974561.1,361492.9,435802.0,37787.7,13699.69,1530224.0,10328.23,1416234.0,498390.9,...,34418.0,65743.52,41830.33,426081.5,243894.5,541334.1,318243.4,29494.37,13246277.0,309611.2
2004.0,291156.7,1014647.0,370404.1,451365.7,40285.19,14270.79,1613263.0,10468.58,1460029.0,511170.9,...,36687.66,69291.49,43519.64,441408.8,259197.4,592996.6,340367.4,30972.33,13756013.0,323712.8
2005.0,316928.9,1045384.0,379367.9,461846.0,43088.03,14893.26,1672095.0,10998.16,1506903.0,525811.7,...,38870.45,73766.11,45334.12,453702.3,270086.4,646793.2,358690.1,33287.42,14235209.0,340795.1
2006.0,342432.6,1072775.0,392373.2,473632.5,45994.01,15603.13,1738082.0,11679.03,1546654.0,547642.9,...,40420.25,80336.62,48212.15,476097.4,283525.1,692367.3,379388.1,34671.64,14631690.0,359892.7
2007.0,373277.8,1119325.0,406933.3,491048.4,49190.47,16308.41,1841111.0,12353.82,1578359.0,569103.6,...,43603.12,89011.38,51715.25,492230.6,299042.6,727963.1,405376.5,36926.32,14924710.0,379184.6


In [157]:
# Save the table; the Year index is written as the first column (to_csv default).
GDP_yearly_27.to_csv('GDP_yearly_27')

In [158]:
# Drop the first 5 years, keep the columns without any missing value,
# index by Year and save the result.
GDP_yearly_25 = (
    GDP_yearly.iloc[5:]
    .dropna(axis='columns')
    .set_index('Year')
)
GDP_yearly_25.to_csv('GDP_yearly_25')

In [159]:
# Display the table obtained after dropping the first 5 years.
GDP_yearly_25

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000.0,299678.3,885234.2,347857.2,419474.7,32680.04,12601.0,12788.76,1446778.0,8911.75,1325739.0,...,58299.67,38191.01,402314.2,207304.0,31693.32,509954.1,293586.0,33019.32,12550632.0,282386.3
2001.0,286415.2,908255.9,352327.8,424087.2,33950.6,12905.98,13008.93,1468145.0,9128.795,1350597.0,...,60012.34,39314.18,408516.8,214429.0,32896.48,480103.2,289470.4,31750.05,12670565.0,290010.7
2002.0,255431.4,946033.3,357533.9,431325.9,35904.33,13549.23,13338.36,1512544.0,9798.019,1391105.0,...,62656.91,40563.49,417733.3,227605.0,33331.59,510852.0,305344.0,29308.31,12886034.0,300742.2
2003.0,278220.0,974561.1,361492.9,435802.0,37787.7,14078.13,13699.69,1530224.0,10328.23,1416234.0,...,65743.52,41830.33,426081.5,243894.5,34899.0,541334.1,318243.4,29494.37,13246277.0,309611.2
2004.0,291156.7,1014647.0,370404.1,451365.7,40285.19,14968.98,14270.79,1613263.0,10468.58,1460029.0,...,69291.49,43519.64,441408.8,259197.4,37075.23,592996.6,340367.4,30972.33,13756013.0,323712.8
2005.0,316928.9,1045384.0,379367.9,461846.0,43088.03,15564.75,14893.26,1672095.0,10998.16,1506903.0,...,73766.11,45334.12,453702.3,270086.4,38367.86,646793.2,358690.1,33287.42,14235209.0,340795.1
2006.0,342432.6,1072775.0,392373.2,473632.5,45994.01,16415.21,15603.13,1738082.0,11679.03,1546654.0,...,80336.62,48212.15,476097.4,283525.1,40379.91,692367.3,379388.1,34671.64,14631690.0,359892.7
2007.0,373277.8,1119325.0,406933.3,491048.4,49190.47,17398.84,16308.41,1841111.0,12353.82,1578359.0,...,89011.38,51715.25,492230.6,299042.6,43089.2,727963.1,405376.5,36926.32,14924710.0,379184.6
2008.0,388422.5,1148363.0,412528.5,493241.9,52171.01,18340.22,17314.73,1929176.0,12768.34,1594070.0,...,93901.84,53287.25,486826.1,304364.9,44915.21,732893.8,408614.4,39584.6,14941805.0,391284.6
2009.0,365433.6,1170924.0,397776.9,483839.8,50362.19,17778.52,17897.43,1932479.0,10936.32,1547602.0,...,88754.05,49054.6,466537.7,302251.4,46282.18,697411.3,402020.8,41241.3,14556810.0,385266.3


## Normalisation des données

### Normalisation des données entre 0 et 1, avec calcul du min et du max pour l'ensemble des pays

In [160]:
def _global_min_max(frame):
    """Scale `frame` to [0, 1] using the min and max taken over all of its cells."""
    lowest = frame.min().min()
    highest = frame.max().max()
    return (frame - lowest) / (highest - lowest)


# Global normalisation of the three datasets.
# The files are written with index=False, so the Year index is not saved.
GDP_yearly_30_normalized = _global_min_max(GDP_yearly_30)
GDP_yearly_30_normalized.to_csv('GDP_yearly_30_global_normalized', index=False)

GDP_yearly_27_normalized = _global_min_max(GDP_yearly_27)
GDP_yearly_27_normalized.to_csv('GDP_yearly_27_global_normalized', index=False)

GDP_yearly_25_normalized = _global_min_max(GDP_yearly_25)
GDP_yearly_25_normalized.to_csv('GDP_yearly_25_global_normalized', index=False)

In [161]:
# Display the globally normalised 25-year table.
GDP_yearly_25_normalized

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000.0,0.014588,0.043604,0.016976,0.020525,0.001358,0.000363,0.000372,0.07143,0.00018,0.065433,...,0.002627,0.001631,0.019674,0.010011,0.001309,0.025008,0.014286,0.001375,0.621659,0.013731
2001.0,0.013931,0.044745,0.017197,0.020753,0.001421,0.000378,0.000383,0.072489,0.000191,0.066664,...,0.002712,0.001687,0.019982,0.010364,0.001368,0.023529,0.014082,0.001312,0.627602,0.014109
2002.0,0.012396,0.046617,0.017455,0.021112,0.001518,0.00041,0.000399,0.074689,0.000224,0.068672,...,0.002843,0.001748,0.020438,0.011017,0.00139,0.025053,0.014869,0.001191,0.638279,0.014641
2003.0,0.013525,0.048031,0.017651,0.021334,0.001611,0.000436,0.000417,0.075565,0.00025,0.069917,...,0.002996,0.001811,0.020852,0.011824,0.001468,0.026563,0.015508,0.0012,0.65613,0.015081
2004.0,0.014166,0.050017,0.018093,0.022105,0.001735,0.00048,0.000446,0.07968,0.000257,0.072087,...,0.003172,0.001895,0.021611,0.012582,0.001576,0.029123,0.016605,0.001273,0.681389,0.015779
2005.0,0.015443,0.05154,0.018537,0.022624,0.001874,0.00051,0.000476,0.082596,0.000283,0.07441,...,0.003394,0.001985,0.022221,0.013122,0.00164,0.031789,0.017513,0.001388,0.705135,0.016626
2006.0,0.016707,0.052898,0.019182,0.023208,0.002018,0.000552,0.000512,0.085865,0.000317,0.07638,...,0.003719,0.002127,0.02333,0.013788,0.001739,0.034047,0.018538,0.001456,0.724782,0.017572
2007.0,0.018235,0.055204,0.019903,0.024071,0.002176,0.000601,0.000547,0.090971,0.000351,0.077951,...,0.004149,0.002301,0.02413,0.014557,0.001874,0.035811,0.019826,0.001568,0.739302,0.018528
2008.0,0.018986,0.056643,0.02018,0.02418,0.002324,0.000647,0.000596,0.095335,0.000371,0.078729,...,0.004391,0.002379,0.023862,0.014821,0.001964,0.036055,0.019986,0.0017,0.740149,0.019128
2009.0,0.017847,0.057761,0.019449,0.023714,0.002234,0.000619,0.000625,0.095498,0.00028,0.076427,...,0.004136,0.002169,0.022857,0.014716,0.002032,0.034297,0.01966,0.001782,0.721071,0.018829


### Normalisation des données entre 0 et 1, avec calcul du min et du max pour chaque colonne 

In [162]:
# Per-country min-max scaling: each column is mapped to [0, 1] using its own
# minimum and maximum.
col_min_30 = GDP_yearly_30.min()
col_span_30 = GDP_yearly_30.max() - col_min_30
GDP_yearly_30_colnormalized = (GDP_yearly_30 - col_min_30) / col_span_30

# Save with the Year index as first column (to_csv default).
GDP_yearly_30_colnormalized.to_csv('GDP_yearly_30_colnormalized')

# Display the normalised DataFrame.
GDP_yearly_30_colnormalized

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,Switzerland,...,El Salvador,Serbia,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1995.0,0.2551,0.0,0.0,0.0,0.127484,0.0,0.0,0.0,0.0,0.0,...,0.117867,0.0,0.0,0.0,0.0,0.042127,0.0,0.0,0.0,0.0
1996.0,0.317617,0.030497,0.034659,0.020306,0.194087,0.025494,0.022544,0.014954,0.018966,0.006323,...,0.127552,0.04345,0.040198,0.029625,0.020084,0.089287,0.027935,0.031108,0.038248,0.048832
1997.0,0.415351,0.068106,0.07241,0.079368,0.042826,0.055735,0.067687,0.060813,0.06784,0.038831,...,0.163531,0.111913,0.078223,0.077113,0.059123,0.065465,0.058186,0.063469,0.08504,0.079628
1998.0,0.465062,0.10714,0.131894,0.111068,0.078101,0.088088,0.072163,0.049843,0.114196,0.082504,...,0.194751,0.159426,0.106491,0.107018,0.109977,0.0,0.060125,0.087315,0.134284,0.085704
1999.0,0.419842,0.145945,0.197218,0.169437,0.0,0.090764,0.078065,0.105176,0.177795,0.107437,...,0.22104,0.066885,0.102824,0.159932,0.160203,0.036479,0.045652,0.127098,0.189267,0.115016
2000.0,0.409848,0.17425,0.26051,0.232847,0.036144,0.107723,0.132761,0.134413,0.244634,0.171422,...,0.234952,0.11716,0.10844,0.203394,0.224176,0.073154,0.075463,0.166925,0.238315,0.167543
2001.0,0.352416,0.198741,0.285122,0.252303,0.070884,0.119718,0.152279,0.152927,0.270276,0.197548,...,0.245866,0.172282,0.131084,0.236856,0.245614,0.102717,0.048784,0.157525,0.25028,0.202728
2002.0,0.218249,0.238931,0.313783,0.282837,0.124303,0.137666,0.192835,0.210013,0.312062,0.196635,...,0.265309,0.23122,0.16605,0.274076,0.277468,0.157387,0.076266,0.193782,0.271775,0.252252
2003.0,0.316929,0.26928,0.335578,0.301718,0.175799,0.157352,0.208985,0.25524,0.337983,0.194832,...,0.28567,0.275682,0.20686,0.311818,0.306321,0.224976,0.103509,0.223245,0.307713,0.293181
2004.0,0.372947,0.311924,0.384636,0.367368,0.244086,0.188467,0.284838,0.267212,0.383159,0.236968,...,0.297379,0.342241,0.253769,0.362146,0.359295,0.288471,0.149682,0.273778,0.358565,0.358258


In [165]:
# Per-country min-max scaling: each column is mapped to [0, 1] using its own
# minimum and maximum.
col_min_27 = GDP_yearly_27.min()
col_span_27 = GDP_yearly_27.max() - col_min_27
GDP_yearly_27_colnormalized = (GDP_yearly_27 - col_min_27) / col_span_27

# Save with the Year index as first column (to_csv default).
GDP_yearly_27_colnormalized.to_csv('GDP_yearly_27_colnormalized')

# Display the normalised DataFrame.
GDP_yearly_27_colnormalized

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bolivia,Brazil,Botswana,Canada,Switzerland,...,Serbia,Slovakia,Slovenia,Sweden,Thailand,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1998.0,0.465062,0.0,0.0,0.0,0.078101,0.0,0.0,0.0,0.0,0.0,...,0.099174,0.004087,0.0,0.0,0.0,0.015166,0.0,0.193577,0.0,0.0
1999.0,0.419842,0.04346,0.075248,0.065662,0.0,0.002935,0.006361,0.058236,0.071799,0.027176,...,0.0,0.0,0.059256,0.056431,0.036479,0.0,0.043589,0.169978,0.063511,0.03206
2000.0,0.409848,0.075163,0.148157,0.136995,0.036144,0.021531,0.065312,0.089006,0.147254,0.096914,...,0.053878,0.006259,0.107926,0.12831,0.073154,0.031237,0.087227,0.146532,0.120167,0.08951
2001.0,0.352416,0.102593,0.176508,0.158882,0.070884,0.034685,0.086347,0.108492,0.176202,0.12539,...,0.112951,0.031499,0.145398,0.152396,0.102717,0.003282,0.076927,0.100792,0.133988,0.127994
2002.0,0.218249,0.147604,0.209524,0.193231,0.124303,0.054367,0.130058,0.168572,0.223374,0.124394,...,0.176114,0.070471,0.187079,0.188187,0.157387,0.032078,0.116652,0.012801,0.158818,0.18216
2003.0,0.316929,0.181595,0.23463,0.214471,0.175799,0.075954,0.147464,0.216172,0.252638,0.122429,...,0.223763,0.115958,0.229344,0.220605,0.224976,0.060624,0.148934,0.019506,0.20033,0.226926
2004.0,0.372947,0.229357,0.291142,0.288323,0.244086,0.110075,0.229216,0.228772,0.303638,0.168355,...,0.295093,0.168244,0.285704,0.280125,0.288471,0.109006,0.204301,0.072766,0.25907,0.298102
2005.0,0.484547,0.265981,0.347987,0.338054,0.320722,0.147264,0.287135,0.276316,0.358224,0.220967,...,0.363694,0.234186,0.34624,0.327865,0.333652,0.159386,0.250156,0.156194,0.31429,0.384324
2006.0,0.594983,0.298617,0.430463,0.393983,0.400178,0.189675,0.352099,0.337441,0.404515,0.299419,...,0.412401,0.331015,0.442259,0.414831,0.389413,0.202066,0.301954,0.206076,0.359979,0.480717
2007.0,0.72855,0.354081,0.522798,0.476625,0.487576,0.231811,0.453531,0.398021,0.441436,0.376539,...,0.512431,0.458854,0.559132,0.477481,0.453798,0.235402,0.366993,0.287326,0.393745,0.578092


In [164]:
# Per-country min-max scaling: each column is mapped to [0, 1] using its own
# minimum and maximum.
col_min_25 = GDP_yearly_25.min()
col_span_25 = GDP_yearly_25.max() - col_min_25
GDP_yearly_25_colnormalized = (GDP_yearly_25 - col_min_25) / col_span_25

# Save with the Year index as first column (to_csv default).
GDP_yearly_25_colnormalized.to_csv('GDP_yearly_25_colnormalized')

# Display the normalised DataFrame.
GDP_yearly_25_colnormalized

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000.0,0.409848,0.0,0.0,0.0,0.0,0.016303,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.028047,0.011158,0.146532,0.0,0.0
2001.0,0.352416,0.02966,0.033282,0.025362,0.036043,0.041835,0.013443,0.022506,0.021389,0.033946,...,0.025398,0.042005,0.027632,0.031897,0.049216,0.0,0.0,0.100792,0.015708,0.042267
2002.0,0.218249,0.078329,0.07204,0.065163,0.091465,0.095685,0.033558,0.069271,0.087339,0.089265,...,0.064617,0.088728,0.06869,0.090882,0.067015,0.028891,0.043036,0.012801,0.043929,0.101758
2003.0,0.316929,0.115083,0.101513,0.089775,0.144892,0.139962,0.055621,0.087893,0.13959,0.123581,...,0.11039,0.136107,0.105881,0.163805,0.131131,0.057531,0.078008,0.019506,0.091112,0.150925
2004.0,0.372947,0.166727,0.167854,0.175351,0.21574,0.214541,0.090492,0.175357,0.153421,0.183389,...,0.163005,0.199286,0.174162,0.232312,0.220152,0.106072,0.13799,0.072766,0.157874,0.229099
2005.0,0.484547,0.206326,0.234586,0.232976,0.295249,0.264416,0.128499,0.237324,0.20561,0.247401,...,0.229363,0.267145,0.228929,0.281059,0.273029,0.156619,0.187665,0.156194,0.220636,0.323797
2006.0,0.594983,0.241615,0.331406,0.297783,0.377685,0.335613,0.171843,0.306827,0.272708,0.301685,...,0.326801,0.374781,0.328697,0.34122,0.355333,0.199439,0.243781,0.206076,0.272565,0.429667
2007.0,0.72855,0.301587,0.4398,0.393543,0.468361,0.417959,0.214907,0.415347,0.339206,0.344982,...,0.455446,0.505794,0.400568,0.410688,0.46616,0.232884,0.314239,0.287326,0.310943,0.536614
2008.0,0.79413,0.338997,0.481455,0.405604,0.552912,0.496767,0.276352,0.508105,0.380056,0.366437,...,0.52797,0.564585,0.376492,0.434514,0.540854,0.237517,0.323018,0.383121,0.313182,0.603692
2009.0,0.694583,0.368063,0.371634,0.353907,0.5016,0.449744,0.311932,0.511584,0.199516,0.30298,...,0.451629,0.406288,0.286109,0.425053,0.596771,0.204178,0.305141,0.442822,0.262758,0.570328


## Stationarity tests

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller, kpss

In [20]:
# N=71 countries, length T=25 years (2000 to 2024), scaled to [0, 1]
# (per column for GDP_25, globally for GDP_global_25).
GDP_25 = pd.read_csv(
    '../Data preprocessing/GDP_yearly_25_colnormalized',
    index_col=0,
)
GDP_global_25 = pd.read_csv(
    '../Data preprocessing/GDP_yearly_25_global_normalized',
)

In [21]:
# Augmented Dickey-Fuller (ADF) test
def adf_test(series, alpha=0.05):
    """Run the Augmented Dickey-Fuller test on one series.

    Parameters
    ----------
    series : array-like
        The observations of a single time series, passed as-is to `adfuller`.
    alpha : float, default 0.05
        Significance level used to decide stationarity. The default reproduces
        the threshold that was previously hard-coded.

    Returns
    -------
    dict
        "Statistique de test": the test statistic,
        "Valeur-p": the p-value,
        "Stationnaire": True when the p-value is <= alpha.
    """
    statistic, p_value, *_ = adfuller(series, autolag='AIC')
    return {
        "Statistique de test": statistic,
        "Valeur-p": p_value,
        "Stationnaire": p_value <= alpha,  # True if stationary
    }

# KPSS test
def kpss_test(series, alpha=0.05):
    """Run the KPSS test (constant-only regression, automatic lag choice) on one series.

    Parameters
    ----------
    series : array-like
        The observations of a single time series, passed as-is to `kpss`.
    alpha : float, default 0.05
        Significance level used to decide stationarity. The default reproduces
        the threshold that was previously hard-coded.

    Returns
    -------
    dict
        "Statistique de test": the test statistic,
        "Valeur-p": the p-value,
        "Stationnaire": True when the p-value is > alpha (the decision rule is
        the reverse of the one used in `adf_test`).
    """
    statistic, p_value, *_ = kpss(series, regression='c', nlags='auto')
    return {
        "Statistique de test": statistic,
        "Valeur-p": p_value,
        "Stationnaire": p_value > alpha,  # True if stationary
    }


In [22]:
# One record (dict) per country; converted to a DataFrame after the loop
results = []

# Loop over every column of GDP_25 (one column per country). The count is read
# from the frame instead of being hard-coded to 71, so no country is skipped and
# no IndexError is raised if the input file has a different number of columns.
for n in range(GDP_25.shape[1]):
    # Time series of the n-th country as a NumPy array
    series = GDP_25.iloc[:, n].values

    # Run both stationarity tests on the series
    adf_result = adf_test(series)
    kpss_result = kpss_test(series)

    # "Pays" holds the column position of the country in GDP_25
    results.append({
        "Pays": n,
        "ADF_Stat": adf_result["Statistique de test"],
        "ADF_p_value": adf_result["Valeur-p"],
        "ADF_Stationnaire": adf_result["Stationnaire"],
        "KPSS_Stat": kpss_result["Statistique de test"],
        "KPSS_p_value": kpss_result["Valeur-p"],
        "KPSS_Stationnaire": kpss_result["Stationnaire"],
    })

# Collect the per-country records into a single table
results_df = pd.DataFrame(results)

# Show the results
print(results_df)


    Pays  ADF_Stat  ADF_p_value  ADF_Stationnaire  KPSS_Stat  KPSS_p_value  \
0      0 -1.169690     0.686497             False   0.388573      0.082081   
1      1 -1.963869     0.302636             False   0.686615      0.014762   
2      2 -1.972453     0.298780             False   0.601684      0.022483   
3      3 -0.593495     0.872445             False   0.642392      0.018783   
4      4 -0.804049     0.817930             False   0.667743      0.016478   
..   ...       ...          ...               ...        ...           ...   
66    66  0.333414     0.978827             False   0.702841      0.013287   
67    67 -1.495519     0.535727             False   0.695989      0.013910   
68    68 -3.448596     0.009410              True   0.546432      0.031209   
69    69 -1.319676     0.620123             False   0.671512      0.016135   
70    70 -2.099122     0.244868             False   0.598433      0.022779   

    KPSS_Stationnaire  
0                True  
1              

look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')


In [23]:
# Number of countries whose series the ADF test flags as stationary
int(results_df["ADF_Stationnaire"].sum())

4

In [24]:
# Number of countries whose series the KPSS test flags as stationary
int(results_df["KPSS_Stationnaire"].sum())

6

Selon le test ADF, la série n'est stationnaire que pour 4 pays sur 71 ; selon le test KPSS, elle ne l'est que pour 6 pays.

## Différenciation

In [27]:
# Reload the non-normalised series.
# NOTE: this rebinds GDP_25, which held the column-normalised data until now;
# re-running an earlier cell that reads GDP_25 after this one would use the raw values.
GDP_25 = pd.read_csv('../Data preprocessing/GDP_yearly_25', index_col=0)

In [28]:
# Preview the first rows of the reloaded frame
GDP_25.head()

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000.0,299678.3,885234.2,347857.2,419474.7,32680.04,12601.0,12788.76,1446778.0,8911.75,1325739.0,...,58299.67,38191.01,402314.2,207304.0,31693.32,509954.1,293586.0,33019.32,12550632.0,282386.3
2001.0,286415.2,908255.9,352327.8,424087.2,33950.6,12905.98,13008.93,1468145.0,9128.795,1350597.0,...,60012.34,39314.18,408516.8,214429.0,32896.48,480103.2,289470.4,31750.05,12670565.0,290010.7
2002.0,255431.4,946033.3,357533.9,431325.9,35904.33,13549.23,13338.36,1512544.0,9798.019,1391105.0,...,62656.91,40563.49,417733.3,227605.0,33331.59,510852.0,305344.0,29308.31,12886034.0,300742.2
2003.0,278220.0,974561.1,361492.9,435802.0,37787.7,14078.13,13699.69,1530224.0,10328.23,1416234.0,...,65743.52,41830.33,426081.5,243894.5,34899.0,541334.1,318243.4,29494.37,13246277.0,309611.2
2004.0,291156.7,1014647.0,370404.1,451365.7,40285.19,14968.98,14270.79,1613263.0,10468.58,1460029.0,...,69291.49,43519.64,441408.8,259197.4,37075.23,592996.6,340367.4,30972.33,13756013.0,323712.8


In [29]:
# First-order differencing: delta_t = y_t - y_{t-1}
# diff() leaves NaN in the first row (no previous year to subtract);
# dropna() then removes every row that contains a NaN.
GDP_25_diff = GDP_25.diff(periods=1).dropna(axis=0, how='any')

# Preview the first rows
GDP_25_diff.head()

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001.0,-13263.1,23021.7,4470.6,4612.5,1270.56,304.98,220.17,21367.0,217.045,24858.0,...,1712.67,1123.17,6202.6,7125.0,1203.16,-29850.9,-4115.6,-1269.27,119933.0,7624.4
2002.0,-30983.8,37777.4,5206.1,7238.7,1953.73,643.25,329.43,44399.0,669.224,40508.0,...,2644.57,1249.31,9216.5,13176.0,435.11,30748.8,15873.6,-2441.74,215469.0,10731.5
2003.0,22788.6,28527.8,3959.0,4476.1,1883.37,528.9,361.33,17680.0,530.211,25129.0,...,3086.61,1266.84,8348.2,16289.5,1567.41,30482.1,12899.4,186.06,360243.0,8869.0
2004.0,12936.7,40085.9,8911.2,15563.7,2497.49,890.85,571.1,83039.0,140.35,43795.0,...,3547.97,1689.31,15327.3,15302.9,2176.23,51662.5,22124.0,1477.96,509736.0,14101.6
2005.0,25772.2,30737.0,8963.8,10480.3,2802.84,595.77,622.47,58832.0,529.58,46874.0,...,4474.62,1814.48,12293.5,10889.0,1292.63,53796.6,18322.7,2315.09,479196.0,17082.3


In [32]:
# Stationarity tests on the differenced series

# One record (dict) per country; converted to a DataFrame after the loop
results = []

# Loop over every column of GDP_25_diff (one column per country). The count is
# read from the frame instead of being hard-coded to 71, so no country is skipped
# and no IndexError is raised if the frame has a different number of columns.
for n in range(GDP_25_diff.shape[1]):
    # Differenced series of the n-th country as a NumPy array
    series = GDP_25_diff.iloc[:, n].values

    # Run both stationarity tests on the series
    adf_result = adf_test(series)
    kpss_result = kpss_test(series)

    # "Pays" holds the column position of the country in GDP_25_diff
    results.append({
        "Pays": n,
        "ADF_Stat": adf_result["Statistique de test"],
        "ADF_p_value": adf_result["Valeur-p"],
        "ADF_Stationnaire": adf_result["Stationnaire"],
        "KPSS_Stat": kpss_result["Statistique de test"],
        "KPSS_p_value": kpss_result["Valeur-p"],
        "KPSS_Stationnaire": kpss_result["Stationnaire"],
    })

# Collect the per-country records into a single table
results_df = pd.DataFrame(results)

# Show the results
print(results_df)

look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c'

    Pays  ADF_Stat  ADF_p_value  ADF_Stationnaire  KPSS_Stat  KPSS_p_value  \
0      0  0.074915     0.964306             False   0.348559      0.099328   
1      1 -1.134984     0.700921             False   0.278392      0.100000   
2      2 -1.235303     0.658228             False   0.352526      0.097618   
3      3 -5.531575     0.000002              True   0.278962      0.100000   
4      4 -3.119939     0.025112              True   0.308344      0.100000   
..   ...       ...          ...               ...        ...           ...   
66    66 -4.006085     0.001379              True   0.126277      0.100000   
67    67 -2.316508     0.166699             False   0.236755      0.100000   
68    68 -1.347897     0.606985             False   0.350795      0.098364   
69    69 -1.156465     0.692038             False   0.220540      0.100000   
70    70  0.637464     0.988504             False   0.507471      0.039984   

    KPSS_Stationnaire  
0                True  
1              

look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c'

In [34]:
# Number of differenced series the ADF test flags as stationary
int(results_df["ADF_Stationnaire"].sum())

13

In [35]:
# Number of differenced series the KPSS test flags as stationary
int(results_df["KPSS_Stationnaire"].sum())

68

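=> Sur la série différenciée : 13 séries stationnaires selon ADF, 68 selon KPSS. Les deux tests ont des hypothèses nulles opposées (racine unitaire pour ADF, stationnarité pour KPSS) : avec seulement 24 observations par série, aucun des deux ne rejette facilement son hypothèse nulle, d'où cet écart.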

In [42]:
# Column-wise min-max scaling of the differenced series: each country is mapped
# to [0, 1] using its own minimum and maximum (broadcast across the rows).
GDP_yearly_25_diff_normalized = (GDP_25_diff - GDP_25_diff.min()) / (
    GDP_25_diff.max() - GDP_25_diff.min()
)

# Export both series (raw differences, then their min-max normalised version).
# The paths are relative, so the files land in the current working directory;
# index=True keeps the index as the first CSV column.
GDP_25_diff.to_csv('GDP_yearly_25_diff', index=True)

GDP_yearly_25_diff_normalized.to_csv('GDP_yearly_25_diff_normalized', index=True)


## Taux d'évolution

In [31]:
# Year-over-year growth rate: tau_t = (y_t - y_{t-1}) / y_{t-1}
# fill_method=None turns off the implicit forward fill that pct_change() applies
# by default (deprecated in recent pandas): a missing year then stays NaN instead
# of producing a fabricated 0 % growth, consistent with .diff() used for the
# differenced series. On data without gaps the result is unchanged.
# dropna() then removes every row containing a NaN (at least the first year).
GDP_25_growth = GDP_25.pct_change(fill_method=None).dropna()

# Preview the first rows
GDP_25_growth.head()

Unnamed: 0_level_0,Argentina,Australia,Austria,Belgium,Bulgaria,Bosnia and Herzegovina,Bolivia,Brazil,Botswana,Canada,...,Slovakia,Slovenia,Sweden,Thailand,Tunisia,Turkey,"Taiwan, China",Uruguay,United States,South Africa
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001.0,-0.044258,0.026006,0.012852,0.010996,0.038879,0.024203,0.017216,0.014769,0.024355,0.01875,...,0.029377,0.029409,0.015417,0.03437,0.037963,-0.058536,-0.014018,-0.03844,0.009556,0.027
2002.0,-0.108178,0.041593,0.014776,0.017069,0.057546,0.049841,0.025323,0.030242,0.073309,0.029993,...,0.044067,0.031778,0.022561,0.061447,0.013227,0.064046,0.054837,-0.076905,0.017005,0.037004
2003.0,0.089216,0.030155,0.011073,0.010378,0.052455,0.039035,0.02709,0.011689,0.054114,0.018064,...,0.049262,0.031231,0.019985,0.071569,0.047025,0.059669,0.042245,0.006348,0.027956,0.02949
2004.0,0.046498,0.041132,0.024651,0.035713,0.066093,0.063279,0.041687,0.054266,0.013589,0.030924,...,0.053967,0.040385,0.035973,0.062744,0.062358,0.095436,0.069519,0.05011,0.038481,0.045546
2005.0,0.088517,0.030293,0.0242,0.023219,0.069575,0.0398,0.043618,0.036468,0.050588,0.032105,...,0.064577,0.041693,0.027851,0.04201,0.034865,0.09072,0.053832,0.074747,0.034835,0.05277


In [36]:
# Stationarity tests on the growth-rate series

# One record (dict) per country; converted to a DataFrame after the loop
results = []

# Loop over every column of GDP_25_growth (one column per country). The count is
# read from the frame instead of being hard-coded to 71, so no country is skipped
# and no IndexError is raised if the frame has a different number of columns.
for n in range(GDP_25_growth.shape[1]):
    # Growth-rate series of the n-th country as a NumPy array
    series = GDP_25_growth.iloc[:, n].values

    # Run both stationarity tests on the series
    adf_result = adf_test(series)
    kpss_result = kpss_test(series)

    # "Pays" holds the column position of the country in GDP_25_growth
    results.append({
        "Pays": n,
        "ADF_Stat": adf_result["Statistique de test"],
        "ADF_p_value": adf_result["Valeur-p"],
        "ADF_Stationnaire": adf_result["Stationnaire"],
        "KPSS_Stat": kpss_result["Statistique de test"],
        "KPSS_p_value": kpss_result["Valeur-p"],
        "KPSS_Stationnaire": kpss_result["Stationnaire"],
    })

# Collect the per-country records into a single table
results_df = pd.DataFrame(results)

# Show the results
print(results_df)

look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c'

    Pays  ADF_Stat   ADF_p_value  ADF_Stationnaire  KPSS_Stat  KPSS_p_value  \
0      0 -0.982175  7.597099e-01             False   0.371240      0.089552   
1      1  0.025397  9.605827e-01             False   0.371916      0.089260   
2      2 -1.088410  7.196716e-01             False   0.370262      0.089973   
3      3 -6.912727  1.200839e-09              True   0.299290      0.100000   
4      4 -1.975649  2.973511e-01             False   0.451508      0.054954   
..   ...       ...           ...               ...        ...           ...   
66    66 -2.682574  7.710260e-02             False   0.185171      0.100000   
67    67 -1.552521  5.073889e-01             False   0.357164      0.095619   
68    68 -1.558175  5.045623e-01             False   0.384003      0.084050   
69    69 -0.867877  7.983668e-01             False   0.239055      0.100000   
70    70  0.462727  9.836951e-01             False   0.582974      0.024184   

    KPSS_Stationnaire  
0                True  
1  

look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')
look-up table. The actual p-value is greater than the p-value returned.

  result = kpss(series, regression='c', nlags='auto')


In [37]:
# Number of growth-rate series the ADF test flags as stationary
int(results_df["ADF_Stationnaire"].sum())

8

In [38]:
# Number of growth-rate series the KPSS test flags as stationary
int(results_df["KPSS_Stationnaire"].sum())

57

=> Sur la série du taux d'évolution, 8 séries stationnaires selon ADF et 57 selon KPSS. 

In [39]:
# Export the growth-rate series; the relative path writes into the current
# working directory and index=True keeps the index as the first CSV column.
GDP_25_growth.to_csv('GDP_yearly_25_growth', index=True)