# Imports

In [1]:
!pip install lxml



In [2]:
!pip install mtranslate==1.8



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import mtranslate as mt

# General Funtions

In [4]:
def trimester_to_date(trimester):
    year, quarter = trimester.split('-')
    quarter=quarter[1]
    month = int(quarter) * 3
    return pd.Timestamp(year=int(year), month=month, day=1)

In [5]:
def translate_country_name(text):
    return mt.translate(text, 'en', 'fr')

# Country Codes

In [6]:
codes=pd.read_html('https://www.iban.com/country-codes')
cases=pd.read_csv('global\dataglobal\clean_weekly_cases_per_million.csv',parse_dates=True)
cases['date']=pd.to_datetime(cases['date'])

In [7]:
cases=cases[(cases['date']>=pd.Timestamp(year=2021,month=1,day=1)) & (cases['date']<pd.Timestamp(year=2023,month=1,day=1))][['Countries','Cases']]
cases=cases.groupby('Countries').mean()
codes=pd.DataFrame(codes[0])[['Country','Alpha-3 code']]

In [8]:
codes['Country']=codes.Country.replace({'United States of America (the)':'United States','Venezuela (Bolivarian Republic of)':'Venezuela',
               'Bolivia (Plurinational State of)':'Bolivia','Côte d\'Ivoire':'Cote d\'Ivoire','Russian Federation (the)':'Russia',
               'United Kingdom of Great Britain and Northern Ireland (the)':'United Kingdom','Viet Nam':'Vietnam',
               'Lao People\'s Democratic Republic (the)':'Laos','Korea (the Republic of)':'South Korea'})

In [9]:
codes=codes.set_index('Country')
df=cases.join(codes,how='inner')
df.columns=['Cases','Alpha3']
df.to_csv('global\dataglobal\clean_countrycodes.csv')

# Covid Data

In [10]:
stringency=pd.read_json('global\stringency.json')

stringency=stringency.drop(['stringency_index_nonvac','stringency_index_vac','stringency_index_weighted_avg'],axis=1)
stringency.rename(columns={'location':'Countries'},inplace=True)

stringency.to_csv('global\dataglobal\clean_stringency.csv')

In [11]:
deaths=pd.read_csv('global\weekly_deaths_per_million.csv',parse_dates=True)

deaths['date']=pd.to_datetime(deaths['date'])
deaths = pd.melt(deaths, id_vars=['date'], var_name='Countries', value_name='Deaths')

deaths.to_csv('global\dataglobal\clean_weekly_deaths_per_million.csv')

In [12]:
cases=pd.read_csv('global\weekly_cases_per_million.csv',parse_dates=True)

cases['date']=pd.to_datetime(cases['date'])
cases = pd.melt(cases, id_vars=['date'], var_name='Countries', value_name='Cases')

cases.to_csv('global\dataglobal\clean_weekly_cases_per_million.csv')

In [13]:
#merge
temp = pd.merge(right=stringency,left=cases,on=['date','Countries'],how='left')
covid_df = pd.merge(right=temp,left=deaths,on=['date','Countries'],how='left')

covid_df.to_csv("global/dataglobal/clean_covid_recap.csv")

# Employment

In [14]:
employment=pd.read_csv('Global_original/employment.csv')
employment=employment[employment['UNIT_MEASURE']=='H']
employment=employment[employment['Transaction']=='Emploi total']
employment=employment[employment['ADJUSTMENT']=='Y']

In [15]:
employment2=employment.drop(['STRUCTURE','STRUCTURE_ID','STRUCTURE_NAME','ACTION','FREQ',
       'ADJUSTMENT', 'Ajustement', 'REF_AREA','SECTOR','Secteur institutionnel','COUNTERPART_SECTOR','Secteur institutionnel de contrepartie',
       'TRANSACTION','INSTR_ASSET','Instruments financiers et actifs non financiers','ACTIVITY','EXPENDITURE','Dépense','UNIT_MEASURE','PRICE_BASE',
       'Type de prix','TRANSFORMATION','Transformation','TABLE_IDENTIFIER','Identifiant de tableau','Période temporelle','Valeur d\'observation',
       'REF_YEAR_PRICE','Prix ​​année de référence','BASE_PER','Période de base','CONF_STATUS','Statut de confidentialité','DECIMALS','Décimales',
       'OBS_STATUS','Statut d\'observation','UNIT_MULT','CURRENCY','Monnaie'],axis=1)

employment2.TIME_PERIOD=employment2.TIME_PERIOD.astype(str)
employment2=employment2[employment2['Fréquence d\'observation']=='Trimestrielle']
employment2.OBS_VALUE=employment2.OBS_VALUE.astype(float)

In [16]:
for country in employment2['Zone de référence'].unique():
    for activite in employment2['Activité économique'].unique():
        a=employment2[(employment2['Zone de référence']==country) & (employment2['Activité économique']==activite)]['OBS_VALUE'].pct_change()
        employment2.loc[(employment2['Zone de référence']==country) & (employment2['Activité économique']==activite),'OBS_VALUE']=a

employment2=employment2.dropna(subset='OBS_VALUE')
employment2.loc[:,'TIME_PERIOD']=employment2.TIME_PERIOD.apply(lambda x: trimester_to_date(x))
employment2['TIME_PERIOD']=pd.to_datetime(employment2['TIME_PERIOD'])

In [17]:
translate_dic = {}
for country in list(employment2['Zone de référence'].unique()):
    translate_dic[country] = translate_country_name(country)
    
    
employment2['Zone de référence'] = employment2['Zone de référence'].replace(translate_dic)

In [18]:
employment2.to_csv('global/dataglobal/clean_employment.csv')

# Employee Compensation

In [19]:
compensation=pd.read_csv('Global_original/employeecompensation.csv')
compensation=compensation[compensation['FREQ']=='Q']
compensation=compensation[compensation['ADJUSTMENT']=='Y']

In [20]:
compensation2=compensation.drop(['STRUCTURE','STRUCTURE_ID','ACTION','FREQ','Fréquence d\'observation','ADJUSTMENT','Ajustement',
                                 'REF_AREA','SECTOR','COUNTERPART_SECTOR','Secteur institutionnel de contrepartie','TRANSACTION','INSTR_ASSET','Instruments financiers et actifs non financiers',
                                 'ACTIVITY','EXPENDITURE','Dépense','UNIT_MEASURE','Unité de mesure','PRICE_BASE','Type de prix','TRANSFORMATION','Transformation',
                                 'TABLE_IDENTIFIER','Identifiant de tableau','Période temporelle','Valeur d\'observation','REF_YEAR_PRICE','Prix ​​année de référence',
                                 'BASE_PER','Période de base','CONF_STATUS','Statut de confidentialité','DECIMALS','Décimales','OBS_STATUS','Statut d\'observation',
                                 'UNIT_MULT','CURRENCY','Monnaie'],axis=1)

compensation2.TIME_PERIOD=compensation2.TIME_PERIOD.astype(str)
compensation2.OBS_VALUE=compensation2.OBS_VALUE.astype(float)

In [21]:
for country in compensation2['Zone de référence'].unique():
    for activite in compensation2[compensation2['Zone de référence']==country]['Activité économique'].unique():
        a=compensation2[(compensation2['Zone de référence']==country) & (compensation2['Activité économique']==activite)]['OBS_VALUE'].pct_change()
        compensation2.loc[(compensation2['Zone de référence']==country) & (compensation2['Activité économique']==activite),'OBS_VALUE']=a

compensation2=compensation2.dropna(subset='OBS_VALUE')
compensation2.loc[:,'TIME_PERIOD']=compensation2.TIME_PERIOD.apply(lambda x: trimester_to_date(x))
compensation2['TIME_PERIOD']=pd.to_datetime(compensation2['TIME_PERIOD'])

In [22]:
translate_dic = {}
for country in list(compensation2['Zone de référence'].unique()):
    translate_dic[country] = translate_country_name(country)
    
    
compensation2['Zone de référence'] = compensation2['Zone de référence'].replace(translate_dic)

In [23]:
compensation2.to_csv('global/dataglobal/clean_employee_compensation.csv')

# Final Consumption Expenditure

### Europe

In [24]:
consumption = pd.read_csv("Global_original/finalconsumptionexpenditure.csv")

for column in consumption.columns:
    if (len(consumption[column].unique()) <= 1) and (column not in ["Secteur institutionnel"]):
        consumption.drop(columns=[column],inplace=True)

consumption = consumption[consumption["FREQ"] == "Q"]
consumption = consumption[consumption["Ajustement"] == "Corrigé des variations saisonnières et des effets de calendrier"]
consumption = consumption[consumption["Type de prix"]=="Volumes chaînés"]
consumption = consumption[consumption["Transaction"]=="Dépense de consommation finale des résidents et non-résidents sur le territoire économique"]

consumption.drop(columns=["Fréquence d'observation","FREQ","ADJUSTMENT","Ajustement","REF_AREA","TRANSACTION","Transaction","Type de prix","PRICE_BASE","OBS_STATUS","REF_YEAR_PRICE","Statut d'observation","Monnaie"], inplace= True)

  consumption = pd.read_csv("Global_original/finalconsumptionexpenditure.csv")


In [25]:
#Just checking that I still have all of the years

lista = sorted(list(consumption["TIME_PERIOD"].unique()))
lista_anos = []
for i in lista:
    try: 
        int(i)
        lista_anos.append(i)

    except:
        pass

for i in range(len(lista_anos)-1):
    if int(lista_anos[i+1]) != int(lista_anos[i]) + 1:
        print(lista_anos[i],lista_anos[i+1])

In [26]:
for country in list(consumption['Zone de référence'].unique()):
    consumption.loc[consumption['Zone de référence']==country,"OBS_VALUE"] = consumption[consumption['Zone de référence']==country]["OBS_VALUE"].pct_change()

consumption.TIME_PERIOD = consumption.TIME_PERIOD.apply(lambda x: trimester_to_date(x))

In [27]:
translate_dic = {}
for country in list(consumption['Zone de référence'].unique()):
    translate_dic[country] = translate_country_name(country)
    
    
consumption['Zone de référence'] = consumption['Zone de référence'].replace(translate_dic)

### Brazil

In [28]:
consumption_brazil = pd.read_csv("data_manu/ Real Final Consumption Expenditure for Brazil.csv")
consumption_brazil

consumption_brazil.insert(loc=0, column='Zone de référence', value=['Brazil']*len(consumption_brazil))
consumption_brazil.insert(loc=1, column='Secteur institutionnel', value=['Ménages']*len(consumption_brazil))
consumption_brazil.insert(loc=4, column='CURRENCY', value=['BRL']*len(consumption_brazil))
consumption_brazil.rename(columns={'DATE':'TIME_PERIOD','NCRSAXDCBRQ':'OBS_VALUE'},inplace=True)

In [29]:
for country in list(consumption_brazil['Zone de référence'].unique()):
    consumption_brazil.loc[consumption_brazil['Zone de référence']==country,"OBS_VALUE"] = consumption_brazil[consumption_brazil['Zone de référence']==country]["OBS_VALUE"].pct_change()

In [30]:
translate_dic = {}
for country in list(consumption_brazil['Zone de référence'].unique()):
    translate_dic[country] = translate_country_name(country)
    
    
consumption_brazil['Zone de référence'] = consumption_brazil['Zone de référence'].replace(translate_dic)

### China

In [31]:
consumption_china = pd.read_csv("data_manu/Final consumption expenditure (constant LCU) - China.csv")

consumption_china.dropna(how='all', inplace=True)
consumption_china.dropna(subset=['2022'], inplace=True)
consumption_china.drop(columns=["Country Code", "Indicator Code"], inplace=True)

In [32]:
not_usefull = ["Australia","Austria", "Belgium", "Bulgaria", "Brazil", "Canada", "Costa Rica", "Switzerland", "Spain", "Finland", "France", "United Kingdom",
                "Ireland", "Netherlands", "Poland", "Ukraine", "United States", "Italy", "Luxembourg", "Lithuania", "Croatia", "Hungary", 
                "Iceland", "Norway", "Portugal", "Romania", "New Zealand", "Albania", "Belarus", "Armenia", "Cyprus", "Germany", "Denmark",
                "Estonia", "Georgia", "Greece", "Czechia", "Latvia", "Malta", "Moldova", "Montenegro", "Serbia", "Slovakia", "Slovenia", "Sweden", 
                "Kosovo"]

consumption_china = consumption_china[~(consumption_china["Country Name"].isin(not_usefull))]

In [33]:
consumption_china = pd.melt(consumption_china, id_vars=['Country Name', 'Indicator Name'], var_name='TIME_PERIOD', value_name='OBS_VALUE')

consumption_china = consumption_china[consumption_china["TIME_PERIOD"] != "Unnamed: 67"]
consumption_china.sort_values(by="Country Name", inplace=True)

consumption_china.drop(columns=['Indicator Name'],inplace=True)
consumption_china.insert(loc=1, column='Secteur institutionnel', value=['Ménages']*len(consumption_china))
consumption_china.insert(loc=4, column='CURRENCY', value=['NAN']*len(consumption_china))
consumption_china.rename(columns={'Country Name':'Zone de référence'},inplace=True)

In [34]:
for country in list(consumption_china['Zone de référence'].unique()):
    consumption_china.loc[consumption_china['Zone de référence']==country,"OBS_VALUE"] = consumption_china[consumption_china['Zone de référence']==country]["OBS_VALUE"].pct_change()

  consumption_china.loc[consumption_china['Zone de référence']==country,"OBS_VALUE"] = consumption_china[consumption_china['Zone de référence']==country]["OBS_VALUE"].pct_change()


In [35]:
for i in list(consumption['Zone de référence'].unique()):
    if i in list(consumption_china['Zone de référence'].unique()):
        print(i)

Slovak Republic
Colombia
Japan
Israel
Chile


### Concatenating everything

In [36]:
consumption = pd.concat([consumption, consumption_brazil, consumption_china])
consumption.sort_values(by=["Zone de référence","TIME_PERIOD"], inplace=True)

In [37]:
consumption.to_csv("global/dataglobal/clean_finalconsumptionexpenditure.csv")

# GFCF

### Europe

In [38]:
europe_gfcf = pd.read_csv("Global_original/GFCF.csv")

for column in europe_gfcf.columns:
    if (len(europe_gfcf[column].unique()) <= 1) and (column not in ["Secteur institutionnel"]):
        europe_gfcf.drop(columns=[column],inplace=True)

europe_gfcf = europe_gfcf[europe_gfcf["FREQ"] == "Q"]
europe_gfcf = europe_gfcf[europe_gfcf["Ajustement"] == "Corrigé des variations saisonnières et des effets de calendrier"]
europe_gfcf = europe_gfcf[europe_gfcf["Type de prix"]=="Volumes chaînés"]
europe_gfcf = europe_gfcf[europe_gfcf["Instruments financiers et actifs non financiers"].isin(['Actifs fixes, brut','Logements, brut'])]

europe_gfcf.drop(columns=["Fréquence d'observation","FREQ","ADJUSTMENT","Ajustement","REF_AREA","INSTR_ASSET","Type de prix","PRICE_BASE","OBS_STATUS","REF_YEAR_PRICE","Statut d'observation","Monnaie",'CURRENCY'], inplace= True)

In [39]:
#Just checking that I still have all of the years

lista = sorted(list(europe_gfcf["TIME_PERIOD"].unique()))
lista_anos = []
for i in lista:
    try: 
        int(i)
        lista_anos.append(i)

    except:
        pass

for i in range(len(lista_anos)-1):
    if int(lista_anos[i+1]) != int(lista_anos[i]) + 1:
        print(lista_anos[i],lista_anos[i+1])

In [40]:
for country in list(europe_gfcf['Zone de référence'].unique()):
    europe_gfcf.loc[europe_gfcf['Zone de référence']==country,"OBS_VALUE"] = europe_gfcf[europe_gfcf['Zone de référence']==country]["OBS_VALUE"].pct_change(periods=4)

europe_gfcf.TIME_PERIOD = europe_gfcf.TIME_PERIOD.apply(lambda x: trimester_to_date(x))

In [41]:
translate_dic = {}
for country in list(europe_gfcf['Zone de référence'].unique()):
    translate_dic[country] = translate_country_name(country)
    
    
europe_gfcf['Zone de référence'] = europe_gfcf['Zone de référence'].replace(translate_dic)

### Out of Europe

Brazil

In [53]:
br_gfcf =pd.read_csv('data_manu/gfcf_brazil.csv')
br_gfcf["NFIRSAXDCBRQ"] = br_gfcf["NFIRSAXDCBRQ"].pct_change(periods=4)
br_gfcf = br_gfcf[br_gfcf["DATE"] >= "1997-01-01"]
br_gfcf.rename(columns={'DATE': 'TIME_PERIOD','BRAGFCFQDSMEI': 'OBS_VALUE'}, inplace=True)
br_gfcf['Country Name'] = "Brazil"

South Africa

In [54]:
sa_gfcf =pd.read_csv('data_manu/gfcf_south_africa.csv')
sa_gfcf["NFIRSAXDCZAQ"] = sa_gfcf["NFIRSAXDCZAQ"].pct_change(periods=4)
sa_gfcf = sa_gfcf[sa_gfcf["DATE"] >= "1994-01-01"]
sa_gfcf.rename(columns={'DATE': 'TIME_PERIOD','NFIRSAXDCZAQ': 'OBS_VALUE'}, inplace=True)
sa_gfcf['Country Name'] = "South Africa"

Indonesia

In [55]:
ids_gfcf =pd.read_csv('data_manu/gfcf_indonesia.csv')
ids_gfcf["NFIRSAXDCIDQ"] = ids_gfcf["NFIRSAXDCIDQ"].pct_change(periods=4)
ids_gfcf = ids_gfcf[ids_gfcf["DATE"] >= "2001-01-01"]
ids_gfcf.rename(columns={'DATE': 'TIME_PERIOD','NFIRSAXDCIDQ': 'OBS_VALUE'}, inplace=True)
ids_gfcf['Country Name'] = "Indonesia"

India

In [56]:
india_gfcf =pd.read_csv('data_manu/gfcf_india.csv')
india_gfcf = india_gfcf[india_gfcf["DATE"] >= "2005-01-01"]

india_gfcf["NFIRNSAXDCINQ"] = india_gfcf["NFIRNSAXDCINQ"].pct_change(periods=4)
india_gfcf.rename(columns={'DATE': 'TIME_PERIOD', 'NFIRNSAXDCINQ': 'OBS_VALUE'}, inplace=True)

india_gfcf = india_gfcf[india_gfcf["TIME_PERIOD"] >= "2006-01-01"]
india_gfcf['Country Name'] = "India"

other african countries GFCF

In [58]:
other_gfcf = pd.read_csv('data_manu/gfcf_costa_rica.csv')

other_gfcf.dropna(how='all', inplace=True)
other_gfcf.dropna(subset=['2022'], inplace=True)
other_gfcf.drop(columns=["Country Code", "Indicator Code"], inplace=True)

In [59]:
not_usefull = ["Australia","Austria", "Belgium", "Bulgaria", "Brazil", "Canada", "Switzerland", "Spain", "Finland", "France", "United Kingdom",
                "Ireland", "Netherlands", "Poland", "Ukraine", "United States", "Italy", "Luxembourg", "Lithuania", "Croatia", "Hungary", 
                "Iceland", "Norway", "Portugal", "Romania", "New Zealand", "Albania", "Belarus", "Armenia", "Cyprus", "Germany", "Denmark",
                "Estonia", "Georgia", "Greece", "Czechia", "Latvia", "Malta", "Moldova", "Montenegro", "Serbia", "Slovakia", "Slovenia", "Sweden", 
                "Kosovo"]

other_gfcf = other_gfcf[~(other_gfcf["Country Name"].isin(not_usefull))]

In [60]:
other_gfcf = pd.melt(other_gfcf, id_vars=['Country Name', 'Indicator Name'], var_name='TIME_PERIOD', value_name='OBS_VALUE')

other_gfcf = other_gfcf[other_gfcf["TIME_PERIOD"] != "Unnamed: 67"]
other_gfcf.sort_values(by="Country Name", inplace=True)

In [61]:
for country in list(other_gfcf['Country Name'].unique()):
    other_gfcf.loc[other_gfcf['Country Name']==country,"OBS_VALUE"] = other_gfcf[other_gfcf['Country Name']==country]["OBS_VALUE"].pct_change(periods=4)

  other_gfcf.loc[other_gfcf['Country Name']==country,"OBS_VALUE"] = other_gfcf[other_gfcf['Country Name']==country]["OBS_VALUE"].pct_change(periods=4)


### Concatenating everything

In [62]:
other_gfcf = pd.concat([other_gfcf, india_gfcf, br_gfcf, sa_gfcf, ids_gfcf])
other_gfcf.sort_values(by="Country Name", inplace=True)

In [63]:
other_gfcf.drop(columns=['Indicator Name'],inplace=True)
other_gfcf.rename(columns={'Country Name':'Zone de référence'},inplace=True)
other_gfcf.insert(loc=1, column='Secteur institutionnel', value=['Economie totale']*len(other_gfcf))
other_gfcf.insert(loc=2, column='Instruments financiers et actifs non financiers', value=['Actifs fixes, brut']*len(other_gfcf))

In [64]:
gfcf = pd.concat([other_gfcf, europe_gfcf])
gfcf.sort_values(by=["Zone de référence","TIME_PERIOD"], inplace=True)

In [65]:
gfcf.to_csv("global/dataglobal/clean_gfcf.csv")

# GDP

In [47]:
real_gdp =pd.read_csv('Global_original/realgdpoecd.csv')

for column in real_gdp.columns:
    if (len(real_gdp[column].unique()) <= 1) and (column != "Unité de mesure"):
        real_gdp.drop(columns=[column],inplace=True)

#leave only the trimesters values
real_gdp = real_gdp[real_gdp["FREQ"] == "Q"]
real_gdp.TIME_PERIOD = real_gdp.TIME_PERIOD.apply(lambda x: trimester_to_date(x))
real_gdp = real_gdp[real_gdp["Secteur institutionnel"] == "Economie totale"]
real_gdp = real_gdp[real_gdp["Transaction"] == "Produit intérieur brut"]
real_gdp = real_gdp[real_gdp["Transformation"] == "Taux de croissance, sur 1 an"]

#dropping unimportant columns
real_gdp.drop(columns=["FREQ", "Fréquence d'observation", "TRANSACTION", "SECTOR", "REF_YEAR_PRICE",
                        "ACTIVITY", "EXPENDITURE", "TRANSFORMATION","REF_AREA", "OBS_STATUS","Statut d'observation","Dépense"], inplace=True)

real_gdp.to_csv("global/dataglobal/clean_realgdp.csv")


In [48]:
real_gdp20 =pd.read_csv('Global_original/realgdpg20.csv')

for column in real_gdp20.columns:
    if (len(real_gdp20[column].unique()) <= 1) and (column != "Unité de mesure"):
        real_gdp20.drop(columns=[column],inplace=True)

#leave only the trimesters values
real_gdp20 = real_gdp20[real_gdp20["FREQ"] == "Q"]
real_gdp20.TIME_PERIOD = real_gdp20.TIME_PERIOD.apply(lambda x: trimester_to_date(x))

real_gdp20 = real_gdp20[real_gdp20["Secteur institutionnel"] == "Economie totale"]
real_gdp20 = real_gdp20[real_gdp20["Transaction"] == "Produit intérieur brut"]
real_gdp20 = real_gdp20[real_gdp20["Transformation"] == "Taux de croissance, sur 1 an"]
        
real_gdp20.drop(columns=["FREQ", "REF_AREA", "OBS_STATUS", "Statut d'observation", "Dépense", "Fréquence d'observation", "TRANSACTION", "SECTOR", "REF_YEAR_PRICE",
                        "ACTIVITY", "EXPENDITURE", "TRANSFORMATION"], inplace=True)

real_gdp20.to_csv("global/dataglobal/clean_realgdpg20.csv")

In [74]:
egp_gdp =pd.read_csv('data_manu/gdp_egypt.csv')

egp_gdp.drop(columns=["Source"], inplace= True)

egp_gdp.to_csv("global/dataglobal/clean_gdp_gdp.csv")

In [82]:
nig_gdp =pd.read_csv('data_manu/gdp_nigeria.csv', index_col=False)

nig_gdp = nig_gdp[["year", "Period", "GDP at 2010 Constant Basic Prices", "GDP at 2010 Constant Market Prices" ]]
nig_annual = nig_gdp[(nig_gdp["Period"] == "Annual") & (nig_gdp["year"] <= 2010)]
nig_quart = nig_gdp[(nig_gdp["Period"] != "Annual") & (nig_gdp["year"] >= 2010)]

nig_annual["GDP at 2010 Constant Basic Prices"] = nig_annual["GDP at 2010 Constant Basic Prices"].pct_change()
nig_annual = nig_annual[nig_annual["year"] != 1981]

nig_quart["GDP at 2010 Constant Basic Prices"] = nig_quart["GDP at 2010 Constant Basic Prices"].pct_change(periods=4)
nig_quart = nig_quart[nig_quart["year"] != 2010]

nig_gdp = pd.concat([nig_annual, nig_quart])
nig_gdp.drop(columns=["GDP at 2010 Constant Market Prices"], inplace=True)

nig_gdp.to_csv("global/dataglobal/clean_nig_gdp.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nig_annual["GDP at 2010 Constant Basic Prices"] = nig_annual["GDP at 2010 Constant Basic Prices"].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nig_quart["GDP at 2010 Constant Basic Prices"] = nig_quart["GDP at 2010 Constant Basic Prices"].pct_change(periods=4)


Second part

In [84]:
egypt=pd.read_csv('global\dataglobal\clean_gdp_gdp.csv',index_col='Unnamed: 0')
nigeria=pd.read_csv('global\dataglobal\clean_nig_gdp.csv',index_col='Unnamed: 0')
gdp=pd.read_csv('global/dataglobal/clean_realgdp.csv',index_col='Unnamed: 0')
gdp20=pd.read_csv('global/dataglobal/clean_realgdpg20.csv',index_col='Unnamed: 0')

In [85]:
gdp20countries=[i for i in gdp20['Zone de référence'].unique() if i not in gdp['Zone de référence'].unique()]

gdp20=gdp20[gdp20['Zone de référence'].isin(gdp20countries)]

gdp=pd.concat([gdp,gdp20])

In [86]:
egypt['Fiscal Year']=egypt['Fiscal Year'].apply(lambda x: x[:4])
egypt['TIME_PERIOD']=egypt['Fiscal Year']+'-'+egypt['Quarter']
egypt['TIME_PERIOD']=egypt['TIME_PERIOD'].apply(lambda x: trimester_to_date(x))
egypt=egypt.drop(['Indicator','Unit','Quarter','Fiscal Year'],axis=1)
egypt[['Zone de référence','Secteur institutionnel','Transaction','Activité économique','Unité de mesure','Transformation']]='Egypte','Economie totale','Produit intérieur brut','Non applicable','Changement en pourcentage','Taux de croissance, sur 1 an'
egypt=egypt.rename(columns={'Real Growth Rate':'OBS_VALUE'})

gdp=pd.concat([gdp,egypt])

In [87]:
nigeria['Period']=nigeria['Period'].replace({'Annual':'Q1'})
nigeria['TIME_PERIOD']=nigeria['year'].astype(str)+'-'+nigeria['Period']
nigeria[['Zone de référence','Secteur institutionnel','Transaction','Activité économique','Unité de mesure','Transformation']]='Nigeria','Economie totale','Produit intérieur brut','Non applicable','Changement en pourcentage','Taux de croissance, sur 1 an'
nigeria=nigeria.drop(['year','Period'],axis=1)
nigeria=nigeria.rename(columns={'GDP at 2010 Constant Basic Prices':'OBS_VALUE'})
nigeria['OBS_VALUE']=nigeria['OBS_VALUE']*100
nigeria['TIME_PERIOD']=nigeria['TIME_PERIOD'].apply(lambda x: trimester_to_date(x))

gdp=pd.concat([gdp,nigeria])

In [88]:
translate_dic = {}
for country in list(gdp['Zone de référence'].unique()):
    translate_dic[country] = translate_country_name(country)
    
    
gdp['Zone de référence'] = gdp['Zone de référence'].replace(translate_dic)

In [None]:
gdp.to_csv('global\dataglobal\clean_gdp_recap.csv')