In [62]:
# import libraries
import numpy as np
import pandas as pd

In [63]:
# Load the raw ASEAN consumer price index export into `data`
# and preview the first five rows.
data = pd.read_csv(
    'Consumer Price Indices_Asean-raw.csv',
)
data.head(5)

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Year Code,Year,Item Code,Item,Months Code,Months,Unit,Value,Flag,Flag Description,Note
0,CP,Consumer Price Indices,96,Brunei Darussalam,2000,2000,23013,"Consumer Prices, Food Indices (2015 = 100)",7001,January,,88.959933,F,FAO estimate,base year is 2015
1,CP,Consumer Price Indices,96,Brunei Darussalam,2001,2001,23013,"Consumer Prices, Food Indices (2015 = 100)",7001,January,,89.702503,F,FAO estimate,base year is 2015
2,CP,Consumer Price Indices,96,Brunei Darussalam,2002,2002,23013,"Consumer Prices, Food Indices (2015 = 100)",7001,January,,90.146512,F,FAO estimate,base year is 2015
3,CP,Consumer Price Indices,96,Brunei Darussalam,2003,2003,23013,"Consumer Prices, Food Indices (2015 = 100)",7001,January,,90.36532,F,FAO estimate,base year is 2015
4,CP,Consumer Price Indices,96,Brunei Darussalam,2004,2004,23013,"Consumer Prices, Food Indices (2015 = 100)",7001,January,,89.822766,F,FAO estimate,base year is 2015


In [64]:
# Remove code/flag/note columns that are not needed downstream, leaving
# Area, Year, Item, Months and Value.
data = data.drop(
    columns=[
        'Domain Code',
        'Domain',
        'Area Code (M49)',
        'Year Code',
        'Item Code',
        'Months Code',
        'Flag',
        'Flag Description',
        'Note',
        'Unit',
    ]
)
data.head()

Unnamed: 0,Area,Year,Item,Months,Value
0,Brunei Darussalam,2000,"Consumer Prices, Food Indices (2015 = 100)",January,88.959933
1,Brunei Darussalam,2001,"Consumer Prices, Food Indices (2015 = 100)",January,89.702503
2,Brunei Darussalam,2002,"Consumer Prices, Food Indices (2015 = 100)",January,90.146512
3,Brunei Darussalam,2003,"Consumer Prices, Food Indices (2015 = 100)",January,90.36532
4,Brunei Darussalam,2004,"Consumer Prices, Food Indices (2015 = 100)",January,89.822766


In [65]:
# split 'Months' column into separate columns: one row per (Area, Year, Item)
# and one column per month name. pivot_table aggregates with the mean by
# default, so duplicate (Area, Year, Item, Months) entries are averaged.
# NOTE: the previous unused import of `pivot` from the private module
# `pandas.core.reshape.reshape` was removed; DataFrame.pivot_table needs no
# extra import.
data = data.pivot_table(index=['Area', 'Year', 'Item'], columns='Months', values='Value')
# Turn the (Area, Year, Item) index back into ordinary columns.
data = data.reset_index()
data.head()

Months,Area,Year,Item,April,August,December,February,January,July,June,March,May,November,October,September
0,Brunei Darussalam,2000,"Consumer Prices, Food Indices (2015 = 100)",89.152073,89.408259,89.664445,89.02398,88.959933,89.344212,89.280166,89.088026,89.216119,89.600398,89.536352,89.472305
1,Brunei Darussalam,2000,"Consumer Prices, General Indices (2015 = 100)",94.85625,95.560047,95.560047,94.465251,94.230652,95.325448,95.247248,94.77805,94.85625,95.403648,95.794646,95.638247
2,Brunei Darussalam,2001,"Consumer Prices, Food Indices (2015 = 100)",89.816677,89.968908,90.12114,89.740561,89.702503,89.93085,89.892793,89.778619,89.854735,90.083082,90.045024,90.006966
3,Brunei Darussalam,2001,"Consumer Prices, General Indices (2015 = 100)",96.185645,96.263845,92.588458,95.951046,95.872846,96.263845,96.263845,96.029246,96.185645,94.074253,96.420244,96.420244
4,Brunei Darussalam,2001,Food price inflation,0.745472,0.627067,0.509338,0.804931,0.834724,0.656604,0.686185,0.77518,0.715807,0.538707,0.568118,0.597571


In [66]:
# impute na using mean method: each missing monthly value is replaced by the
# mean of the observed months in the same row (same Area / Year / Item).
# Columns 3..14 are the twelve month columns created by the pivot above
# (columns 0..2 are Area, Year, Item). Work on a copy so the fill never
# writes through to `data` and does not raise SettingWithCopyWarning.
df1 = data.iloc[:, 3:15].copy()
# Row means are computed once from the observed values only (NaN is skipped).
# A row with no observed month keeps NaN.
row_means = df1.mean(axis=1)
# Fill column by column; Series.fillna aligns `row_means` on the row index, so
# only cells that are actually NaN change. The previous nested loops iterated
# over the cross product of NaN row and NaN column indices and overwrote
# observed values as well.
df1 = df1.apply(lambda month: month.fillna(row_means))

In [67]:
# add new column by mean value of months: the yearly average of the monthly
# values in df1. DataFrame.mean(axis=1) is the vectorised equivalent of
# applying Series.mean row by row (NaN values are skipped in both).
data['Value'] = df1.mean(axis=1)

In [68]:
# The monthly columns are now summarised in 'Value', so remove them.
data = data.drop(
    columns=[
        'January', 'February', 'March', 'April',
        'May', 'June', 'July', 'August',
        'September', 'October', 'November', 'December',
    ]
)
data.head()

Months,Area,Year,Item,Value
0,Brunei Darussalam,2000,"Consumer Prices, Food Indices (2015 = 100)",89.312189
1,Brunei Darussalam,2000,"Consumer Prices, General Indices (2015 = 100)",95.142982
2,Brunei Darussalam,2001,"Consumer Prices, Food Indices (2015 = 100)",89.911821
3,Brunei Darussalam,2001,"Consumer Prices, General Indices (2015 = 100)",95.70993
4,Brunei Darussalam,2001,Food price inflation,0.671642


In [69]:
# Spread the 'Item' categories (food index, general index, food price
# inflation) into their own columns, one row per (Area, Year), then restore
# Area and Year as regular columns.
data = (
    data
    .pivot_table(values='Value', index=['Area', 'Year'], columns='Item')
    .reset_index()
)
data.head()

Item,Area,Year,"Consumer Prices, Food Indices (2015 = 100)","Consumer Prices, General Indices (2015 = 100)",Food price inflation
0,Brunei Darussalam,2000,89.312189,95.142982,
1,Brunei Darussalam,2001,89.911821,95.70993,0.671642
2,Brunei Darussalam,2002,90.286058,93.494272,0.416298
3,Brunei Darussalam,2003,90.033759,93.74359,-0.279126
4,Brunei Darussalam,2004,90.485887,94.5305,0.503782


In [71]:
#split into different datasets by Area
def _rows_for_area(frame, area):
    """Return the rows of ``frame`` whose 'Area' equals ``area``, without the 'Area' column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing an 'Area' column.
    area : str
        Exact area name to select.

    Returns
    -------
    pandas.DataFrame
        Matching rows with the 'Area' column removed (dropped by label, so the
        result does not depend on 'Area' being the first column).
    """
    return frame[frame['Area'] == area].drop(columns='Area')


df_Brunei = _rows_for_area(data, 'Brunei Darussalam')
df_Indonesia = _rows_for_area(data, 'Indonesia')
df_Malaysia = _rows_for_area(data, 'Malaysia')
df_VietNam = _rows_for_area(data, 'Viet Nam')
df_Cambodia = _rows_for_area(data, 'Cambodia')

In [72]:
# Write each per-country table and the combined cleaned table to CSV,
# omitting the row index from every file.
for frame, target in [
    (df_Brunei, r'Consumer Price indices_Brunei.csv'),
    (df_Indonesia, r'Consumer Price indices_Indonesia.csv'),
    (df_Malaysia, r'Consumer Price indices_Malaysia.csv'),
    (df_VietNam, r'Consumer Price indices_VietNam.csv'),
    (df_Cambodia, r'Consumer Price indices_Cambodia.csv'),
    (data, r'Consumer Price indices_ASEAN-cleaned.csv'),
]:
    frame.to_csv(target, index=False)