In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [169]:
import datetime
import json

In [382]:
# Load every raw series used by the notebook from files in the working directory.
def _load_json(path):
    """Return the decoded content of the JSON file at ``path``.

    The file is read as UTF-8 (the standard JSON interchange encoding) so the
    result does not depend on the platform's default locale encoding.
    NOTE(review): assumes the exports are saved as UTF-8 without BOM - confirm.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


# Income and expenses of the UA budget
budget_income_dict = _load_json('budget_income.json')
budget_expenses_dict = _load_json('budget_expenses.json')

# Consumer price index
inflation_dict = _load_json('inflation.json')

# Incomes and expenses of all Ukrainian banks, except the NBU
bank_income_dict = _load_json('bank_inc.json')
bank_expenses_dict = _load_json('bank_exp.json')

# Amounts of Ukrainian export and import
ua_export_dict = _load_json('export.json')
ua_import_dict = _load_json('import.json')

# Gross external debt
grossextdebt_dict = _load_json('grossextdebt.json')

# Ukrainian GDP
gross_domestic_product_dict = _load_json('vvp.json')

# Trends of exchange interest, read as a DataFrame
# (presumably exchange-rate data, judging by the file name - TODO confirm)
interest_trend = pd.read_csv('kurs.csv')


In [408]:
# Leave behind useless texts
def df_cleaner(new_dict):
    """Build a tidy frame holding only the ``freq``, ``value`` and ``date`` columns.

    Parameters
    ----------
    new_dict : any input accepted by ``pd.DataFrame``
        Raw records. They must provide a ``dt`` column (renamed to ``date``)
        or an existing ``date`` column.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``date`` (converted with ``pd.to_datetime``), keeping
        only those of ``freq``, ``value`` and ``date`` that are present.

    Raises
    ------
    KeyError
        If neither a ``dt`` nor a ``date`` column exists.
    """
    # We need only these three columns
    wanted = ('freq', 'value', 'date')
    df = pd.DataFrame(new_dict)

    # A condition to choose only absolute values of import/export: when the
    # first record is tagged 'F_', rows with any other 'tzep' tag are removed.
    # Boolean indexing drops them outright instead of leaving all-NaN rows,
    # and the guard lets inputs without a 'tzep' column pass through.
    if 'tzep' in df.columns and len(df) > 0 and df['tzep'].iloc[0] == 'F_':
        df = df[df['tzep'] == 'F_']

    # Rename, convert to datetime and order chronologically
    df = df.rename(columns={'dt': 'date'})
    df = df.assign(date=pd.to_datetime(df['date'])).sort_values(by='date')

    # Throw away useless columns, preserving the original column order
    return df[[col for col in df.columns if col in wanted]]
    

In [384]:
# Keep only the rows reported at one frequency (monthly by default)
def to_frequency(dataframe, frequency = 'M'):
    """Return the rows of ``dataframe`` whose ``freq`` equals ``frequency``.

    Non-matching rows are blanked out and then removed together with any other
    row that is entirely NaN; the result is renumbered from 0.
    """
    matches = dataframe['freq'] == frequency
    kept = dataframe.where(matches).dropna(axis=0, how='all')
    return kept.reset_index(drop=True)

In [404]:
# Special function to clean and regroup the inflation data
def select_total(dictionary):
    """Extract the country-level 'Total' series from the inflation records.

    Parameters
    ----------
    dictionary : any input accepted by ``pd.DataFrame``
        Raw records providing the ``ku``, ``tzep``, ``mcrd081`` and ``dt``
        columns.

    Returns
    -------
    pandas.DataFrame
        Rows where ``ku`` is missing, ``tzep`` is ``'DTPY_'`` and ``mcrd081``
        is ``'Total'``, sorted by ``date`` and reduced to the ``freq``,
        ``value`` and ``date`` columns.

    Raises
    ------
    KeyError
        If one of the required columns is absent.
    """
    df = pd.DataFrame(dictionary)

    # 'ku' is the region of Ukraine; records without a region are selected by
    # testing for missing values directly, so missing data in other columns
    # (e.g. 'value') is no longer overwritten by a sentinel string.
    # 'tzep' == 'DTPY_' was described by the original author as the
    # absolute-value series - TODO confirm against the data source.
    selected = (
        df['ku'].isna()
        & (df['tzep'] == 'DTPY_')
        & (df['mcrd081'] == 'Total')
    )
    df = df[selected].rename(columns={'dt': 'date'})

    # Convert date column to datetime type and sort data by date
    df = df.assign(date=pd.to_datetime(df['date'])).sort_values(by='date')

    # Delete all the useless information, preserving the column order
    wanted = ('freq', 'value', 'date')
    return df[[col for col in df.columns if col in wanted]]

In [406]:
# Define dataset: clean every raw series, then keep a single reporting frequency.

# Monthly ('M') series
df_budget_income = df_cleaner(budget_income_dict).pipe(to_frequency, 'M')
df_budget_expenses = df_cleaner(budget_expenses_dict).pipe(to_frequency, 'M')
df_bank_income = df_cleaner(bank_income_dict).pipe(to_frequency, 'M')
df_bank_expenses = df_cleaner(bank_expenses_dict).pipe(to_frequency, 'M')

# Quarterly ('Q') series
df_import = df_cleaner(ua_import_dict).pipe(to_frequency, 'Q')
df_export = df_cleaner(ua_export_dict).pipe(to_frequency, 'Q')
df_grossextdebt = df_cleaner(grossextdebt_dict).pipe(to_frequency, 'Q')
df_gdp = df_cleaner(gross_domestic_product_dict).pipe(to_frequency, 'Q')

# Inflation goes through its dedicated cleaner and is kept monthly
df_inflation = select_total(inflation_dict).pipe(to_frequency, 'M')

In [407]:
# Show the monthly inflation frame as this cell's output
df_inflation

Unnamed: 0,date,freq,value
0,2007-02-01,M,0.5
1,2007-03-01,M,1.1
2,2007-04-01,M,1.3
3,2007-05-01,M,1.3
4,2007-06-01,M,1.9
5,2007-07-01,M,4.2
6,2007-08-01,M,5.6
7,2007-09-01,M,6.2
8,2007-10-01,M,8.6
9,2007-11-01,M,11.7
