In [4]:
import os, sys
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [17]:
# convert time series data to supervised data
def to_supervised(src_path, dst_path, index_y, n_in, n_out):
    """Convert every CSV loaded by ``walk(src_path)`` into a supervised table on disk.

    For each loaded frame, missing values are replaced with 0, the values are
    cast to float64 and passed to ``generate_supervised_data``. The result is
    then trimmed to the lagged input columns plus one target column at time
    ``t`` and written to ``dst_path + <key> + "_supervised.csv"``.

    Args:
        src_path: Directory handed to ``walk`` to load the source CSV files.
        dst_path: String prefix of the output files. It is concatenated as-is,
            so it needs a trailing separator to act as a directory.
        index_y: Zero-based column index of the variable kept as the target
            ``var<index_y + 1>(t)``.
        n_in: Number of lagged time steps used as inputs.
        n_out: Number of output steps generated before trimming.

    Raises:
        ValueError: If ``index_y`` is not a valid column index for a file.
    """
    country_dict = walk(src_path)
    for name, frame in country_dict.items():
        values = frame.fillna(0).values.astype("float64")
        n_vars = values.shape[1]
        if not 0 <= index_y < n_vars:
            raise ValueError(
                "index_y=%r is out of range for '%s' (%d columns)"
                % (index_y, name, n_vars)
            )

        supervised = generate_supervised_data(values, index_y, n_in, n_out)

        # Columns are laid out as [lags ... | step t | step t+1 ...], each group
        # n_vars wide. Keep every lag column and only the requested target at
        # step t. With index_y == 0 this is the same selection as keeping the
        # first n_in * n_vars + 1 columns.
        n_inputs = n_in * n_vars
        target_pos = n_inputs + index_y
        keep = [
            pos for pos in range(supervised.shape[1])
            if pos < n_inputs or pos == target_pos
        ]
        supervised = supervised.iloc[:, keep]

        out_path = dst_path + name + "_supervised.csv"
        # to_csv does not create missing folders, so make sure the parent exists.
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        supervised.to_csv(out_path)
    
def generate_supervised_data(data, index_y, n_in=1, n_out=1, drop=True):
    """Frame a 2-D series as side-by-side lagged and forward-shifted copies.

    Args:
        data: 2-D array-like exposing ``.shape``; columns are the variables.
        index_y: Accepted for interface compatibility; not used in this function.
        n_in: Number of lagged copies, giving columns ``var<k>(t-n_in)`` up to
            ``var<k>(t-1)``.
        n_out: Number of forward copies, giving columns ``var<k>(t)`` up to
            ``var<k>(t+n_out-1)``.
        drop: When truthy, rows containing NaN are removed from the result.

    Returns:
        A DataFrame holding the shifted copies concatenated column-wise.
    """
    width = data.shape[1]
    frame = pd.DataFrame(data)

    def label(var_no, offset):
        # offset > 0 is a lag, 0 is the current step, < 0 is a step ahead
        if offset > 0:
            return 'var%d(t-%d)' % (var_no, offset)
        if offset == 0:
            return 'var%d(t)' % var_no
        return 'var%d(t+%d)' % (var_no, -offset)

    # oldest lag first, then the current step followed by the future steps
    offsets = list(range(n_in, 0, -1)) + [-ahead for ahead in range(0, n_out)]

    shifted = [frame.shift(offset) for offset in offsets]
    headers = [
        label(var_no, offset)
        for offset in offsets
        for var_no in range(1, width + 1)
    ]

    combined = pd.concat(shifted, axis=1)
    combined.columns = headers
    return combined.dropna() if drop else combined
    
    
def walk(path, max_files=173):
    """Load the CSV files located directly in ``path`` into a dict of DataFrames.

    Only the top level of ``path`` is read; subdirectories are not descended
    into. File names are sorted before the ``max_files`` cap is applied so the
    selection is reproducible, and files without a ``.csv`` extension are
    skipped instead of being passed to ``pd.read_csv``.

    Args:
        path: Directory to read.
        max_files: Maximum number of CSV files to load. ``None`` loads all of them.

    Returns:
        Dict mapping the file name up to its first ``"."`` to the DataFrame read
        with the first column as index. Empty when ``path`` yields no entries.
    """
    # The first tuple produced by os.walk describes `path` itself. A missing
    # directory produces nothing, so fall back to an empty listing.
    root, _subdirs, files = next(os.walk(path), (path, [], []))

    # Sort so that the max_files cut does not depend on directory listing order.
    csv_files = sorted(name for name in files if name.lower().endswith(".csv"))

    frames = {}
    for name in csv_files[:max_files]:
        frames[name.split(".")[0]] = pd.read_csv(os.path.join(root, name), index_col=0)
    return frames

# Write supervised tables (one lagged step in, one step out) for the CSVs in ./country_csv/.
to_supervised(
    src_path="./country_csv/",
    dst_path="./country_csv/supervised_1/",
    index_y=0,
    n_in=1,
    n_out=1,
)


dict_keys(['AFG', 'AGO', 'ALB', 'AND', 'ARE', 'ARG', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHL', 'CHN', 'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CUB', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK', 'DOM', 'DZA', 'ECU', 'EGY', 'ERI', 'ESP', 'EST', 'ETH', 'FIN', 'FJI', 'FRA', 'GAB', 'GBR', 'GEO', 'GHA', 'GIN', 'GMB', 'GRC', 'GTM', 'GUY', 'HKG', 'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KOR', 'KWT', 'LAO', 'LBN', 'LBR', 'LBY', 'LKA', 'LSO', 'LTU', 'LUX', 'LVA', 'MAR', 'MCO', 'MDA', 'MDG', 'MEX', 'MLI', 'MMR', 'MNG', 'MOZ', 'MRT', 'MUS', 'MWI', 'MYS', 'NAM', 'NER', 'NGA', 'NIC', 'NLD', 'NOR', 'NPL', 'NZL', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PNG', 'POL', 'PRT', 'PRY', 'PSE', 'QAT', 'ROU', 'RUS', 'RWA', 'SAU', 'SDN', 'SEN', 'SGP', 'SLB', 'SLE', 'SLV', 'SMR', 'SOM', 'S