# Static Paramters

In [64]:
# Directories
data_src = 'data-raw/'
data_dst = 'data-processed/'

list_to_track = ['GOLD','GBP','JPY','EUR','DAX']
time = 2

# Files
file_names = dict()
for s in list_to_track:
    file_names[s] = s+'.csv'

# Modules

In [65]:
import os # path.join
import pandas as pd # read_csv
import numpy as np # np.nan

# Load Data

In [66]:
# File Data Dictionary
file_data = dict()

# Read and Store Data
for file_key in file_names:
    # Get File Path
    file_name = file_names[file_key]
    file_path = os.path.join( data_src, file_name )
    # Read
    data = pd.read_csv( file_path, parse_dates=[0] )
    # Rename Columns
    data.columns = [ 'Date', 'Value' ]
    # Do Conversion if required
    if( file_key in [ 'GOLD', 'EUR', 'DAX' ] ):
        data['Value'] = 1 / data['Value']
    # Sort by Date
    data.sort_values( ['Date'], axis = 0, inplace=True )
    # Reset Index
    data.reset_index( drop=True, inplace=True )
    # Add Boolean Column
    # Continuous or not?!
    #data['is_continuous'] = (data.Date - data.Date.shift(1)).map( lambda x: x.days <= 1 )
    # Calculate Growth
    data[file_key] = (data['Value'] / data['Value'].shift(1)).map( lambda x: 'increase' if x > 1 else 'decrease' )
    # Filter
    #data = data[ data.is_continuous ]
    # Drop / Select
    data.drop( labels=['Value'], axis=1, inplace=True )
    
    #setting time frames
    for t in range(time):
        file_data[ file_key+'_'+str(t) ] = data.copy()
        data[file_key] = data[file_key].shift(1)
        
file_data

{'GOLD_0':             Date      GOLD
 0     1970-01-01  decrease
 1     1970-04-01  increase
 2     1970-07-01  decrease
 3     1970-10-01  decrease
 4     1971-01-01  decrease
 5     1971-04-01  decrease
 6     1971-07-01  decrease
 7     1971-10-01  decrease
 8     1972-01-03  decrease
 9     1972-04-03  decrease
 10    1972-07-03  decrease
 11    1972-10-02  decrease
 12    1973-01-01  increase
 13    1973-04-02  decrease
 14    1973-07-02  decrease
 15    1973-10-01  increase
 16    1974-01-01  decrease
 17    1974-04-01  decrease
 18    1974-07-01  increase
 19    1974-10-01  increase
 20    1975-01-01  decrease
 21    1975-04-01  increase
 22    1975-07-01  increase
 23    1975-10-01  increase
 24    1976-01-01  increase
 25    1976-04-01  increase
 26    1976-07-01  increase
 27    1976-10-01  increase
 28    1977-01-03  decrease
 29    1977-04-01  decrease
 ...          ...       ...
 10322 2018-06-04  decrease
 10323 2018-06-05  increase
 10324 2018-06-06  decrease
 10325 201

# Merge Data

In [67]:
df_data = {
    'inner' : pd.DataFrame(),
    'outer' : pd.DataFrame()
}
for merge_type in df_data:
    # Merge
    for key in file_data:
        print(key)
        if( df_data[merge_type].shape[0] == 0 ):
            df_data[merge_type] = file_data[key].copy()
        else:
            df_data[merge_type] = pd.merge( df_data[merge_type], file_data[key], on = 'Date', how = merge_type )
        
    # Drop Date
    print(df_data[merge_type])
    df_data[merge_type].drop( labels = ['Date'], axis = 1, inplace = True )
    df_data[merge_type].replace(np.nan,'N/A',inplace=True)

GOLD_0
GOLD_1
GBP_0
GBP_1
JPY_0
JPY_1
EUR_0
EUR_1
DAX_0
DAX_1
           Date    GOLD_x    GOLD_y     GBP_x     GBP_y     JPY_x     JPY_y  \
0    2000-01-03  decrease  decrease  decrease  decrease  decrease  decrease   
1    2000-01-04  increase  decrease  decrease  decrease  increase  decrease   
2    2000-01-05  increase  increase  decrease  decrease  increase  increase   
3    2000-01-06  increase  increase  decrease  decrease  increase  increase   
4    2000-01-07  decrease  increase  increase  decrease  increase  increase   
5    2000-01-10  increase  decrease  increase  decrease  decrease  decrease   
6    2000-01-11  decrease  increase  decrease  increase  increase  decrease   
7    2000-01-12  decrease  decrease  decrease  decrease  decrease  increase   
8    2000-01-13  increase  decrease  increase  decrease  increase  decrease   
9    2000-01-14  decrease  increase  increase  increase  decrease  increase   
10   2000-01-17  decrease  decrease  increase  decrease  decrease  de

In [68]:
tmp = df_data['inner']
tmp

Unnamed: 0,GOLD_x,GOLD_y,GBP_x,GBP_y,JPY_x,JPY_y,EUR_x,EUR_y,DAX_x,DAX_y
0,decrease,decrease,decrease,decrease,decrease,decrease,decrease,increase,decrease,decrease
1,increase,decrease,decrease,decrease,increase,decrease,decrease,decrease,increase,decrease
2,increase,increase,decrease,decrease,increase,increase,decrease,decrease,increase,increase
3,increase,increase,decrease,decrease,increase,increase,decrease,decrease,increase,increase
4,decrease,increase,increase,decrease,increase,increase,increase,decrease,decrease,increase
5,increase,decrease,increase,decrease,decrease,decrease,increase,increase,decrease,decrease
6,decrease,increase,decrease,increase,increase,decrease,decrease,increase,increase,decrease
7,decrease,decrease,decrease,decrease,decrease,increase,decrease,decrease,decrease,increase
8,increase,decrease,increase,decrease,increase,decrease,increase,decrease,decrease,decrease
9,decrease,increase,increase,increase,decrease,increase,increase,increase,decrease,decrease


# Store

In [63]:
for key in df_data:
    # Path
    file_name = '{}.dat'.format( key )
    file_path = os.path.join( data_dst, file_name )
    # Store
    df_data[key].to_csv( file_path, index = False )
df_data['inner']

Unnamed: 0,GOLD,GBP,JPY,EUR,DAX
0,decrease,decrease,decrease,decrease,decrease
1,increase,decrease,increase,decrease,increase
2,increase,decrease,increase,decrease,increase
3,increase,decrease,increase,decrease,increase
4,decrease,increase,increase,increase,decrease
5,increase,increase,decrease,increase,decrease
6,decrease,decrease,increase,decrease,increase
7,decrease,decrease,decrease,decrease,decrease
8,increase,increase,increase,increase,decrease
9,decrease,increase,decrease,increase,decrease
