# Import - Process - Export to Azure

In [52]:
import pandas as pd
from azureml import Workspace

# Workspace handle; constructed with no arguments.
ws = Workspace()
# Base names of the workspace datasets. Each one is read twice below:
# '<base>_Master.csv' (master table) and '<base>.csv' (data table).
files = ['Commodity', 'Currency', 'Economic_Indicator',
         'Equity', 'Index', 'Precious_Metal']

# Group key = first three characters of the base name, lower-cased.
names = [base[:3].lower() for base in files]
masters = {}
instruments = {}
for key, base in zip(names, files):
    # Master table first, then the data table, for each group in turn.
    masters[key] = ws.datasets[base + '_Master.csv'].to_dataframe()
    instruments[key] = ws.datasets[base + '.csv'].to_dataframe()
print(names)

['com', 'cur', 'eco', 'equ', 'ind', 'pre']


Filter by country

In [119]:
import numpy as np

def split_by_ticker(instruments, tickers):
    """Build a wide frame of per-ticker log returns.

    Parameters
    ----------
    instruments : pandas.DataFrame
        Frame providing the 'Ticker', 'Date' and 'Close' columns read below.
    tickers : pandas.DataFrame
        Frame whose 'Ticker' column lists the tickers to extract.

    Returns
    -------
    pandas.DataFrame
        A single frame indexed by 'Date' with one column per ticker, each
        holding log(Close) minus log(Close of the preceding row). Rows are
        taken in the order they appear in `instruments`. An empty frame is
        returned when `tickers` is empty.
    """
    # Nothing requested: hand back an empty frame straight away.
    if tickers.empty:
        return pd.DataFrame()

    returns_by_ticker = {}
    for symbol in tickers.Ticker:
        rows = instruments.loc[instruments.Ticker == symbol].set_index('Date')
        log_close = np.log(rows.Close)
        # The first row has no predecessor, so its return is NaN.
        returns_by_ticker[symbol] = (log_close - log_close.shift(1)).to_frame(name=symbol)

    # Place the per-ticker columns side by side, aligned on the Date index.
    return pd.concat(list(returns_by_ticker.values()), axis=1)

In [92]:
# Use the augmented version of Index_Master, read from the workspace.
masters['ind'] = ws.datasets['Index_Master2.csv'].to_dataframe()
# Country of each equity = second whitespace-separated token of its ticker.
# The vectorised .str accessor yields NaN for a missing or single-token ticker
# instead of raising, so one bad row does not abort the whole cell.
masters['equ']['Country'] = masters['equ'].Ticker.str.split().str[1]
# Normalise currency codes to upper case; missing values stay NaN.
masters['equ']['Currency'] = masters['equ'].Currency.str.upper()
# Distinct countries listed in the index master. Missing values are dropped:
# a NaN key never matches the equality filters below, which would leave an
# empty index frame and make the `.values[0]` lookup raise IndexError.
countries = set(masters['ind'].Country.dropna())
index = {}
equity = {}
currency = {}
for country in countries:
    # Master rows belonging to this country.
    index[country] = masters['ind'][masters['ind'].Country == country]
    equity[country] = masters['equ'][masters['equ'].Country == country]

    # The currency of the country's first index row selects the currency rows.
    curr4country = index[country].Currency.values[0]
    currency[country] = masters['cur'][masters['cur'].Currency == curr4country]

In [120]:
# Per-country log-return frames, one column per ticker, for indices ...
split_index = {
    country: split_by_ticker(instruments['ind'], index[country])
    for country in countries
}
# ... and for the currency rows selected for each country.
split_currency = {
    country: split_by_ticker(instruments['cur'], currency[country])
    for country in countries
}

In [109]:
# Per-country equity log-return frames. Each country code is printed before
# its split runs, as a progress indicator.
equity_data = instruments['equ']
split_equity = dict()
for country in countries:
    print(country)
    country_tickers = equity[country]
    split_equity[country] = split_by_ticker(equity_data, country_tickers)

AU
VX
SS
SM
US
CN
CH
IN
BZ
LN
FP
JP
TT
MM
GR
IM
HK
KS
CI


Save as new datasets (one per country and asset class)

In [113]:
from azureml import DataTypeIds

# Upload each country's index frame to the workspace as a GenericCSV dataset
# named '<country>_Index'.
for country in countries:
    df = split_index[country]
    # Skip empty frames, as the currency and equity export loops do, so that
    # no empty dataset is created in the workspace.
    if not df.empty:
        ws.datasets.add_from_dataframe(
            dataframe=df,
            data_type_id=DataTypeIds.GenericCSV,
            name=country + '_Index',
            description='Index by country'
        )

In [121]:
# Upload every non-empty per-country currency frame as a GenericCSV dataset
# named '<country>_Currency'.
for country in countries:
    currency_frame = split_currency[country]
    if currency_frame.empty:
        # Nothing to upload for this country.
        continue
    ws.datasets.add_from_dataframe(
        dataframe=currency_frame,
        data_type_id=DataTypeIds.GenericCSV,
        name=country + '_Currency',
        description='Currency by country',
    )

In [122]:
# Upload every non-empty per-country equity frame as a GenericCSV dataset
# named '<country>_Equity'.
for country in countries:
    equity_frame = split_equity[country]
    if equity_frame.empty:
        # Nothing to upload for this country.
        continue
    ws.datasets.add_from_dataframe(
        dataframe=equity_frame,
        data_type_id=DataTypeIds.GenericCSV,
        name=country + '_Equity',
        description='Equity by country',
    )