# Import libraries

In [16]:
import pandas as pd
import numpy as np
import xlwings as xw
import matplotlib.pyplot as plt
import os

# Define global variables

In [2]:
# Workbook name with a leading backslash (presumably so it can be concatenated
# onto a folder path -- confirm); xlwings lookups strip it with xlDataName[1:].
# Written as a raw string: '\D' is not a valid escape sequence, so the plain
# literal triggers a SyntaxWarning on recent Python versions. The value is
# unchanged.
xlDataName = r'\DissData [02]'
# Folder the exported .npy files are written to.
dataFolder = r"C:\Users\rfg\OneDrive\Desktop\Dissertation ES30029\data"
# Cells on the "all_companies" sheet that hold the company names.
companyNamesRange = "A1:A2009"
# Cell block read from every data sheet.
cellRng = "A1:BYH288"
# Look the workbook up by name (without the leading backslash) via xlwings.
xlBook = xw.books(xlDataName[1:])
# NOTE: despite the name, sheets listed here are the ones *skipped* when the
# data sheets are loaded.
sheetWhitelist = ['all_companies']
# Collects one DataFrame per loaded sheet (derived series are appended later).
fields = []

# Read data from Excel

In [3]:
# Load every data sheet into its own DataFrame. Each frame is published as a
# module-level variable named after its sheet (later cells refer to these
# names directly, e.g. `p`), tagged with a `.name` attribute holding the sheet
# name, and collected in `fields`.
fields = []
for sheet in xlBook.sheets:
    # Despite its name, sheetWhitelist lists the sheets to skip.
    if sheet.name in sheetWhitelist:
        continue
    # Read straight from the sheet being iterated instead of looking the
    # workbook and sheet up again by name.
    sheetFrame = pd.DataFrame(sheet.range(cellRng).value)
    sheetFrame.name = sheet.name
    # Bind the variable through globals() rather than exec(): same resulting
    # name, but no source code is assembled from sheet names.
    globals()[sheet.name] = sheetFrame
    fields.append(sheetFrame)
        


# Clean up the dataframes to have correct cols and rows
# The company names are the same for every field, so read them from Excel once
# instead of once per field.
sheetCompanyNames = [str(i) for i in xlBook.sheets("all_companies").range(companyNamesRange).value]
for field in fields:
    # Column labels: the sheet's top-left cell followed by the company names.
    companyNames = [field.iloc[0][0]] + sheetCompanyNames
    field.columns = companyNames
    # Promote the first column to the row index, then remove it from the data.
    field.index = field[field.columns[0]]
    del field[field.columns[0]]
    # Drop the first row of the raw sheet.
    field.drop(field.index[0], inplace=True)
    field.replace("NA", np.nan, inplace=True)
    # NOTE(review): this only rebinds the loop variable; the frames stored in
    # `fields` (and the per-sheet globals) are NOT converted. It is left as-is
    # on purpose: the error-column check in the next cell looks for '$$ER'
    # strings in the first row, which errors='coerce' would turn into NaN.
    # If numeric frames are wanted, convert in place after that check.
    field = field.apply(pd.to_numeric,errors='coerce')
    


# Data Cleaning

### Remove series which don't have data for a field

In [4]:
# Collect every column whose first row holds a '$$ER...' marker string in at
# least one field, then remove those columns from all fields so that every
# frame keeps the same set of columns.
errorCols = []
for field in fields:
    # A frame without rows has no first value to inspect.
    if len(field.index) == 0:
        continue
    for col in field:
        firstValue = field[col].iloc[0]
        # Explicit type check instead of a bare `except: pass`: non-string
        # first values (numbers, NaN, None, ...) are skipped as before, but
        # unrelated errors and KeyboardInterrupt are no longer swallowed.
        if isinstance(firstValue, str) and firstValue.startswith('$$ER'):
            errorCols.append(col)

# De-duplicate while keeping first-seen order.
errorCols = list(dict.fromkeys(errorCols))

for field in fields:
    # In place, so the frames referenced elsewhere by name are updated too.
    field.drop(columns=errorCols, inplace=True)

# Sanity check: stacking only works when all frames share one shape.
print(np.stack(fields, axis=-1).shape)

(287, 1562, 12)


# Data Calculation

### Returns

In [5]:
# Simple one-period returns: each row of `p` divided by the previous row,
# minus one (shift(1) moves the rows down by one, so the first row is NaN).
# `p` is presumably the price sheet loaded from Excel -- confirm.
# The former empty-DataFrame placeholder was overwritten immediately and has
# been dropped.
r = p/p.shift(1)-1

r.name = 'r'

In [6]:
# Compounded returns over trailing windows of 3..36 rows of `r`:
# the product of (1 + r) over the window, minus one.
returnWindows = (3, 6, 9, 12, 18, 24, 36)

# 1 + r is the same for every window, so compute it once.
grossReturns = 1 + r

rollingReturns = []
for windowLength in returnWindows:
    windowReturn = grossReturns.rolling(window=windowLength).apply(np.prod, raw=True) - 1
    # Label each frame with the name it is exported under later.
    windowReturn.name = 'ret_{}m'.format(windowLength)
    rollingReturns.append(windowReturn)

# Keep the individual module-level names available.
ret_3m, ret_6m, ret_9m, ret_12m, ret_18m, ret_24m, ret_36m = rollingReturns

for i in rollingReturns:
    print(i.values.shape)

# NOTE: re-running this cell appends the frames to `fields` again.
fields.extend(rollingReturns)

(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)


### Moving averages

In [7]:
# Calculate moving averages of `p` over trailing windows of 3..36 rows.
averageWindows = (3, 6, 9, 12, 18, 24, 36)

movingAverages = []
for windowLength in averageWindows:
    windowMean = p.rolling(window=windowLength).mean()
    # Label each frame with the name it is exported under later.
    windowMean.name = 'map_{}m'.format(windowLength)
    movingAverages.append(windowMean)

# Keep the individual module-level names available.
map_3m, map_6m, map_9m, map_12m, map_18m, map_24m, map_36m = movingAverages

# Technical indicators (crosses of MAVs for example)

for i in movingAverages:
    print(i.values.shape)

# NOTE: re-running this cell appends the frames to `fields` again.
fields.extend(movingAverages)

(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)


### Volatilities

In [8]:
# Rolling standard deviation of `r` over trailing windows of 3..36 rows,
# scaled by sqrt(12).
volatilityWindows = (3, 6, 9, 12, 18, 24, 36)

# NOTE(review): sqrt(12) presumably annualises monthly returns -- confirm the
# data frequency.
annualisation = 12 ** 0.5

volatilities = []
for windowLength in volatilityWindows:
    windowStd = r.rolling(window=windowLength).std() * annualisation
    # Label each frame with the name it is exported under later.
    windowStd.name = 'std_{}m'.format(windowLength)
    volatilities.append(windowStd)

# Keep the individual module-level names available.
std_3m, std_6m, std_9m, std_12m, std_18m, std_24m, std_36m = volatilities

for i in volatilities:
    print(i.values.shape)

# NOTE: re-running this cell appends the frames to `fields` again.
fields.extend(volatilities)

(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)
(287, 1562)


# Data Availability

# Export Data

### Numpy Data Cube

In [9]:
# Also save index and columns so that you can load up data with these
# np.save(file=dataFolder+r'\npData.npy', arr=np.stack(fields, axis=-1))
# np.save(file=dataFolder+'\index.npy', arr=p.index)
# np.save(file=dataFolder+'\columns.npy', arr=p.columns)

### Individual Pandas Dataframes

In [19]:
# Save rows and columns
# The row index and column labels of `p`, plus the ordered list of field
# names, are stored next to the data so the arrays can be re-labelled on load.
# All paths are built with os.path.join, matching the per-field export below.
np.save(file=os.path.join(dataFolder, 'index.npy'), arr=p.index)
np.save(file=os.path.join(dataFolder, 'columns.npy'), arr=p.columns)
np.save(file=os.path.join(dataFolder, 'fields.npy'), arr=[field.name for field in fields])


# One .npy file per field, named after the frame's `.name` attribute.
for field in fields:
    np.save(file=os.path.join(dataFolder, '{}.npy'.format(field.name)), arr=field)