In [1]:
# default_exp TSXCore

# TSXCore

> nbdev module for preprocessing TSX data

In [2]:
#hide
from nbdev.showdoc import *
from nbdev import *

# Config Script

In [3]:
# export config
from pathlib import Path
import json

# Location of the JSON configuration file, relative to the working directory.
config_file_path = Path("config.json")
# Use a context manager so the file handle is closed as soon as the JSON is
# parsed (the handle was previously left open for the life of the kernel).
with open(config_file_path,"r") as f:
    config = json.load(f)

In [4]:
# Path of the TSX data file, taken from the 'tsx_data_path' key of the loaded
# config; it is later handed to create_tsx_features, which reads it as a CSV.
tsx_data_path = config['tsx_data_path']

# TSX Preprocessing

In [5]:
# export TSX_preprocessing
import pandas as pd
import numpy as np
import warnings

# NOTE(review): this installs a filter that ignores every warning category for
# the whole process once this cell (or the exported module) runs — consider
# narrowing it to the specific warning that needs silencing.
warnings.filterwarnings("ignore")

def _get_tsx_features(data):
    tsx_features = np.array([(c if ('TSX' in c) else None) for c in data.columns.tolist()])
    tsx_features = tsx_features[tsx_features!=None].tolist()
    return tsx_features


def _get_tsx_year(t,df):
    """Collapse the columns selected for type `t` into one summed column.

    The columns returned by `_tsx_cols_by_type(t, df)` are summed row-wise
    into a new column named ``f"{t}_TSX_Investment_Previous_Year"`` and the
    source columns are then removed.

    Note: `df` is modified in place; the same object is returned.
    """
    source_columns = _tsx_cols_by_type(t,df)
    # Row-wise total across every selected column.
    row_totals = df[source_columns].sum(axis = 1)
    df[f"{t}_TSX_Investment_Previous_Year"] = row_totals
    # Remove the inputs now that they are folded into the aggregate.
    df.drop(columns = source_columns,inplace = True)
    return df


def _tsx_cols_by_type(t,df,summary_stats = False):
    cols = np.array([(c if (t in c) else None)  for c in df.columns])
    cols = cols[cols!=None].tolist()
    if not summary_stats:            
        cols = list(set(cols)-set([f'TSX_{type}_Price_MedianofMedian_Investing',
                                   f'TSX_{type}_Price_Max_Investing',
                                   f'TSX_{type}_Price_Min_Investing']))
    return cols


def create_tsx_features(tsx_features_dir):
    """Load a TSX CSV and aggregate its TSX columns into one column per type.

    Parameters
    ----------
    tsx_features_dir : str or path-like
        Path of the CSV file to read (passed straight to `pandas.read_csv`).

    Returns
    -------
    pandas.DataFrame
        The loaded frame after `_get_tsx_year` has been applied once for each
        TSX type found in the column names.
    """
    df = pd.read_csv(tsx_features_dir)
    tsx_features = _get_tsx_features(df)
    # The type is the token after the first underscore of each TSX column
    # name. dict.fromkeys de-duplicates in first-seen order; the former
    # list(set(...)) ordering changed between runs with string hash
    # randomization, which made the output column order non-reproducible.
    tsx_types = list(dict.fromkeys(t.split('_')[1] for t in tsx_features))

    for t in tsx_types:
        df = _get_tsx_year(t,df)

    return df

Testing the preprocessing: run `create_tsx_features` on the configured TSX data file and inspect the result.

In [6]:
# hide
# Run the preprocessing on the data file named in the config.
tsx = create_tsx_features(tsx_data_path)

In [7]:
# hide
# Preview the first rows of the processed frame.
tsx.head()

Unnamed: 0,Year,Financials_TSX_Investment_Previous_Year,Utilities_TSX_Investment_Previous_Year,Industrials_TSX_Investment_Previous_Year,Health_TSX_Investment_Previous_Year,Information_TSX_Investment_Previous_Year,Energy_TSX_Investment_Previous_Year,Composite_TSX_Investment_Previous_Year
0,2000,0.0,0.0,0.0,0.0,0.0,1335.015,145483.143
1,2001,1399.8725,1637.995,1141.4175,1012.5475,492.995,1633.5,116875.2053
2,2002,1616.7825,2026.6475,1195.015,1001.7875,343.22,1832.5325,105428.7653
3,2003,1762.5225,2087.7975,989.355,969.755,308.6825,2001.6475,107012.4503
4,2004,2142.88,2238.755,1095.585,951.2025,477.36,2623.36,129051.148


In [8]:
#hide
from nbdev.export import notebook2script
# Convert the project's notebooks to modules; the converted notebooks are
# listed in the cell output.
notebook2script()

Converted config.ipynb.
Converted index.ipynb.
Converted TSX_preprocessing.ipynb.
Converted TSXCore.ipynb.
