## Import Packages

In [1]:
import numpy as np
import pandas as pd
import glob
import os

## Import CSVs

In [2]:
# Resolve the input folder relative to the directory the notebook is run from.
ROOT_DIR = os.path.abspath(os.curdir)
# os.path.join uses the host OS separator instead of mixing '\' and '/'
# inside the same path.
path = os.path.join(ROOT_DIR, 'financial_data')
print(path)

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the order of `dfs` reproducible between runs and machines.
csvfiles = sorted(glob.glob(os.path.join(path, "*.csv")))
# One dataframe per CSV file, in the same order as `csvfiles`
dfs = [pd.read_csv(file) for file in csvfiles]


C:\FYP - Luke Bezzina\Code\preprocessingHistoricalData/financial_data


## New DataFrame Collection

In [3]:
# Collection of processed dataframes, keyed by equity/ETF name
processed_df_coll = dict()

## Pre-Process

In [4]:
# A close-to-close move is labelled Neutral when its size does not exceed
# 0.2% of the current day's close price.
neutral_percentage = 0.002

# Number of prior trading days summarised in each output record
window_size = 10
# lag 1 = previous trading day ... lag 10 = ten trading days back
lags = range(1, window_size + 1)
# Output column order: Date, d10_pd ... d1_pd, then the derived features
feature_columns = (['Date']
                   + ['d{}_pd'.format(lag) for lag in reversed(lags)]
                   + ['Trend', 'VolumeTrend', 'Volatility', 'PreviousClose', 'PriceDirection'])
# x positions used for the linear fits over one window
fit_x = list(range(window_size))

for df in dfs:
    # A frame with no rows has no name to key the result by; skip it.
    if df.empty:
        continue

    # Column positions are the same for every row, so look them up once.
    close = df.columns.get_loc('Close')
    volume = df.columns.get_loc('Volume')
    df_date = df.columns.get_loc('Date')
    # Name of equity/etf, read from the first row
    # (assumes the Name column is constant within a file - TODO confirm)
    df_name = df.iat[0, df.columns.get_loc('Name')]

    # Rows are collected in a list and turned into a dataframe once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per row.
    rows = []

    # Each record needs the window_size previous days plus one baseline day
    # before them, so the first usable row is window_size + 1.
    for i in range(window_size + 1, len(df)):
        # Price direction label: today's close versus the previous close
        close_price = df.iat[i, close]
        prev_close_price = df.iat[i - 1, close]
        price_change = close_price - prev_close_price
        threshold = close_price * neutral_percentage

        if price_change > threshold:
            direction = 'Positive'
        elif price_change < -threshold:
            direction = 'Negative'
        else:
            direction = 'Neutral'

        # Close price of each window day minus the baseline close (day i - 11).
        # Ordered most recent first: pds[0] is lag 1, pds[-1] is lag 10.
        initial_close = df.iat[i - window_size - 1, close]
        pds = [df.iat[i - lag, close] - initial_close for lag in lags]

        # Volume of each window day as a ratio of the baseline volume.
        # +1 because traded volume can be 0, which would divide by zero.
        initial_vol = df.iat[i - window_size - 1, volume] + 1
        vpds = [df.iat[i - lag, volume] / initial_vol for lag in lags]

        # Volatility of the price-difference window
        volatility = np.std(pds)

        # Trend of close and volume as the gradient of a degree-1 fit.
        # NOTE(review): the windows are ordered most recent first, so x = 0 is
        # the previous day and x = 9 is ten days back; a price that rose over
        # the window therefore yields a negative slope. Ordering is kept as in
        # the original so exported values do not change - confirm before flipping.
        slope = np.polyfit(fit_x, pds, 1)[0]
        slopeVol = np.polyfit(fit_x, vpds, 1)[0]

        new_row = {'Date': df.iat[i, df_date]}
        for lag, price_diff in zip(lags, pds):
            new_row['d{}_pd'.format(lag)] = price_diff
        new_row.update({'Trend': slope, 'VolumeTrend': slopeVol, 'Volatility': volatility,
                        'PreviousClose': prev_close_price, 'PriceDirection': direction})
        rows.append(new_row)

    # `columns=` fixes the column order and keeps the headers even when the
    # frame had too few rows to produce any record.
    processed_df = pd.DataFrame(rows, columns=feature_columns)

    print(processed_df)
    processed_df_coll[df_name] = processed_df


NameError: name 'fyfy' is not defined

## Data Export

In [None]:
# Folder the per-equity CSV files are written to. Built with os.path.join so
# the export does not depend on Windows-style '\\' separators.
export_dir = os.path.join(ROOT_DIR, 'classification_10d')
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(export_dir, exist_ok=True)

# One CSV per processed dataframe, named after its key in processed_df_coll
for key, frame in processed_df_coll.items():
    frame.to_csv(os.path.join(export_dir, key + ".csv"), index=False)

print("Export Complete!")