# Import Packages

In [1]:
import numpy as np
import pandas as pd
import glob
import os

# Import CSVs

In [2]:
# Input folder is resolved relative to the directory the notebook is run from.
ROOT_DIR = os.path.abspath(os.curdir)
# os.path.join picks the right separator for the host OS instead of a hard-coded '/'.
path = os.path.join(ROOT_DIR, 'financial_data')
print(path)

# glob.glob returns matches in arbitrary (filesystem-dependent) order; sorting makes
# the order of `dfs` reproducible from run to run.
csvfiles = sorted(glob.glob(os.path.join(path, "*.csv")))

# One DataFrame per CSV file, in the same order as `csvfiles`.
dfs = [pd.read_csv(file) for file in csvfiles]

C:\Users\Dell\Documents\B.Sc IT (Hons.) Computing and Business\FYP - Luke Bezzina\Code\preprocessingHistoricalData/financial_data


# Collection of Processed DataFrames

In [3]:
# Maps each instrument name to its processed DataFrame; starts empty and is
# filled in by the pre-processing cell.
processed_df_coll = dict()

# Pre-Process

In [4]:
prev_row = None  # not read by this cell; kept so the name stays defined
# Relative width of the Neutral band (0.2%). The threshold applied below is
# neutral_percentage * (3-day average close).
# NOTE(review): the original comment described the band as a deviation from the
# previous day's close, but the code scales by the 3-day average — confirm which
# base is intended before changing it (doing so would alter the labels).
neutral_percentage = 0.002


def _classify_direction(price_change, reference_price, neutral_percentage):
    """Return 'Positive', 'Negative' or 'Neutral' for a price change.

    The change is Positive when it exceeds reference_price * neutral_percentage,
    Negative when it is below zero and its magnitude exceeds that same threshold,
    and Neutral otherwise (including when a comparison involves NaN).
    """
    threshold = reference_price * neutral_percentage
    if price_change > threshold:
        return 'Positive'
    if price_change < 0 and abs(price_change) > threshold:
        return 'Negative'
    return 'Neutral'


def _build_processed_frame(df, neutral_percentage):
    """Build the Date/OpenPrice/ClosePrice/PriceDirection frame for one input frame.

    For every row position i in 1 .. len(df) - 3 one output row is produced with:
      * Date           - the 'Date' value of row i
      * OpenPrice      - the 'Open' value of row i - 1
      * ClosePrice     - the 'Close' value of row i - 1
      * PriceDirection - label for (mean of 'Close' over rows i, i+1, i+2) minus
                         the 'Close' of row i - 1

    Frames with fewer than four rows yield an empty frame with the same columns.
    """
    # Column positions do not change per row, so look them up once.
    # (Named *_col rather than `open` to avoid shadowing the builtin.)
    open_col = df.columns.get_loc('Open')
    close_col = df.columns.get_loc('Close')
    date_col = df.columns.get_loc('Date')

    records = []
    # Stop two rows before the end so rows i + 1 and i + 2 always exist.
    for i in range(1, len(df) - 2):
        # NOTE(review): prices come from row i - 1 while Date comes from row i;
        # presumably intentional (previous-day features for the labelled day) — confirm.
        open_price = df.iat[i - 1, open_col]
        close_price = df.iat[i - 1, close_col]

        # Average close over the current row and the two following rows.
        close_price_3d = (df.iat[i, close_col]
                          + df.iat[i + 1, close_col]
                          + df.iat[i + 2, close_col]) / 3
        price_change = close_price_3d - close_price

        records.append({
            'Date': df.iat[i, date_col],
            'OpenPrice': open_price,
            'ClosePrice': close_price,
            'PriceDirection': _classify_direction(price_change, close_price_3d,
                                                  neutral_percentage),
        })

    # Build the frame once from all records: DataFrame.append per row is quadratic
    # and was removed in pandas 2.0.
    return pd.DataFrame(records,
                        columns=['Date', 'OpenPrice', 'ClosePrice', 'PriceDirection'])


for df in dfs:
    # The instrument name is read from the second row of the 'Name' column.
    name_col = df.columns.get_loc('Name')
    df_name = df.iat[1, name_col]

    print(df_name)

    # NOTE(review): 'Date' values are copied through as read, so files using
    # different date formats stay inconsistent with each other — confirm whether
    # they should be normalised before export.
    processed_df = _build_processed_frame(df, neutral_percentage)

    print(processed_df)
    processed_df_coll[df_name] = processed_df


AAL


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      47.28       46.30        Neutral
1     2017-01-05      46.63       46.70       Negative
2     2017-01-06      46.52       45.89       Positive
3     2017-01-09      45.85       46.21       Positive
4     2017-01-10      46.01       47.08       Positive
...          ...        ...         ...            ...
999   2020-12-22      15.70       16.10       Negative
1000  2020-12-23      16.35       15.48       Positive
1001  2020-12-24      15.52       15.89        Neutral
1002  2020-12-28      16.04       15.66       Positive
1003  2020-12-29      15.96       16.06       Negative

[1004 rows x 4 columns]
AAPL


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      28.95       29.04       Positive
1     2017-01-05      28.96       29.00       Positive
2     2017-01-06      28.98       29.15       Positive
3     2017-01-09      29.20       29.48       Positive
4     2017-01-10      29.49       29.75       Positive
...          ...        ...         ...            ...
999   2020-12-22     125.02      128.23       Positive
1000  2020-12-23     131.61      131.88       Positive
1001  2020-12-24     132.16      130.96       Positive
1002  2020-12-28     131.32      131.97       Positive
1003  2020-12-29     133.99      136.69       Negative

[1004 rows x 4 columns]
AMT


            Date  OpenPrice  ClosePrice PriceDirection
0     04/01/2017     106.73      106.15       Negative
1     05/01/2017     106.41      106.34       Negative
2     06/01/2017     106.00      105.97       Negative
3     09/01/2017     105.93      105.27       Negative
4     10/01/2017     105.50      105.02       Negative
...          ...        ...         ...            ...
999   22/12/2020     219.13      219.78       Negative
1000  23/12/2020     219.07      220.49       Negative
1001  24/12/2020     221.50      217.40       Positive
1002  28/12/2020     216.44      218.65       Positive
1003  29/12/2020     218.87      221.07       Positive

[1004 rows x 4 columns]
AVY


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      70.76       70.14       Positive
1     2017-01-05      70.45       72.14        Neutral
2     2017-01-06      72.00       71.43       Positive
3     2017-01-09      72.28       72.40       Positive
4     2017-01-10      72.48       72.56        Neutral
...          ...        ...         ...            ...
999   2020-12-22     150.69      152.08       Positive
1000  2020-12-23     151.37      151.76       Positive
1001  2020-12-24     152.67      152.71       Positive
1002  2020-12-28     153.57      153.40        Neutral
1003  2020-12-29     155.00      153.94        Neutral

[1004 rows x 4 columns]
AWK


            Date  OpenPrice  ClosePrice PriceDirection
0     04/01/2017      72.37       72.25       Positive
1     05/01/2017      72.49       72.81       Negative
2     06/01/2017      72.69       72.93       Negative
3     09/01/2017      72.65       72.84       Negative
4     10/01/2017      72.88       71.35       Negative
...          ...        ...         ...            ...
999   22/12/2020     148.95      149.11       Negative
1000  23/12/2020     149.34      148.06       Positive
1001  24/12/2020     148.95      147.41       Positive
1002  28/12/2020     147.88      149.14       Positive
1003  29/12/2020     149.59      149.65       Positive

[1004 rows x 4 columns]
BA


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04     156.30      156.97       Positive
1     2017-01-05     157.81      158.62        Neutral
2     2017-01-06     158.41      158.71        Neutral
3     2017-01-09     158.98      159.10        Neutral
4     2017-01-10     159.00      158.32       Positive
...          ...        ...         ...            ...
999   2020-12-22     212.00      219.31       Negative
1000  2020-12-23     223.16      218.78       Negative
1001  2020-12-24     219.75      219.69       Negative
1002  2020-12-28     219.62      217.15       Negative
1003  2020-12-29     217.75      216.09        Neutral

[1004 rows x 4 columns]
BAC


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      22.60       22.53       Positive
1     2017-01-05      22.72       22.95       Negative
2     2017-01-06      22.82       22.68        Neutral
3     2017-01-09      22.78       22.68       Positive
4     2017-01-10      22.51       22.55       Positive
...          ...        ...         ...            ...
999   2020-12-22      28.97       29.74        Neutral
1000  2020-12-23      29.78       29.21       Positive
1001  2020-12-24      29.38       30.05        Neutral
1002  2020-12-28      30.27       29.96       Positive
1003  2020-12-29      30.17       30.13        Neutral

[1004 rows x 4 columns]
CCL


            Date  OpenPrice  ClosePrice PriceDirection
0     04/01/2017      52.23       52.14       Positive
1     05/01/2017      52.45       52.82       Positive
2     06/01/2017      53.05       53.71       Positive
3     09/01/2017      53.59       53.65       Positive
4     10/01/2017      53.37       53.69       Positive
...          ...        ...         ...            ...
999   22/12/2020      20.22       21.06       Negative
1000  23/12/2020      21.07       19.81       Positive
1001  24/12/2020      20.12       20.94       Positive
1002  28/12/2020      20.98       20.84       Positive
1003  29/12/2020      21.20       21.71       Negative

[1004 rows x 4 columns]
CNP


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      24.72       24.59       Positive
1     2017-01-05      24.70       24.81       Positive
2     2017-01-06      24.79       24.81       Positive
3     2017-01-09      24.80       25.36       Negative
4     2017-01-10      25.41       24.88       Positive
...          ...        ...         ...            ...
999   2020-12-22      21.61       21.05       Positive
1000  2020-12-23      21.02       20.82       Positive
1001  2020-12-24      21.06       21.50       Negative
1002  2020-12-28      21.47       21.58       Negative
1003  2020-12-29      21.61       21.41       Positive

[1004 rows x 4 columns]
EQR


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      64.87       63.63       Positive
1     2017-01-05      63.84       64.28       Positive
2     2017-01-06      63.97       65.15       Negative
3     2017-01-09      64.97       65.38       Negative
4     2017-01-10      65.26       64.47       Negative
...          ...        ...         ...            ...
999   2020-12-22      56.70       57.86       Negative
1000  2020-12-23      57.71       57.85       Positive
1001  2020-12-24      58.00       57.07       Positive
1002  2020-12-28      57.48       58.20       Positive
1003  2020-12-29      58.32       59.42       Negative

[1004 rows x 4 columns]
F


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      12.20       12.59       Positive
1     2017-01-05      12.77       13.17       Negative
2     2017-01-06      13.21       12.77        Neutral
3     2017-01-09      12.80       12.76       Negative
4     2017-01-10      12.79       12.63       Positive
...          ...        ...         ...            ...
999   2020-12-22       8.87        8.93       Negative
1000  2020-12-23       8.98        8.79       Positive
1001  2020-12-24       8.81        8.99       Negative
1002  2020-12-28       9.01        8.86        Neutral
1003  2020-12-29       8.92        8.89       Negative

[1004 rows x 4 columns]
FOX


           Date  OpenPrice  ClosePrice PriceDirection
0    2019-03-15      39.50       39.96       Positive
1    2019-03-18      40.55       41.02       Negative
2    2019-03-19      41.73       40.86       Negative
3    2019-03-20      41.40       39.55       Negative
4    2019-03-21      39.85       37.60        Neutral
..          ...        ...         ...            ...
448  2020-12-22      27.53       27.39       Positive
449  2020-12-23      27.35       26.98       Positive
450  2020-12-24      27.08       27.75       Positive
451  2020-12-28      27.75       28.00       Positive
452  2020-12-29      28.05       28.88       Negative

[453 rows x 4 columns]
HFC


            Date  OpenPrice  ClosePrice PriceDirection
0     04/01/2017      33.21       33.57       Negative
1     05/01/2017      34.13       34.30       Negative
2     06/01/2017      34.19       33.75       Negative
3     09/01/2017      33.78       32.01       Negative
4     10/01/2017      31.71       30.95       Negative
...          ...        ...         ...            ...
999   22/12/2020      24.18       24.81       Positive
1000  23/12/2020      24.65       24.32       Positive
1001  24/12/2020      24.66       25.45       Negative
1002  28/12/2020      25.40       25.35       Positive
1003  29/12/2020      25.55       25.22       Positive

[1004 rows x 4 columns]
JNJ


            Date  OpenPrice  ClosePrice PriceDirection
0     04/01/2017     115.78      115.84       Positive
1     05/01/2017     115.50      115.65       Positive
2     06/01/2017     116.00      116.86       Negative
3     09/01/2017     116.67      116.30       Negative
4     10/01/2017     116.35      116.28       Negative
...          ...        ...         ...            ...
999   22/12/2020     152.01      153.02       Negative
1000  23/12/2020     152.30      152.72        Neutral
1001  24/12/2020     153.01      151.94       Positive
1002  28/12/2020     151.85      152.47       Positive
1003  29/12/2020     153.39      153.19       Positive

[1004 rows x 4 columns]
MSFT


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      62.79       62.58        Neutral
1     2017-01-05      62.48       62.30       Positive
2     2017-01-06      62.19       62.30       Positive
3     2017-01-09      62.30       62.84        Neutral
4     2017-01-10      62.76       62.64       Positive
...          ...        ...         ...            ...
999   2020-12-22     217.60      222.59        Neutral
1000  2020-12-23     222.68      223.94       Negative
1001  2020-12-24     223.25      221.02       Positive
1002  2020-12-28     221.42      222.75       Positive
1003  2020-12-29     224.50      224.96       Negative

[1004 rows x 4 columns]
NFLX


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04     124.96      127.49       Positive
1     2017-01-05     127.49      129.41       Positive
2     2017-01-06     129.22      131.81       Negative
3     2017-01-09     132.08      131.07       Negative
4     2017-01-10     131.48      130.95       Negative
...          ...        ...         ...            ...
999   2020-12-22     530.37      528.91       Negative
1000  2020-12-23     528.00      527.33       Negative
1001  2020-12-24     524.76      514.48       Positive
1002  2020-12-28     515.12      513.97       Positive
1003  2020-12-29     516.26      519.12       Positive

[1004 rows x 4 columns]
PFE


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      32.70       33.00       Positive
1     2017-01-05      33.13       33.29       Positive
2     2017-01-06      33.37       33.61       Negative
3     2017-01-09      33.66       33.48       Negative
4     2017-01-10      33.43       33.47       Negative
...          ...        ...         ...            ...
999   2020-12-22      37.24       37.38       Negative
1000  2020-12-23      37.21       36.74       Positive
1001  2020-12-24      37.01       37.44       Negative
1002  2020-12-28      37.40       37.27       Negative
1003  2020-12-29      37.39       36.82        Neutral

[1004 rows x 4 columns]
PKG


            Date  OpenPrice  ClosePrice PriceDirection
0     04/01/2017      85.16       85.00       Positive
1     05/01/2017      85.44       86.37       Positive
2     06/01/2017      86.37       85.33       Positive
3     09/01/2017      85.37       87.20       Positive
4     10/01/2017      86.90       87.77        Neutral
...          ...        ...         ...            ...
999   22/12/2020     133.32      133.77       Positive
1000  23/12/2020     133.62      133.81       Positive
1001  24/12/2020     134.12      135.73       Positive
1002  28/12/2020     136.27      136.10       Positive
1003  29/12/2020     137.94      136.78        Neutral

[1004 rows x 4 columns]
PM


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      91.79       91.23        Neutral
1     2017-01-05      90.53       90.42       Positive
2     2017-01-06      90.69       91.13        Neutral
3     2017-01-09      91.39       91.84       Negative
4     2017-01-10      91.09       91.31       Negative
...          ...        ...         ...            ...
999   2020-12-22      84.53       84.17       Negative
1000  2020-12-23      82.53       82.19       Positive
1001  2020-12-24      82.45       82.20        Neutral
1002  2020-12-28      82.16       82.51       Negative
1003  2020-12-29      82.81       82.49       Negative

[1004 rows x 4 columns]
S&P500


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04    2251.57     2257.83       Positive
1     2017-01-05    2261.60     2270.75        Neutral
2     2017-01-06    2268.18     2269.00        Neutral
3     2017-01-09    2271.14     2276.98       Negative
4     2017-01-10    2273.59     2268.90        Neutral
...          ...        ...         ...            ...
999   2020-12-22    3684.28     3694.92        Neutral
1000  2020-12-23    3698.08     3687.26       Positive
1001  2020-12-24    3693.42     3690.01       Positive
1002  2020-12-28    3694.03     3703.06       Positive
1003  2020-12-29    3723.03     3735.36        Neutral

[1004 rows x 4 columns]
WFC


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      55.67       56.00       Negative
1     2017-01-05      56.16       56.05       Negative
2     2017-01-06      55.49       55.18       Negative
3     2017-01-09      55.26       55.04       Negative
4     2017-01-10      54.68       54.24       Positive
...          ...        ...         ...            ...
999   2020-12-22      29.50       29.55       Positive
1000  2020-12-23      29.65       28.96       Positive
1001  2020-12-24      29.29       30.35       Negative
1002  2020-12-28      30.29       29.84        Neutral
1003  2020-12-29      29.94       29.93        Neutral

[1004 rows x 4 columns]
WMT


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      69.24       68.66       Positive
1     2017-01-05      68.66       69.06       Negative
2     2017-01-06      68.43       69.21       Negative
3     2017-01-09      68.41       68.26       Positive
4     2017-01-10      68.33       68.71       Negative
...          ...        ...         ...            ...
999   2020-12-22     145.62      145.97       Negative
1000  2020-12-23     145.18      144.20        Neutral
1001  2020-12-24     143.72      143.22       Positive
1002  2020-12-28     143.54      143.50       Positive
1003  2020-12-29     143.80      145.22       Negative

[1004 rows x 4 columns]
XOM


            Date  OpenPrice  ClosePrice PriceDirection
0     2017-01-04      90.94       90.89       Negative
1     2017-01-05      91.12       89.89       Negative
2     2017-01-06      90.19       88.55       Negative
3     2017-01-09      88.79       88.50       Negative
4     2017-01-10      88.22       87.04       Negative
...          ...        ...         ...            ...
999   2020-12-22      40.99       41.95       Negative
1000  2020-12-23      41.63       41.24       Positive
1001  2020-12-24      41.55       41.77       Negative
1002  2020-12-28      41.65       41.60        Neutral
1003  2020-12-29      41.69       41.74       Negative

[1004 rows x 4 columns]


# Data Export

In [5]:
# Write each processed frame to <ROOT_DIR>/LSTM/<name>.csv.
# os.path.join replaces the hard-coded Windows '\\' separators so the export lands
# in the LSTM folder on any OS.
export_dir = os.path.join(ROOT_DIR, 'LSTM')
# Create the target folder if it is missing; to_csv does not create directories.
os.makedirs(export_dir, exist_ok=True)

for key, df in processed_df_coll.items():
    # index=False: the positional row index is not written to the file.
    df.to_csv(os.path.join(export_dir, key + ".csv"), index=False)

print("Export Complete!")

Export Complete!
