In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive/')

# Change directory
%cd /content/drive/My Drive/CISC_CMPE351

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/My Drive/CISC_CMPE351


In [2]:
# Upgrade sci-kit learn
!pip install --upgrade scikit-learn

Requirement already up-to-date: scikit-learn in /usr/local/lib/python3.7/dist-packages (0.24.1)


In [3]:
# Imports
import pandas as pd
import numpy as np

# Sci-kit Learn
from sklearn.multioutput import MultiOutputClassifier
from sklearn.neural_network import MLPClassifier

In [4]:
# Load file
data = pd.read_csv("1dayWindow_final.csv")

# Check if there is a bias to one class for buy or not
print(len(data[data["Buy_5d"] == 1])/len(data.index))
print(len(data[data["Buy_10d"] == 1])/len(data.index))
print(len(data[data["Buy_15d"] == 1])/len(data.index))
print(len(data[data["Buy_20d"] == 1])/len(data.index))

0.4820592451975965
0.5240964094662055
0.5561177278882927
0.5846243660668544


In [5]:
# Column Reference 
def columnReference(DataSet):
    index = 0
    for column in DataSet.columns:
        print(index, ":", column)
        index += 1
columnReference(data)

0 : date
1 : ticker
2 : volume_20_X
3 : adjclose_20_X
4 : close_20_X
5 : low_20_X
6 : high_20_X
7 : open_20_X
8 : volume_19_X
9 : adjclose_19_X
10 : close_19_X
11 : low_19_X
12 : high_19_X
13 : open_19_X
14 : volume_18_X
15 : adjclose_18_X
16 : close_18_X
17 : low_18_X
18 : high_18_X
19 : open_18_X
20 : volume_17_X
21 : adjclose_17_X
22 : close_17_X
23 : low_17_X
24 : high_17_X
25 : open_17_X
26 : volume_16_X
27 : adjclose_16_X
28 : close_16_X
29 : low_16_X
30 : high_16_X
31 : open_16_X
32 : volume_15_X
33 : adjclose_15_X
34 : close_15_X
35 : low_15_X
36 : high_15_X
37 : open_15_X
38 : volume_14_X
39 : adjclose_14_X
40 : close_14_X
41 : low_14_X
42 : high_14_X
43 : open_14_X
44 : volume_13_X
45 : adjclose_13_X
46 : close_13_X
47 : low_13_X
48 : high_13_X
49 : open_13_X
50 : volume_12_X
51 : adjclose_12_X
52 : close_12_X
53 : low_12_X
54 : high_12_X
55 : open_12_X
56 : volume_11_X
57 : adjclose_11_X
58 : close_11_X
59 : low_11_X
60 : high_11_X
61 : open_11_X
62 : volume_10_X
63 : adjclo

In [6]:
# Get 80% of the data by date
data['date'] = pd.to_datetime(data['date'])
data = data.sort_values('date', ascending = False)
data = data.reset_index(drop=True)
dates = pd.to_datetime(data['date'])
data = data.drop(columns = ['date', 'ticker'])
(dates <= dates.quantile(.8)).idxmax()

# Most recent data for testing
test =  data.head(49869)

# Rest of the data is training
train = data.tail(-49869)

# Split the data into train and test
X_train = train.iloc[:, list(range(120))] 
y_train = train.iloc[:, list(range(126, 134))] 
X_test = test.iloc[:, list(range(120))]
y_test = test.iloc[:, list(range(126, 134))]

# Get training column indices
columnReference(X_train)

# Get testing column indices
columnReference(y_train)

0 : volume_20_X
1 : adjclose_20_X
2 : close_20_X
3 : low_20_X
4 : high_20_X
5 : open_20_X
6 : volume_19_X
7 : adjclose_19_X
8 : close_19_X
9 : low_19_X
10 : high_19_X
11 : open_19_X
12 : volume_18_X
13 : adjclose_18_X
14 : close_18_X
15 : low_18_X
16 : high_18_X
17 : open_18_X
18 : volume_17_X
19 : adjclose_17_X
20 : close_17_X
21 : low_17_X
22 : high_17_X
23 : open_17_X
24 : volume_16_X
25 : adjclose_16_X
26 : close_16_X
27 : low_16_X
28 : high_16_X
29 : open_16_X
30 : volume_15_X
31 : adjclose_15_X
32 : close_15_X
33 : low_15_X
34 : high_15_X
35 : open_15_X
36 : volume_14_X
37 : adjclose_14_X
38 : close_14_X
39 : low_14_X
40 : high_14_X
41 : open_14_X
42 : volume_13_X
43 : adjclose_13_X
44 : close_13_X
45 : low_13_X
46 : high_13_X
47 : open_13_X
48 : volume_12_X
49 : adjclose_12_X
50 : close_12_X
51 : low_12_X
52 : high_12_X
53 : open_12_X
54 : volume_11_X
55 : adjclose_11_X
56 : close_11_X
57 : low_11_X
58 : high_11_X
59 : open_11_X
60 : volume_10_X
61 : adjclose_10_X
62 : close_10_

In [7]:
# Initial Model
myModel = MLPClassifier(random_state=1, max_iter=300, early_stopping = True)

# Wrap Model for Multiple Outputs
wrapper = MultiOutputClassifier(myModel).fit(X_train, y_train)

# Wrapper Prediction
ypred = wrapper.predict(X_test)

print(ypred)
print(y_test)

# Save Outputs
np.savetxt(r"1DayWindow_MLP_Classification_ypred.csv", ypred, delimiter=",")
np.savetxt(r"1DayWindow_MLP_Classification_ytest.csv", y_test, delimiter=",")

[[ 1  1 15 ...  1  1  0]
 [ 1  1 15 ...  1  1  1]
 [ 1  1 15 ...  1  1  1]
 ...
 [ 1  1 15 ...  0  1  1]
 [ 5  1  1 ...  1  1  1]
 [ 1  1 15 ...  1  1  0]]
       max1_5d  max1_10d  max1_15d  max1_20d  Buy_5d  Buy_10d  Buy_15d  Buy_20d
0            4         4         4         4       1        1        1        1
1            5         7         7        20       1        1        1        1
2            4         4         4        20       1        1        1        1
3            4         7         7         7       1        1        1        1
4            4         7        14        14       0        0        0        0
...        ...       ...       ...       ...     ...      ...      ...      ...
49864        1        10        14        20       1        1        1        1
49865        2        10        15        15       0        0        0        0
49866        3         9         9        20       1        1        1        1
49867        3         8        14        14