In [1]:
# Mounting Google Drive
from google.colab import drive
from os.path import join
ROOT = '/content/drive'
drive.mount(ROOT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Copy data to local runtime
PROJ = 'Shareddrives/Deep Learning'
PROJ_PATH = join(ROOT, PROJ)
!rsync -aP "{PROJ_PATH}"/* ./

sending incremental file list
rsync: send_files failed to open "/content/drive/Shareddrives/Deep Learning/A1.gdoc": Operation not supported (95)
rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1196) [sender=3.1.2]


In [3]:
# Import necessary libraries
from Students import simulator, network, predictor
import pandas as pd
import torch
import torch.nn as nn

In [4]:
labels_market_segments = ['company', 'segment']
market_segments = []
simulator.read_market_segments(market_segments)

labels_stock_prices = ['company', 'year', 'day', 'quarter', 'stock_price']
stock_prices = []
simulator.read_stock_prices(stock_prices)

labels_market_analysis = ['segment', 'year', 'quarter', 'trend']
market_analysis = []
simulator.read_market_analysis(market_analysis)

labels_info=['company', 'year', 'day','quarter','expert1_prediction','expert2_prediction','sentiment_analysis','m1','m2','m3','m4']
info=[]
simulator.read_info(info)

In [5]:
# Pandas, importing the data from text files
df_market_segments = pd.DataFrame(market_segments, columns=labels_market_segments)
df_stock_prices = pd.DataFrame(stock_prices, columns=labels_stock_prices)
df_market_analysis = pd.DataFrame(market_analysis, columns=labels_market_analysis)
df_info = pd.DataFrame(info, columns=labels_info)

In [6]:
df_stock_prices.head()

Unnamed: 0,company,year,day,quarter,stock_price
0,0,2017,3,0,102.2
1,0,2017,4,0,102.2
2,0,2017,5,0,102.2
3,0,2017,6,0,102.2
4,0,2017,9,0,102.2


In [7]:
df_market_segments.head()

Unnamed: 0,company,segment
0,0,IT
1,1,BIO
2,2,IT


In [8]:
df_market_analysis.head()

Unnamed: 0,segment,year,quarter,trend
0,IT,2016,3,0
1,IT,2017,0,-1
2,IT,2017,1,0
3,IT,2017,2,1
4,IT,2017,3,1


In [9]:
df_info.head()

Unnamed: 0,company,year,day,quarter,expert1_prediction,expert2_prediction,sentiment_analysis,m1,m2,m3,m4
0,0,2017,3,0,0,0,10,6.3,1824.0,-1.0,0
1,0,2017,4,0,0,1,10,5.1,6912.0,-0.9,0
2,0,2017,5,0,0,1,10,6.6,8928.0,0.3,0
3,0,2017,6,0,0,1,10,7.8,6924.0,0.0,0
4,0,2017,9,0,0,1,10,-0.9,5635.0,0.9,0


In [10]:
# function that merges two datasets
def merge_datasets(starting_df, df_to_merge, parameters):
  return starting_df.merge(df_to_merge, on=parameters)

In [11]:
'''
df = merge_datasets(df_stock_prices, df_market_segments, ['company'])
df = merge_datasets(df, df_market_analysis, ['segment','year','quarter'])
df = merge_datasets(df, df_info, ['company','year','day','quarter'])
df.head()

'''

# Combine data from stock prices with the market segments data, merging on company
df_prices_segments = df_stock_prices.merge(df_market_segments, on="company")
print(len(df_prices_segments))
df_prices_segments.head()

df_prices_segments_market = df_prices_segments.merge(df_market_analysis, on=['segment','year','quarter'])
print(len(df_prices_segments_market))
df_prices_segments_market.head()

df = df_prices_segments_market.merge(df_info, on=['company','year','day','quarter'])
print(len(df))
df.head()

3012
3012
3012


Unnamed: 0,company,year,day,quarter,stock_price,segment,trend,expert1_prediction,expert2_prediction,sentiment_analysis,m1,m2,m3,m4
0,0,2017,3,0,102.2,IT,-1,0,0,10,6.3,1824.0,-1.0,0
1,0,2017,4,0,102.2,IT,-1,0,1,10,5.1,6912.0,-0.9,0
2,0,2017,5,0,102.2,IT,-1,0,1,10,6.6,8928.0,0.3,0
3,0,2017,6,0,102.2,IT,-1,0,1,10,7.8,6924.0,0.0,0
4,0,2017,9,0,102.2,IT,-1,0,1,10,-0.9,5635.0,0.9,0


In [12]:
# PREPROCESSING

# replace of "segment" literals values into numeric values
df = df.replace(to_replace='BIO', value=0)
df = df.replace(to_replace='IT', value=1)

# normalization of m2 values in a 0-1 range
df['m2'] = round(df['m2'] / max(df['m2']), 3)

print(df.shape[0])
data_new2 = df.copy()                          # Create duplicate of data
data_new2.dropna(inplace = True)                      # Remove rows with NaN
print(data_new2.shape[0]) 
df.head()

3012
3012


Unnamed: 0,company,year,day,quarter,stock_price,segment,trend,expert1_prediction,expert2_prediction,sentiment_analysis,m1,m2,m3,m4
0,0,2017,3,0,102.2,1,-1,0,0,10,6.3,0.183,-1.0,0
1,0,2017,4,0,102.2,1,-1,0,1,10,5.1,0.692,-0.9,0
2,0,2017,5,0,102.2,1,-1,0,1,10,6.6,0.894,0.3,0
3,0,2017,6,0,102.2,1,-1,0,1,10,7.8,0.693,0.0,0
4,0,2017,9,0,102.2,1,-1,0,1,10,-0.9,0.564,0.9,0


In [13]:
# create DataSets

# Split the data into test and training sets (and independent and dependent variables)
from sklearn.model_selection import train_test_split
target = df['stock_price']
df.drop('stock_price', inplace=True, axis=1)

train_X, test_X, train_y, test_y = train_test_split(df, target, test_size=0.20)

In [14]:
train_y.values.shape

(2409,)

In [15]:
# conversion the independent and dependent data of the training set into two 
# separate tensor that we pass to a TensorDataset
X = torch.tensor(train_X.values, dtype=torch.float)
y = torch.tensor(train_y.values, dtype=torch.int64)
train_ds = torch.utils.data.TensorDataset(X, y)
'''
X = torch.tensor(test_X.values, dtype=torch.float)
y = torch.tensor(test_y.values, dtype=torch.int64)
test_ds = torch.utils.data.TensorDataset(X, y)
'''
X = torch.tensor(test_X.values, dtype=torch.float)
y = torch.tensor(test_y.values, dtype=torch.int64)
test_ds = torch.utils.data.TensorDataset(X, y)

In [16]:
# Create dataloaders
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=603, shuffle=True)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size=603, shuffle=False)

In [20]:
model = network.Network()

# Train my neural network.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
for epochs in range(5):
  for data in train_dl:
    X, y = data
    optimizer.zero_grad() # clear gradient information.
    output = model(X.view(-1, 13))
    print(y.shape)
    print(output.shape)
    loss = nn.functional.nll_loss(output, y)
    loss.backward() # do pack-propagation step
    optimizer.step() # update weights.

torch.Size([603])
torch.Size([603, 3])


IndexError: ignored

In [None]:
# definition of Predictor object
predictor_name = 'marketPredictor'
net = model
predictor = predictor.Predictor(predictor_name, net)

In [None]:
simulator.simulate(2017, 5, predictor)