# Task 2
## Stock Prices Time Series Prediction
### Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn import metrics

### Read data into memory

In [2]:
data = pd.read_csv('sp500.csv')
data = data.drop(['Unnamed: 0'], axis = 1)

### Correlate data

In [3]:
correlation_matrix = data.corr()

### Find Highest correlation
Get the single column with the highest correlation

In [4]:
sp_correlations = np.array(correlation_matrix[['SP500']])[1:]
correlation_index = np.argmax(sp_correlations) + 1

### Extract The Desired Dataset

In [5]:
desired_stock = data.iloc[:, correlation_index]

input_set_1d  = np.array(desired_stock)
output_set_1d = np.array(desired_stock.shift(-5).dropna())
# instead of shifting in pandas we construct time sequence in a more traditional way
def construct_time_series(best_stock_id,data):
    x = []
    y = []
    windowSize = 4
    for i in range(data.shape[0] - 2 * windowSize):
        x += [data[best_stock_id][i:i + windowSize+1]]
        y += [data[best_stock_id][i + windowSize + 1:i + 2 *windowSize + 1]]

    return np.array(x), np.array(y)

input_set, output_set = construct_time_series(data.columns[correlation_index],data)

train_ratio = 0.8
test_ratio  = 0.2
valid_ratio = 0.1 # Will be used in the MLPClassifier to take 10% of the training data as validation

training_input, testing_input, training_target, testing_target = train_test_split(input_set,output_set,
                                                                                  test_size=test_ratio,
                                                                                  random_state=1)

### Create the Linear regressor
The best MLP is implemented using adam optimizer with the parameters stated below.

Other than the optimizer, all other parameters will remain consistent for both models.

#### Scoring Metric Used:
*The mean squared error will be used as a scoring metric. The lower the error, the better the optimizer!*

#### Regularization Technique Used:
*Early stopping and L2 regularization were used to avoid overfitting*

In [13]:
hidden_layer        = (512, 512, 512) # Hidden layers and nodes, per layer
regularization_rate = 0.00001         # Rate for L2 regularization
learning_rate_start = 0.01            # Initial learning rate
learning_rate_mode  = 'adaptive'      # Mode for changing learning rate
batch_size          = 128             # Size of batch for one update
tolerance           = 1e-6            # Tolerance level for regressor
max_iteration       = 1000            # Maximum number of iterations before stopping
no_change_tolerance = 10              # Stop if no significant change happens in this number of iterations

# n_iter_no_change=10

model_adm = MLPRegressor(hidden_layer_sizes=hidden_layer, solver='adam', alpha=regularization_rate,
                          batch_size=batch_size, learning_rate=learning_rate_mode,
                          learning_rate_init=learning_rate_start, max_iter=max_iteration, tol=tolerance,
                          verbose=True, early_stopping=True, validation_fraction=valid_ratio,
                          n_iter_no_change=no_change_tolerance)
print(training_input.shape)
print(training_target.shape)

(1001, 5)
(1001, 4)


### Give The Models The Training Data And Fit Them!

#### The Adam Optimizer

In [19]:
model_adm.fit(training_input, training_target)

Iteration 1, loss = 4154.78541114
Validation score: 0.693364
Iteration 2, loss = 146.76765705
Validation score: 0.535554
Iteration 3, loss = 57.23545660
Validation score: 0.694966
Iteration 4, loss = 20.80334419
Validation score: 0.954886
Iteration 5, loss = 4.74353649
Validation score: 0.993402
Iteration 6, loss = 1.16946092
Validation score: 0.993453
Iteration 7, loss = 1.29222055
Validation score: 0.991987
Iteration 8, loss = 2.72631533
Validation score: 0.986645
Iteration 9, loss = 1.74708249
Validation score: 0.944879
Iteration 10, loss = 3.38698612
Validation score: 0.992483
Iteration 11, loss = 2.07907645
Validation score: 0.993564
Iteration 12, loss = 0.98347800
Validation score: 0.993853
Iteration 13, loss = 0.79013128
Validation score: 0.988073
Iteration 14, loss = 1.11792083
Validation score: 0.991162
Iteration 15, loss = 0.95003251
Validation score: 0.990534
Iteration 16, loss = 1.00271607
Validation score: 0.993762
Iteration 17, loss = 0.81937022
Validation score: 0.993823

MLPRegressor(activation='relu', alpha=1e-05, batch_size=128, beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(512, 512, 512), learning_rate='adaptive',
       learning_rate_init=0.01, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=1e-06,
       validation_fraction=0.1, verbose=True, warm_start=False)

In [20]:
predictions_adm = model_adm.predict(testing_input)
mse_adm = metrics.mean_squared_error(predictions_adm,testing_target)
print("Adam's Mean-Squared-Error:", mse_adm)

Adam's Mean-Squared-Error: 1.262530059018131


#### Prediction sample for 3 instances 
 similarity justification: As seen in the previous cell the MSE is low which means that the network is able to predict the values highly similar to the target values.

In [21]:
print("predictions:",predictions_adm[:4])
print("\ntargets",testing_target[:4])

predictions: [[55.69142674 55.91490474 55.8229445  55.77710291]
 [67.90431168 68.10460985 68.06071851 68.01757812]
 [91.19786121 91.3536487  91.4129308  91.37371288]
 [53.3469709  53.57091512 53.47235772 53.42966888]]

targets [[55.2159 55.3611 55.8063 56.0192]
 [68.2958 68.3745 68.4926 68.7289]
 [90.5851 87.7391 84.913  86.6644]
 [53.5412 53.9691 53.0938 52.1504]]
