In [None]:
# Data Organisation and Visualisation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import pathlib
import csv

# SciKit Learn Feature Selection and Data Split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
from sklearn import metrics  # metrics.mean_squared_error, metrics.r2_score

# SciKit Learn Machine Learning Libraries
from sklearn import preprocessing
from sklearn.ensemble import GradientBoostingRegressor  #Set 1, Model 1
from sklearn.ensemble import RandomForestRegressor  #Set 1, Model 2

# PyTorch and fast.ai Machine Learning Libraries
import torch.nn as nn  #contains neural network layer methods
import torch.nn.functional as F  #specifically convolutional layer methods
import torch.optim as optim  #pytorch optimization algorithms
from fastai.tabular import *  #Used in Set 2, Model 1 & 2 experiments

# Miscellaneous
import random  #random number generation used for testing
import time  #timing individual sections of code
from tqdm.notebook import tqdm  #progress bar for 'for loops'
from datetime import datetime  #accurate time stamp indexing

In [None]:
def pull_nasdaq100(path, tp, target='NDX'):
    """Load 'nasdaq100.csv' and prepend look-ahead target columns.

    For every horizon in ``tp`` two columns are created from the ``target``
    column:

    * a regression target: the ``target`` value ``horizon`` rows ahead;
    * a classification target: 1 when that future value is >= the current
      value, otherwise 0.

    Parameters
    ----------
    path : pathlib.Path
        Directory that contains 'nasdaq100.csv'.
    tp : sequence of int
        Look-ahead horizons, expressed in rows.  Column labels show
        ``horizon/390`` as "Days" (presumably 390 rows per trading day;
        confirm against the data file).
    target : str, default 'NDX'
        Name of the column to forecast.  It used to be read from a
        module-level global; it is now an explicit argument so the function
        no longer depends on hidden notebook state.

    Returns
    -------
    pandas.DataFrame
        Classification targets (in ``tp`` order), then regression targets
        (in ``tp`` order), then the original CSV columns.  Rows containing
        any NaN (including the trailing rows with no future value) are
        dropped.
    """
    filename = 'nasdaq100.csv'
    data = pd.read_csv(path/filename)
    base = data[target]

    # Walk the horizons back to front: every classification column is pushed
    # in at position 0, so the finished frame lists them in tp order.
    for position, horizon in enumerate(reversed(tp), start=1):
        label = str(horizon/390)

        # Value `horizon` rows ahead; the tail has no future value and is NaN.
        future = base.shift(-horizon)

        # NaN >= x is False, so tail rows get 0 here and are dropped below.
        went_up = (future >= base).astype('int64')

        data.insert(0, 'Classification Target: +'+label+' Days', went_up)
        # `position` classification columns now sit in front, so this index
        # appends the regression target after the earlier regression targets.
        data.insert(position, 'Regression Target: +'+label+' Days', future)

    return data.dropna()


def feature_selection(data, num_features, mode):
    """Keep one block of targets and the ``num_features`` best input columns.

    The frame is expected to hold 10 classification targets in columns 0-9,
    10 regression targets in columns 10-19 and the candidate inputs from
    column 20 onwards.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame laid out as described above.
    num_features : int
        Number of input columns to keep (passed to ``SelectKBest`` as ``k``).
    mode : {"classification", "regression"}
        Chooses which target block is kept and which mutual-information
        scorer ranks the inputs.

    Returns
    -------
    pandas.DataFrame
        The 10 kept target columns followed by the selected inputs.  The
        selected inputs lose their original names and are labelled
        0..num_features-1.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the two supported values (previously this
        surfaced later as an UnboundLocalError).

    Notes
    -----
    Inputs are ranked against the first kept target column only.
    """
    # Validate the mode before touching the data so a typo fails loudly.
    if mode == "classification":
        target_block = slice(0, 10)
    elif mode == "regression":
        target_block = slice(10, 20)
    else:
        raise ValueError('mode must be "classification" or "regression", got ' + repr(mode))

    score_func = mutual_info_classif if mode == "classification" else mutual_info_regression

    y = data.iloc[:, target_block]
    X = data.iloc[:, 20:]

    # fit_transform returns a bare array, hence the rebuilt frame below.
    selected = SelectKBest(score_func, k=num_features).fit_transform(X, y.iloc[:, 0])

    # Re-attach the original index so the concat lines up row for row.
    X_selected = pd.DataFrame(selected, index=data.index.values)
    return pd.concat([y, X_selected], axis=1, sort=False)


def sequence_input(data, sequence_length, sequence_spacing):
    """Widen each row with lagged copies of its input columns.

    The first 10 columns are treated as targets and kept as they are.  The
    remaining columns are repeated ``sequence_length`` times, the i-th copy
    delayed by ``i * sequence_spacing`` rows, so every output row carries the
    current inputs plus the inputs from earlier rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Targets in columns 0-9, inputs from column 10 onwards.
    sequence_length : int
        Number of input copies (lags 0 .. sequence_length-1).
    sequence_spacing : int
        Row distance between consecutive lags.

    Returns
    -------
    pandas.DataFrame
        Target columns under their original names, followed by the lagged
        inputs labelled 0, 1, 2, ...  Rows without a complete history (or
        with any other NaN) are dropped.

    Notes
    -----
    Lags are produced with ``DataFrame.shift``, which keeps the caller's
    index.  The earlier implementation re-indexed the lagged blocks from 0,
    which misaligned any frame whose index was not 0..n-1 (for example a test
    split), and built its NaN padding by repeated concatenation.
    """
    df_y, df_x = data.iloc[:, :10], data.iloc[:, 10:]
    y_cols = df_y.columns.tolist()

    # One shifted copy of the inputs per lag; lag 0 is the unshifted block.
    lagged = [df_x.shift(lag * sequence_spacing) for lag in range(sequence_length)]

    df_main = pd.concat([df_y] + lagged, axis=1)
    # Input names repeat once per lag, so replace them with a running number.
    df_main.columns = y_cols + list(range(len(df_main.columns) - len(y_cols)))
    return df_main.dropna()

   

def split(data, test_rows=5000):
    """Split ``data`` chronologically into a training part and a test part.

    Parameters
    ----------
    data : sliceable sequence (e.g. pandas.DataFrame)
        Rows in time order.
    test_rows : int, default 5000
        Number of trailing rows held out as the test set.  The default
        reproduces the previously hard-coded split.

    Returns
    -------
    tuple
        ``(data_train, data_test)``.  No validation set is produced.  If
        ``data`` has fewer than ``test_rows`` rows, everything goes to the
        test part and the training part is empty.

    Raises
    ------
    ValueError
        If ``test_rows`` is negative.
    """
    if test_rows < 0:
        raise ValueError("test_rows must be non-negative, got " + repr(test_rows))

    # Use an explicit cut position: data[:-0] would return an empty training
    # set rather than the whole input.
    cut = max(len(data) - test_rows, 0)
    return data[:cut], data[cut:]


def scale(data_train, data_test):
    """Standardise the input columns of both frames using training statistics.

    A ``StandardScaler`` is fitted on the training inputs (columns 10
    onwards) and then applied to the training and the test inputs.  The first
    10 columns (targets) pass through unchanged.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(scaled_train, scaled_test)`` with the original column names and
        index preserved.
    """
    # Fit on the training inputs only; the test frame is merely transformed.
    scaler = preprocessing.StandardScaler().fit(data_train.iloc[:, 10:])

    def _rescale(frame):
        # Separate targets from inputs, scale the inputs, then glue them back.
        targets, inputs = frame.iloc[:, :10], frame.iloc[:, 10:]
        names, rows = inputs.columns.to_list(), inputs.index.values
        # transform() returns a plain array, so restore labels and index.
        inputs_scaled = pd.DataFrame(scaler.transform(inputs), columns=names, index=rows)
        return pd.concat([targets, inputs_scaled], axis=1, sort=False)

    scaled_train = _rescale(data_train)
    scaled_test = _rescale(data_test)
    return scaled_train, scaled_test


def truncate(data, n_rows):
    """Return the last ``n_rows`` rows of ``data``.

    If ``n_rows`` exceeds the length, the whole frame is returned.  Because
    the slice is ``iloc[-n_rows:]``, ``n_rows == 0`` also returns the whole
    frame rather than an empty one.
    """
    return data.iloc[-n_rows:]


def split_Xy(data_train, data_test):
    """Separate inputs from targets in the train and test frames.

    Columns 0-9 are taken as targets (y) and columns 10 onwards as inputs (X).

    Returns
    -------
    tuple of list
        ``([X_train, y_train], [X_test, y_test])``.
    """
    train_pair = [data_train.iloc[:, 10:], data_train.iloc[:, :10]]
    test_pair = [data_test.iloc[:, 10:], data_test.iloc[:, :10]]
    return train_pair, test_pair



#SCORE AND GRAPHING FUNCTIONS

def score_classif(model, target, predicted): #score classification result
    """Print and return the percentage of predictions that match the target.

    Parameters
    ----------
    model : str
        Name shown in front of the printed score.
    target : sequence
        True labels.
    predicted : sequence
        Predicted labels, at least as long as ``target``; entries beyond
        ``len(target)`` are ignored.

    Returns
    -------
    float
        Accuracy in percent (0-100), or NaN when ``target`` is empty
        (previously a ZeroDivisionError).

    Raises
    ------
    ValueError
        If ``predicted`` is shorter than ``target``.
    """
    tested = len(target)
    if len(predicted) < tested:
        raise ValueError("predicted has fewer entries than target")

    if tested == 0:
        score = float('nan')  # nothing to score; avoid dividing by zero
    else:
        # zip walks the values positionally, so a pandas Series with a
        # non-zero-based index is handled as well as a plain list.
        correct = sum(1 for truth, guess in zip(target, predicted) if truth == guess)
        score = (correct/tested)*100

    print(model + " Classification Score: ", score)
    return score

    
def score_regress(target, prediction):  #target is as a series, prediction is as an np.array
    """Return ``[R^2, RMSE]`` for a set of regression predictions.

    Parameters
    ----------
    target : array-like
        True values.
    prediction : array-like
        Predicted values, same length as ``target``.

    Returns
    -------
    list
        ``[r2_score, root_mean_squared_error]``.  The second entry is the
        square root of the mean squared error, not the MSE itself.

    Notes
    -----
    The metrics are computed once.  The previous version recomputed the same
    values inside a ``for`` loop over the rows and left the result undefined
    when ``target`` had fewer than two entries.
    """
    r2 = metrics.r2_score(target, prediction)
    rmse = np.sqrt(metrics.mean_squared_error(target, prediction))
    return [r2, rmse]


def plot_col(targ, pred, col):  #plot regression results
    """Plot target against prediction over the test rows, one figure per column.

    Parameters
    ----------
    targ : pandas.DataFrame
        Target frame; column 0 is the NDX base truth, columns 1.. are the
        forecast horizons.
    pred : pandas.DataFrame
        Prediction frame with the same layout as ``targ``.
    col : int or sequence of int
        Positional column index (or indices) to plot.  A single int is
        accepted as well as a list.

    Notes
    -----
    The title is built from the module-level ``tp`` list as ``tp[col-1]/390``,
    so ``col`` is expected to be 1-based with respect to ``tp``.
    """
    columns = [col] if isinstance(col, (int, np.integer)) else list(col)

    # The x axis follows the frame being plotted, not a global test frame.
    x_axis = list(range(len(targ)))

    for column_position in columns:
        fig, a = plt.subplots()
        a.plot(x_axis, targ.iloc[:, column_position], label='NDX^ Target Value')
        a.plot(x_axis, pred.iloc[:, column_position], label='NDX^ Predicted Value')
        a.set(xlabel='Test Dataset Index', ylabel='NDX^ Value',
              title='Forecast: +'+str((tp[column_position-1])/390)+' Days')
        a.legend(loc='upper right')
        plt.show()


def plot_row(targ, pred, row):
    """Plot target against prediction across all horizons, one figure per row.

    Parameters
    ----------
    targ : pandas.DataFrame
        Target frame; column 0 is the NDX base truth, the remaining columns
        are the forecast horizons.
    pred : pandas.DataFrame
        Prediction frame with the same layout.  The text after the second
        space of each column name (from column 1 onwards) becomes the x tick
        label.
    row : int or sequence of int
        Positional row index (or indices) to plot.  A single int is accepted
        as well as a list.
    """
    rows = [row] if isinstance(row, (int, np.integer)) else list(row)

    # X labels: the base-truth column followed by one label per horizon.
    x_axis = ['NDX Base Truth']
    x_axis.extend(name.split(" ", 2)[2] for name in pred.columns.tolist()[1:])

    for row_position in rows:
        fig, a = plt.subplots()
        a.plot(x_axis, targ.iloc[row_position], label='NDX^ Target Value')
        a.plot(x_axis, pred.iloc[row_position], label='NDX^ Predicted Value')
        a.set(xlabel='Time (Days)', ylabel='NDX^ Value',
              title='0.5-5 Day Forecast: Row Index: '+str(row_position))
        a.legend(loc='upper right')
        # Rotate the existing category labels rather than replacing them with
        # set_xticklabels, which expects fixed tick positions.
        plt.setp(a.get_xticklabels(), rotation=40, ha='right')
        plt.show()


In [None]:
# --- Experiment configuration ---------------------------------------------
filename = 'nasdaq100.csv'     # data file to be used
path = pathlib.Path.home().joinpath('OneDrive', '19-20 3rd Yr MEng', 'Dissertation', 'Data', 'NASDAQ-100 II')
#path = pathlib.Path.cwd()
target = 'NDX'  #target column name to be predicted

data_norm = True
data_redu = True
it_input_features = [20]
it_data_rows = [30000]

# Forecast horizons in rows: ten evenly spaced steps per list.
tp1  = [39 * step for step in range(1, 11)]   #10 models up to 1 day
tp5  = [195 * step for step in range(1, 11)]  #10 models up to 5 days
tp10 = [390 * step for step in range(1, 11)]  #10 models up to 10 days

# Horizon set used by the cells below.
tp = tp5

In [None]:
# Load the CSV together with the generated target columns.
df_full = pull_nasdaq100(path, tp)

# Shared plot data: a positional x axis and the raw NDX series.
x_axis = [*range(len(df_full))]
y_ndx = df_full['NDX']

# Figure 1 - NDX and the ten regression targets (columns 10-19).
# The red vertical line sits 5460 rows before the end of the data.
fig = go.Figure()
targets_marker = dict(type="line",
                      x0=len(df_full)-5460, y0=4600,
                      x1=len(df_full)-5460, y1=5000,
                      line=dict(color="Red", width=1))
fig.add_shape(targets_marker)
fig.add_trace(go.Scatter(x=x_axis, y=y_ndx, fill=None, mode='lines', name='NDX^ Price'))
for column in df_full.columns.tolist()[10:20]:
    trace = go.Scatter(x=x_axis, y=df_full[column], fill=None, mode='lines', name=column)
    fig.add_trace(trace)
fig.update_layout(title="NASDAQ100 NDX^:  July 26th to December 22nd 2016",
                  xaxis_title="Time",
                  yaxis_title="NDX^ value")
fig.show()

# Figure 2 - every remaining column (index 20 onwards), same marker position.
fig = go.Figure()
inputs_marker = dict(type="line",
                     x0=len(df_full)-5460, y0=0,
                     x1=len(df_full)-5460, y1=200,
                     line=dict(color="Red", width=1))
fig.add_shape(inputs_marker)
for column in df_full.columns.tolist()[20:]:
    trace = go.Scatter(x=x_axis, y=df_full[column], fill=None, mode='lines', name=column)
    fig.add_trace(trace)
fig.update_layout(title="NASDAQ100 NDX^:  July 26th to December 22nd 2016",
                  xaxis_title="Time",
                  yaxis_title="NDX^ value")
fig.show()

In [None]:



########################################

## Set 1, Models 1 & 2:   Decision Tree Methods

########################################




In [None]:
#####
## Set 1, Models 1 and 2
#####

# Experiment settings.  learning_rate and n_estimators are re-bound by the
# training loop in this cell; the values here are only starting defaults.
tp = [195, 390, 585, 780, 975, 1170, 1365, 1560, 1755, 1950]  #10 models up to 5 days
input_features = 20
learning_rate = 0.1
data_rows = 30000
n_estimators = 100

# Result stores: one [df_targ, df_pred, df_score, tag] entry per experiment.
s1_m1 = []
s1_m2 = []


#########################


# Data pipeline: load -> select features -> split -> scale -> truncate -> X/y.
df_full = pull_nasdaq100(path, tp)  #import data with y_c and y_r cols
df = feature_selection(df_full, input_features, 'regression')  #reduce columns
df_train, df_test = split(df)  #split train/test
df_train, df_test = scale(df_train, df_test)  #normalize and scale data
df_train = truncate(df_train, data_rows)  #keep only the most recent training rows
train_R, test_R = split_Xy(df_train, df_test)
[X_train,y_train], [X_test,y_test] = train_R, test_R

input_list = df_train.columns.tolist()[10:]  #isolate list of inputs
target_list = df_train.columns.tolist()[:10]  #list of target column names

# rename() returns a new frame, so the result must be assigned; it was
# previously discarded and the column kept the name "NDX".
ndx_df = pd.DataFrame(df_full['NDX']).rename(columns={"NDX": "NDX Base Truth"})

##################



# Hyper-parameter grid for Set 1, Model 1.  The grid holds the values that are
# actually trained: the loop variables used to be overwritten inside the body
# (200 estimators, learning rate 0.1), so the stored tag reported 100
# estimators for a 200-estimator model.
for n_estimators in tqdm([200], leave=True):
    for learning_rate in [0.1]:
        tag = "n_estimators: "+str(n_estimators)  #what experiment is this?

        ###############################

        #Set 1, Model 1
        df_targ = pd.concat([ndx_df, y_test], axis=1).dropna()  #creates target dataframe, dropna to remove excess ndx
        # Copy the slice: columns are inserted below, and inserting into a
        # view of df_targ triggers pandas' chained-assignment warnings.
        df_pred = df_targ.iloc[:, :1].copy()  #prediction frame, starts with the base-truth column only
        df_score = pd.DataFrame(index=["R^2 Score", "Root Mean Square Error"])  #create score df


        for column in tqdm(target_list, leave=False):  #train one model per target column
            col_num = column.split(" ", 2)[2]

            # loss='ls', min_impurity_split and presort were dropped from the
            # call: all three were left at their defaults and are no longer
            # accepted by current scikit-learn releases.
            model = GradientBoostingRegressor(learning_rate=learning_rate, n_estimators=n_estimators, subsample=1.0,
                                              criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1,
                                              min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0,
                                              init=None, random_state=None, max_features=None,
                                              alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False,
                                              validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)

            model.fit(X_train, y_train[column])   #fit on y column of this for loop
            y_pred = model.predict(X_test)  #predict and release as an array
            df_pred.insert(len(df_pred.columns), "Regression Prediction: "+col_num, y_pred) #join df_pred with predictions
            score = score_regress(y_test[column], y_pred)  #score model  (series, np.array)
            df_score.insert(len(df_score.columns), col_num, score )  #insert score into score df

        s1_m1.append([df_targ, df_pred, df_score, tag])


        ###############################

        # Disabled experiment, kept for reference as an inert string: the
        # Random Forest run never executes, so s1_m2 stays empty.
        '''#Set 1, Model 2:  Random Forest
        #n_estimators = 
        df_targ = pd.concat([ndx_df, y_test], axis=1).dropna()  #creates target dataframe, dropna to remove excess ndx
        df_pred = df_targ.iloc[:, :1]  #just ndx column, will be added to later on
        df_score = pd.DataFrame(index=["R^2 Score", "Mean Square Error"])  #create score df

        for column in tqdm(target_list, leave=False):
            col_num = column.split(" ", 2)[2]

            model = RandomForestRegressor(n_estimators=n_estimators, criterion='mse', max_depth=None, min_samples_split=2,
                                          min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto',
                                          max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None,
                                          bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0,
                                          warm_start=False, ccp_alpha=0.0, max_samples=None)

            model.fit(X_train, y_train[column])   #fit on y column of this for loop
            y_pred = model.predict(X_test)  #predict and release as an array
            df_pred.insert(len(df_pred.columns), "Regression Prediction: "+col_num, y_pred) #join df_pred with predictions
            score = score_regress(y_test[column], y_pred)  #score model  (series, np.array)
            df_score.insert(len(df_score.columns), col_num, score )  #insert score into score df

        s1_m2.append([df_targ, df_pred, df_score, tag])  #returns df_pred, df_targ, and df_score
    '''

In [None]:
# Inspect the most recently stored experiment.  s1_m2 stays empty while the
# Random Forest section is disabled, so indexing s1_m2[1] raised IndexError;
# fall back to the Model 1 results when s1_m2 has nothing in it.
results = s1_m2 if s1_m2 else s1_m1
df_targ, df_pred, df_score, tag = results[-1]
df_targ

In [None]:
#df_targ1, df_pred1, df_score1 = s1_m1  #set 1 model 1
#df_targ2, df_pred2, df_score2 = s1_m2  #set 1 model 2

#I'm still sat here with df_targ, df_pred, df_score, plus all the 
#plot_row(df_targ1, df_pred1, [1000,2500,4000])  #plot row
#plot_col(df_targ1, df_pred1, [1,5,10])  #plots predictions over time series
#print(df_score1)

print("\n\nSET 1 MODEL 1\n")
# Report every stored experiment.  Iterating the list directly replaces a
# fixed range(9), which ran past the end whenever fewer than nine experiments
# had been stored.
for df_targ, df_pred, df_score, tag in s1_m1:
    print(tag)
    plot_row(df_targ, df_pred, [1000, 2500, 4500])  #plot row
    plot_col(df_targ, df_pred, [1,5,10])
    print(df_score)
    print("\n-----\n-----\n")

In [None]:











########################################
## Set 1, Models 1 & 2
########################################












In [None]:
'''#Regression Set 1, Model 1:  Linear Regression

#####
## REGRESSION
#####

df_full = pull_nasdaq100(path, tp5)  #import data with y_c and y_r cols

df = feature_selection(df_full, input_features, 'regression')  #reduce columns
df_train, df_test = split(df)  #split train/valid/test
df_train, df_test = scale(df_train, df_test)  #normalize and scale data
df_train = truncate(df_train, data_rows)
train_R, test_R = split_Xy(df_train, df_test)
[X_train,y_train], [X_test,y_test] = train_R, test_R



input_list = df_train.columns.tolist()[10:]
target_list = df_train.columns.tolist()[:10]

ndx_df = pd.DataFrame(df_full['NDX'])  #create df with just ndx
ndx_df.rename(columns={"NDX": "NDX Base Truth"})  #rename ndx column
df_targ = pd.concat([ndx_df, y_test], axis=1).dropna()  #creates target dataframe, dropna to remove excess ndx
df_pred = df_targ.iloc[:, :1]  #just ndx column, will be added to later on
df_score = pd.DataFrame(index=["R^2 Score", "Mean Square Error"])  #create score df

for column in tqdm(target_list):
    col_num = column.split(" ", 2)[2]
    
    model = LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=None)  #define model
    
    model.fit(X_train, y_train[column])   #fit on y column of this for loop
    y_pred = model.predict(X_test)  #predict and release as an array
    df_pred.insert(len(df_pred.columns), "Regression Prediction: "+col_num, y_pred) #join df_pred with predictions
    score = score_regress(y_test[column], y_pred)  #score model  (series, np.array)
    df_score.insert(len(df_score.columns), col_num, score )  #insert score into score df


s1_m1 = [df_targ, df_pred, df_score]  #returns df_pred, df_targ, and df_score



###
###



#Regression Set 1, Model 1:  SGD Regressor
#CREATE LIST OF MODELS, ONE PER ITEM IN TP
input_list = df_train.columns.tolist()[10:]
target_list = df_train.columns.tolist()[:10]

ndx_df = pd.DataFrame(df_full['NDX'])  #create df with just ndx
ndx_df.rename(columns={"NDX": "NDX Base Truth"})  #rename ndx column
df_targ = pd.concat([ndx_df, y_test], axis=1).dropna()  #creates target dataframe, dropna to remove excess ndx
df_pred = df_targ.iloc[:, :1]  #just ndx column, will be added to later on
df_score = pd.DataFrame(index=["R^2 Score", "Mean Square Error"])  #create score df

for column in tqdm(target_list):
    col_num = column.split(" ", 2)[2]
    
    model = SGDRegressor(loss='squared_loss', penalty='l2', alpha=0.0001, l1_ratio=0.15,
                      fit_intercept=True, max_iter=1000, tol=0.001, shuffle=True, verbose=0,
                      epsilon=0.1, random_state=None, learning_rate='invscaling', eta0=0.01,
                      power_t=0.25, early_stopping=False, validation_fraction=0.1,
                      n_iter_no_change=5, warm_start=False, average=False)
    
    model.fit(X_train, y_train[column])   #fit on y column of this for loop
    y_pred = model.predict(X_test)  #predict and release as an array
    df_pred.insert(len(df_pred.columns), "Regression Prediction: "+column.split(" ", 2)[2], y_pred) #join df_pred with predictions
    score = score_regress(y_test[column], y_pred)  #score model  (series, np.array)
    df_score.insert(len(df_score.columns), col_num, score )  #insert score into score df


s1_m2 = [df_targ, df_pred, df_score]  #returns df_pred, df_targ, and df_score'''



###
'''###

#Regression Set 1, Model 5:  Gradient Boosting
#CREATE LIST OF MODELS, ONE PER ITEM IN TP
input_list = df_train.columns.tolist()[10:]  #isolate list of inputs
target_list = df_train.columns.tolist()[:10]  #isolate list of targets
ndx_df = pd.DataFrame(df_full['NDX'])  #create df with just ndx
ndx_df.rename(columns={"NDX": "NDX Base Truth"})  #rename ndx column
df_targ = pd.concat([ndx_df, y_test], axis=1).dropna()  #creates target dataframe, dropna to remove excess ndx
df_pred = df_targ.iloc[:, :1]  #just ndx column, will be added to later on
df_score = pd.DataFrame(index=["R^2 Score", "RMSE"])  #create score df

for column in tqdm(target_list):
    col_num = column.split(" ", 2)[2]
    
    model = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', 
                                      min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, 
                                      min_impurity_decrease=0.0, min_impurity_split=None, init=None, random_state=None,
                                      max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, 
                                      presort='deprecated', validation_fraction=0.1, n_iter_no_change=None, tol=0.0001,
                                      ccp_alpha=0.0)
    
    model.fit(X_train, y_train[column])   #fit on y column of this for loop
    y_pred = model.predict(X_test)  #predict and release as an array
    df_pred.insert(len(df_pred.columns), "Regression Prediction: "+col_num, y_pred) #join df_pred with predictions
    score = score_regress(y_test[column], y_pred)  #score model  (series, np.array)
    df_score.insert(len(df_score.columns), col_num, score )  #insert score into score df

s1_m5 = [df_targ, df_pred, df_score]  #returns df_pred, df_targ, and df_score



###'''
'''###

#Regression Set 1, Model 6: AdaBoost
input_list = df_train.columns.tolist()[10:]  #isolate list of inputs
target_list = df_train.columns.tolist()[:10]  #isolate list of targets
ndx_df = pd.DataFrame(df_full['NDX'])  #create df with just ndx
ndx_df.rename(columns={"NDX": "NDX Base Truth"})  #rename ndx column
df_targ = pd.concat([ndx_df, y_test], axis=1).dropna()  #creates target dataframe, dropna to remove excess ndx
df_pred = df_targ.iloc[:, :1]  #just ndx column, will be added to later on
df_score = pd.DataFrame(index=["R^2 Score", "Mean Square Error"])  #create score df

for column in tqdm(target_list):
    col_num = column.split(" ", 2)[2]
    
    model = AdaBoostRegressor(base_estimator=None, n_estimators=50, learning_rate=1.0, loss='linear', random_state=None)
    
    model.fit(X_train, y_train[column])   #fit on y column of this for loop
    y_pred = model.predict(X_test)  #predict and release as an array
    df_pred.insert(len(df_pred.columns), "Regression Prediction: "+col_num, y_pred) #join df_pred with predictions
    score = score_regress(y_test[column], y_pred)  #score model  (series, np.array)
    df_score.insert(len(df_score.columns), col_num, score )  #insert score into score df

s1_m6 = [df_targ, df_pred, df_score]  #returns df_pred, df_targ, and df_score


###'''
###























#####
## CLASSIFICATION
#####

'''
df_full = pull_nasdaq100(path, tp)  #import data with y_c and y_r cols

df = feature_selection(df_full, input_features, 'classification')  #reduce columns
df_train, df_test = split(df)  #split train/valid/test
df_train, df_test = scale(df_train, df_test)  #normalize and scale data
df_train = truncate(df_train, data_rows)
train_C, test_C = split_Xy(df_train, df_test)
[X_train,y_train], [X_test,y_test] = train_C, test_C


###
###

##################
#Classification Model 0:  Logistic Regression

Model0 = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True,
                            intercept_scaling=1, class_weight=None, random_state=None,
                            solver='lbfgs', max_iter=100, multi_class='auto', verbose=0,
                            warm_start=False, n_jobs=None, l1_ratio=None)

Model0.fit(X_train,y_train)
y_pred0, y_test0 = Model0.predict(X_test).tolist(), y_test.values.tolist()
score_classif("Logistic Regression", y_test0, y_pred0)


###
###

#######################
#Classification Model 1:  SGD Classifier
            
Model1 = SGDClassifier(loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, fit_intercept=True,
                       max_iter=1000, tol=0.001, shuffle=True, verbose=0, epsilon=0.1, n_jobs=None,
                       random_state=None, learning_rate='optimal', eta0=0.0, power_t=0.5,
                       early_stopping=False, validation_fraction=0.1, n_iter_no_change=5, 
                       class_weight=None, warm_start=False, average=False)

Model1.fit(X_train,y_train)
y_pred1, y_test1 = Model1.predict(X_test).tolist(), y_test.values.tolist()
score_classif("SGD Regression", y_test1, y_pred1)

'''

###
###




#####
## CLASSIFICATION
#####
'''

df_full = pull_nasdaq100(path, tp)  #import data with y_c and y_r cols

df = feature_selection(df_full, input_features, 'classification')  #reduce columns
df_train, df_test = split(df)  #split train/valid/test
df_train, df_test = scale(df_train, df_test)  #normalize and scale data
df_train = truncate(df_train, data_rows)
train_C, test_C = split_Xy(df_train, df_test)
[X_train,y_train], [X_test,y_test] = train_C, test_C


###
###

##################
#Classification Model 0:  Single decision tree method

Model0 = DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None,
                                min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                                max_features=None, random_state=None, max_leaf_nodes=None,
                                min_impurity_decrease=0.0, min_impurity_split=None, class_weight=None,
                                presort='deprecated', ccp_alpha=0.0)

Model0.fit(X_train,y_train)
y_pred0, y_test0 = Model0.predict(X_test).tolist(), y_test.values.tolist()
score_classif("SGD Regression", y_test0, y_pred0)


###
###

#######################
#Classification Model 1:  Random Forest
            
Model1 = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None,
                                min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                                max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0,
                                min_impurity_split=None, bootstrap=True, oob_score=False,
                                n_jobs=None, random_state=None, verbose=0, warm_start=False,
                                class_weight=None, ccp_alpha=0.0, max_samples=None)

Model1.fit(X_train,y_train)
y_pred1, y_test1 = Model1.predict(X_test).tolist(), y_test.values.tolist()
score_classif("SGD Regression", y_test1, y_pred1)



###
###

#####################
#Classification Model 2:  Gradient Boosting

Model2 = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=100,
                                    subsample=1.0, criterion='friedman_mse', min_samples_split=2,
                                    min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3,
                                    min_impurity_decrease=0.0, min_impurity_split=None,init=None,
                                    random_state=None, max_features=None, verbose=0,
                                    max_leaf_nodes=None, warm_start=False, presort='deprecated',
                                    validation_fraction=0.1, n_iter_no_change=None, tol=0.0001,
                                    ccp_alpha=0.0)

Model2.fit(X_train,y_train)
y_pred2, y_test2 = Model2.predict(X_test).tolist(), y_test.values.tolist()
score_classif("SGD Regression", y_test2, y_pred2)


###
###

######################
#Classification Model 3:  AdaBoost

Model3 = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0,
                            algorithm='SAMME.R', random_state=None)

Model3.fit(X_train,y_train)
y_pred3, y_test3 = Model3.predict(X_test).tolist(), y_test.values.tolist()
score_classif("SGD Regression", y_test3, y_pred3)

'''
########################



In [None]:


#######################


#######################



In [None]:
#Plot Single Row
# Each stored entry is [df_targ, df_pred, df_score, tag].  Take the latest
# entry, falling back to the Model 1 results when s1_m2 is empty (the old
# code unpacked the result list itself, which fails).
latest = (s1_m2 if s1_m2 else s1_m1)[-1]
df_targ, df_pred, df_score = latest[:3]

# First and last row of the 5000-row test set; the plot helper takes a list.
for i in [0, 4999]:
    plot_row(df_targ, df_pred, [i])

# Shortest and longest horizon, in days.  Column 0 holds the NDX base truth,
# so horizon k of tp lives in column k + 1.
for i in [0.5, 5]:
    j = tp.index(i*390) + 1
    plot_col(df_targ, df_pred, [j])

df_score

In [None]:
# Scratch check: echo the integers 0-4, one per line.
for i in (0, 1, 2, 3, 4):
    print(i)

In [None]:
# Display the training frame (scaled, then truncated) built in the Set 1 setup cell.
df_train

In [None]:
# Scratch check: print the tp horizon behind each 1-based plot column.
col = [1,5,10]
for i, column_number in enumerate(col):
    print(tp[column_number-1])