In [1]:
# Mount the user's Google Drive inside the Colab runtime at /content/drive
# (Colab-only; the data-import cell below reads its CSV files from this mount).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# python_version 3.7.10

### &nbsp;&nbsp;&nbsp; %CLASS% StockClass: used to create the stock dictionary

In [3]:
class StockClass(object):
    """Per-stock container for descriptive fields and model results.

    One instance is meant to be stored per ticker in the stock dictionary.
    The constructor only stores its arguments; it performs no I/O.

    Note: ``__init__`` uses the module-level names ``pd`` and ``np``, so pandas
    and numpy must be imported before the first instance is created.
    """

    # Initialization of the StockClass object with the ticker symbol, which is used to construct a yf.Ticker object
    def __init__(self, ticker, isin=None, exchangeid=None, sector=None, industry=None, country=None, pe=None, eps=None,
                 insiderown=None, shsout=None, shsfloat=None, mktcap=None, income=None, sales=None,
                 booksh=None, pb=None, roa=None, tp=None, roe=None, roi=None, employees=None, debteq=None,
                 groupby=None, confusion_matrix=None, accuracy_report=None,
                 confusion_matrix2=None, accuracy_report2=None):
        """Store the ticker and the optional descriptive / result fields.

        Parameters
        ----------
        ticker
            Ticker symbol; stored as ``self.name``.
        isin, exchangeid, sector, industry, country, pe, eps, insiderown,
        shsout, shsfloat, mktcap, income, sales, booksh, pb, roa, tp, roe,
        roi, employees, debteq, groupby
            Optional descriptive fields, stored unchanged (default ``None``).
        confusion_matrix, accuracy_report, confusion_matrix2, accuracy_report2
            Optional slots for the results of two classification runs.
        """
        self.name = ticker
        self.isin = isin
        self.exchangeid = exchangeid
        # self.history is the attribute of the StockClass object meant to store data in DataFrame format.
        # NOTE: history / investing / pickle / rsi start out holding the DataFrame *class*
        # itself as a placeholder, not an empty DataFrame instance.
        self.history = pd.DataFrame
        self.investing = pd.DataFrame
        self.pickle = pd.DataFrame
        # 1 x 2 header row ('date', 'ticker')
        self.not_found = np.array([['date', 'ticker']])
        self.nanDiv = False
        self.nanSplit = False
        # fixed: the sector argument used to be discarded (attribute was always None)
        self.sector = sector
        self.industry = industry
        self.country = country
        self.pe = pe
        self.eps = eps
        self.insiderown = insiderown
        self.shsout = shsout
        self.shsfloat = shsfloat
        self.mktcap = mktcap
        self.income = income
        self.sales = sales
        # 'bookh' is the historical (misspelled) attribute name; it is kept so existing
        # readers keep working, and the value is also exposed under the parameter's name.
        self.bookh = booksh
        self.booksh = booksh
        self.pb = pb
        self.roa = roa
        self.tp = tp
        self.roe = roe
        self.roi = roi
        self.employees = employees
        self.debteq = debteq
        self.rsi = pd.DataFrame
        self.groupby = groupby
        self.confusion_matrix = confusion_matrix
        self.accuracy_report = accuracy_report
        self.confusion_matrix2 = confusion_matrix2
        self.accuracy_report2 = accuracy_report2

# 1. First part: import data, cleaning and arranging 

### &nbsp;&nbsp;&nbsp; * Main packages import

In [4]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### &nbsp;&nbsp;&nbsp; * Data import

In [5]:
# Folder (on the mounted Drive) holding one CSV file per price field.
# Change this single constant to point the notebook at another data drop.
DATA_DIR = '/content/drive/MyDrive/Data/20210322'

# Every file is read with its 'date' column as the index; the remaining
# columns are the tickers (they become the ticker level once the frames are stacked).
open_df_original = pd.read_csv(DATA_DIR + '/open', index_col='date')
high_df_original = pd.read_csv(DATA_DIR + '/high', index_col='date')
low_df_original = pd.read_csv(DATA_DIR + '/low', index_col='date')
adjclose_df_original = pd.read_csv(DATA_DIR + '/adjclose', index_col='date')
volume_df_original = pd.read_csv(DATA_DIR + '/volume', index_col='date')

In [7]:
# Drop every column (ticker) that contains at least one NaN, e.g. series whose
# end is NaN because the stock is probably not quoted anymore.
# The frames are modified in place, and each frame is cleaned independently.
# The printed label now matches the frame being cleaned (the adjclose and volume
# branches used to print "high").
for frame_label, frame in (('open', open_df_original),
                           ('high', high_df_original),
                           ('low', low_df_original),
                           ('adjclose', adjclose_df_original),
                           ('volume', volume_df_original)):
    if frame.isnull().values.any():
        print(frame_label + ' shape before: ', frame.shape)
        frame.dropna(axis=1, how='any', inplace=True)
        print(frame_label + ' shape after: ', frame.shape)

In [8]:
# Working copies of the cleaned frames without their final row
# (the row is removed by label, taken from the last index entry).
open_df, high_df, low_df, adjclose_df, volume_df = (
    frame.drop(index=frame.index[-1:])
    for frame in (open_df_original,
                  high_df_original,
                  low_df_original,
                  adjclose_df_original,
                  volume_df_original)
)

# 2. Second part: computation and assessing 

### &nbsp;&nbsp;&nbsp; * SKLearn preprocessing import to scale data

In [9]:
from sklearn import preprocessing

### &nbsp;&nbsp;&nbsp; %FEATURE% Relative variation from open to adjusted close price

In [10]:
# open-to-adjusted-close move, expressed as a fraction of the open price
adjclose_rel_var_df = adjclose_df.sub(open_df).div(open_df)

### &nbsp;&nbsp;&nbsp; %FEATURE% Absolute variation between high and low price

In [11]:
# daily price range: high minus low
high_low_var_df = high_df.sub(low_df)

# rescale the range into [0, 1] with a min-max scaler fitted on the full frame
high_low_var_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
high_low_var_scaled = high_low_var_scaler.fit(high_low_var_df).transform(high_low_var_df)

# put the scaled array back into a frame carrying the original labels
high_low_var_scaled_df = pd.DataFrame(
    high_low_var_scaled,
    columns=high_low_var_df.columns,
    index=high_low_var_df.index,
)

### &nbsp;&nbsp;&nbsp; %FEATURE% High low absolute variation over adjusted close price

In [12]:
# daily high-low range expressed as a fraction of the adjusted close price
high_low_var_df_adjclose = high_low_var_df.div(adjclose_df)

### &nbsp;&nbsp;&nbsp; %FEATURE% Log Return (adjusted close price log return)

In [13]:
# one-row log return of the adjusted close: ln(P_t / P_{t-1});
# the first row has no predecessor and is therefore NaN
adjclose_df_log_return = np.log(adjclose_df.div(adjclose_df.shift(periods=1)))

### &nbsp;&nbsp;&nbsp; * Construction of stack dataset with all features and label to classify

In [14]:
# Every wide frame (dates as rows, tickers as columns) is transposed and stacked
# into a Series indexed by (ticker, date). dropna=False keeps the NaN cells so
# that all stacked series cover the same index.
adjclose_rel_var_df_t = adjclose_rel_var_df.transpose()
adjclose_rel_var_df_stack = adjclose_rel_var_df_t.stack(dropna=False)

high_low_var_scaled_df_t = high_low_var_scaled_df.transpose()
high_low_var_scaled_df_stack = high_low_var_scaled_df_t.stack(dropna=False)

high_low_var_df_adjclose_t = high_low_var_df_adjclose.transpose()
high_low_var_df_adjclose_stack = high_low_var_df_adjclose_t.stack(dropna=False)

adjclose_df_t = adjclose_df.transpose()
adjclose_df_stack = adjclose_df_t.stack(dropna=False)

volume_df_t = volume_df.transpose()
volume_df_stack = volume_df_t.stack(dropna=False)

# classification variable: 1 where the log return is strictly positive, else 0
classification_df = (adjclose_df_log_return > 0) * 1
classification_df_t = classification_df.transpose()
# Shift one step back along the date axis so each date carries the class of the
# NEXT date (marks the day before a rally up with 1 and vice versa with 0).
# The last date of every ticker becomes NaN and is removed by the dropna below.
# (A stand-alone shift whose result was discarded has been removed from here.)
classification_df_stack = classification_df_t.shift(axis=1, periods=-1).stack(dropna=False)

data = {'adjclose_rel_var': adjclose_rel_var_df_stack,
        'high_low_var_scaled': high_low_var_scaled_df_stack,
        'high_low_var_adjclose': high_low_var_df_adjclose_stack,
        'adjclose': adjclose_df_stack,
        'volume': volume_df_stack,
        'label': classification_df_stack}

# one column per feature plus the label; rows with any NaN are discarded
df_concat = pd.concat(data, axis=1).dropna(axis=0, how='any')
df_concat

Unnamed: 0_level_0,Unnamed: 1_level_0,adjclose_rel_var,high_low_var_scaled,high_low_var_adjclose,adjclose,volume,label
Unnamed: 0_level_1,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,2019-07-29,0.000102,0.092069,0.016114,69.507103,2750000.0,0.0
A,2019-07-30,0.000575,0.228046,0.030019,69.289803,2871800.0,0.0
A,2019-07-31,-0.023239,0.179888,0.025380,68.558876,3846100.0,0.0
A,2019-08-01,-0.018220,0.257791,0.033498,68.361320,2494100.0,0.0
A,2019-08-02,-0.027124,0.155808,0.023347,67.245178,2803500.0,0.0
...,...,...,...,...,...,...,...
ZYXI,2021-03-11,0.009512,0.113684,0.043581,16.980000,518100.0,0.0
ZYXI,2021-03-12,-0.000592,0.044211,0.024303,16.870001,421500.0,0.0
ZYXI,2021-03-15,-0.028554,0.147369,0.055113,16.330000,463200.0,0.0
ZYXI,2021-03-16,-0.019512,0.191579,0.069030,16.080000,483200.0,1.0


### &nbsp;&nbsp;&nbsp; * Check the number of rows in the previous dataframe

In [42]:
# expected row count of the stacked dataframe:
# (number of tickers) x (number of dates), minus one row per ticker
open_df.shape[1] * (open_df.shape[0] - 1)

1872542

### &nbsp;&nbsp;&nbsp; * Take the correct list of tickers

In [45]:
# Unique tickers present in df_concat, in order of first appearance.
# They are read directly from the first index level instead of looping in Python
# over every (ticker, date) tuple; dict.fromkeys removes duplicates while keeping order.
ticker_list = list(dict.fromkeys(df_concat.index.get_level_values(0)))
ticker_list

['A',
 'AA',
 'AACG',
 'AAIC',
 'AAL',
 'AAMC',
 'AAME',
 'AAOI',
 'AAON',
 'AAP',
 'AAPL',
 'AAU',
 'AAWW',
 'AAXN',
 'ABB',
 'ABBV',
 'ABC',
 'ABCB',
 'ABEO',
 'ABEV',
 'ABG',
 'ABIO',
 'ABM',
 'ABMD',
 'ABR-PA',
 'ABR-PB',
 'ABR-PC',
 'ABR',
 'ABT',
 'ABTX',
 'ABUS',
 'AC',
 'ACA',
 'ACAD',
 'ACB',
 'ACBI',
 'ACC',
 'ACCO',
 'ACER',
 'ACET',
 'ACGL',
 'ACGLP',
 'ACH',
 'ACHC',
 'ACHV',
 'ACIA',
 'ACIU',
 'ACIW',
 'ACLS',
 'ACM',
 'ACMR',
 'ACN',
 'ACNB',
 'ACOR',
 'ACRE',
 'ACRS',
 'ACRX',
 'ACST',
 'ACTG',
 'ACU',
 'ACY',
 'ADAP',
 'ADBE',
 'ADC',
 'ADES',
 'ADI',
 'ADIL',
 'ADM',
 'ADMA',
 'ADMP',
 'ADMS',
 'ADNT',
 'ADP',
 'ADPT',
 'ADS',
 'ADSK',
 'ADT',
 'ADTN',
 'ADUS',
 'ADVM',
 'ADXS',
 'AE',
 'AEB',
 'AEE',
 'AEG',
 'AEGN',
 'AEHR',
 'AEIS',
 'AEL',
 'AEM',
 'AEMD',
 'AEO',
 'AEP',
 'AER',
 'AERI',
 'AES',
 'AESE',
 'AEY',
 'AEYE',
 'AEZS',
 'AFG',
 'AFH',
 'AFI',
 'AFIN',
 'AFL',
 'AFMD',
 'AFYA',
 'AG',
 'AGCO',
 'AGE',
 'AGEN',
 'AGFS',
 'AGI',
 'AGIO',
 'AGLE',
 'AGM-A'

### &nbsp;&nbsp;&nbsp; * Construction of stack dataset with other features and label to classify

In [16]:
# Restrict the raw price/volume frames to the date span covered by the log
# returns (label-based slice, both ends included) and flip them so that the
# tickers become the rows.
open_df_t, high_df_t, low_df_t, volume_df_t = (
    frame.loc[adjclose_df_log_return.index[0]:adjclose_df_log_return.index[-1]].transpose()
    for frame in (open_df, high_df, low_df, volume_df)
)

# Long format indexed by (ticker, date); these stacks use stack()'s default NaN handling.
open_df_stack, high_df_stack, low_df_stack, volume_df_stack = (
    wide_frame.stack()
    for wide_frame in (open_df_t, high_df_t, low_df_t, volume_df_t)
)

# adjclose and label are the stacked series computed for the first dataset
data = {
    'open': open_df_stack,
    'high': high_df_stack,
    'low': low_df_stack,
    'adjclose': adjclose_df_stack,
    'volume': volume_df_stack,
    'label': classification_df_stack,
}

# raw-price dataset: one column per field plus the label, NaN rows removed
df_concat2 = pd.concat(data, axis=1).dropna(axis=0, how='any')
df_concat2

Unnamed: 0_level_0,Unnamed: 1_level_0,open,high,low,adjclose,volume,label
Unnamed: 0_level_1,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,2019-07-29,69.500000,70.500000,69.379997,69.507103,2750000.0,0.0
A,2019-07-30,69.250000,70.180000,68.099998,69.289803,2871800.0,0.0
A,2019-07-31,70.190002,70.660004,68.919998,68.558876,3846100.0,0.0
A,2019-08-01,69.629997,70.680000,68.389999,68.361320,2494100.0,0.0
A,2019-08-02,69.120003,69.129997,67.559998,67.245178,2803500.0,0.0
...,...,...,...,...,...,...,...
ZYXI,2021-03-11,16.820000,17.110001,16.370001,16.980000,518100.0,0.0
ZYXI,2021-03-12,16.879999,16.959999,16.549999,16.870001,421500.0,0.0
ZYXI,2021-03-15,16.809999,17.030001,16.129999,16.330000,463200.0,0.0
ZYXI,2021-03-16,16.400000,16.709999,15.600000,16.080000,483200.0,1.0


##### Remember: pandas `.std()` returns the sample standard deviation (denominator N-1), whereas standardization through `StandardScaler` uses the population standard deviation (denominator N). To obtain the population standard deviation with pandas, use `.std(ddof=0)`.

# 3. Third part: logistic regression 

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

### &nbsp;&nbsp;&nbsp; %FUNCTION% Function for logistic regression

In [18]:
def logistic_reg(features_df, classification_array, train_size = 0.80, shuffle_value = True, stratify_value = None, random_state=None):
    """Fit a default LogisticRegression on one train/test split and score it.

    Parameters
    ----------
    features_df
        Feature matrix, forwarded to ``train_test_split``.
    classification_array
        Labels aligned with the rows of ``features_df``.
    train_size
        Forwarded to ``train_test_split`` as ``train_size``.
    shuffle_value, stratify_value, random_state
        Forwarded to ``train_test_split`` as ``shuffle``, ``stratify`` and
        ``random_state``.

    Returns
    -------
    tuple
        ``(confusion_matrix, report)`` computed on the test part, where
        ``report`` is the dict form of ``classification_report`` with the two
        classes named 'class 0' and 'class 1'.
    """
    split = train_test_split(
        features_df,
        classification_array,
        train_size=train_size,
        shuffle=shuffle_value,
        stratify=stratify_value,
        random_state=random_state,
    )
    train_features, test_features, train_labels, test_labels = split

    # default hyper-parameters; fit() returns the fitted estimator
    model = LogisticRegression().fit(train_features, train_labels)
    predicted_labels = model.predict(test_features)

    cm = confusion_matrix(test_labels, predicted_labels)
    report = classification_report(
        test_labels,
        predicted_labels,
        target_names=['class 0', 'class 1'],
        output_dict=True,
    )
    return cm, report

### &nbsp;&nbsp;&nbsp; * Definition of the StockClass dictionary

In [19]:
# Dictionary mapping each ticker symbol to its own StockClass instance, used to
# store and read back the per-stock attributes.
# (take a look at _2_0_stock_dataframe_class.py for more information)
# Fixed: each instance now receives its own ticker; previously the whole
# ticker_list was passed, so every stock's .name was the full list.
stock_object_dictionary = {'{0}'.format(ticker): StockClass(ticker=ticker) for ticker in ticker_list}

### a. Trial 1: No Shuffle (i.e. No Stratify) - 80/20 - scaled features

In [20]:
%%time
# Tickers whose fit/evaluation raised are collected here instead of being
# silently ignored; the loop itself stays best-effort.
failed_tickers = []

for ticker in ticker_list:

    try:
        # all rows of this ticker (indexed by date)
        ticker_df = df_concat.loc[ticker]

        # features dataframe, without classification column
        # (drop returns a new frame, so the slice of df_concat is never modified)
        features_df = ticker_df.drop('label', axis=1)

        # classification array
        classification_array = ticker_df['label'].values

        # scaling data
        # NOTE(review): the scaler is fitted on the whole series before the
        # train/test split, so test rows contribute to the scaling statistics.
        df_sc_scaled = StandardScaler().fit_transform(features_df)

        shuffle_value = False # no shuffle, because of this we cannot stratify our label
        stratify_value = None
        train_size = 0.80
        random_state = None

        stock_entry = stock_object_dictionary['{0}'.format(ticker)]
        stock_entry.confusion_matrix, stock_entry.accuracy_report = logistic_reg(
            df_sc_scaled, classification_array, train_size, shuffle_value, stratify_value, random_state)

    except Exception:
        # keep going, but remember which ticker failed (Exception, not a bare
        # except, so that interrupting the kernel still works)
        failed_tickers.append(ticker)

print('Trial 1 failed for', len(failed_tickers), 'of', len(ticker_list), 'tickers')

CPU times: user 1min 7s, sys: 1.1 s, total: 1min 8s
Wall time: 1min 8s


### b. Trial 2: Shuffle&Stratify - 80/20 - scaled features

In [21]:
%%time
# Tickers whose fit/evaluation raised are collected here instead of being
# silently ignored; the loop itself stays best-effort.
failed_tickers = []

for ticker in ticker_list:

    try:
        # all rows of this ticker (indexed by date)
        ticker_df = df_concat.loc[ticker]

        # features dataframe, without classification column
        # (drop returns a new frame, so the slice of df_concat is never modified)
        features_df = ticker_df.drop('label', axis=1)

        # classification array
        classification_array = ticker_df['label'].values

        # scaling data
        # NOTE(review): the scaler is fitted on the whole series before the
        # train/test split, so test rows contribute to the scaling statistics.
        df_sc_scaled = StandardScaler().fit_transform(features_df)

        shuffle_value = True # shuffling is required in order to stratify on the label
        stratify_value = classification_array # stratify fashion
        train_size = 0.80
        random_state = 42 # fixed seed so the shuffled split is reproducible across runs

        stock_entry = stock_object_dictionary['{0}'.format(ticker)]
        stock_entry.confusion_matrix2, stock_entry.accuracy_report2 = logistic_reg(
            df_sc_scaled, classification_array, train_size, shuffle_value, stratify_value, random_state)

    except Exception:
        # keep going, but remember which ticker failed (Exception, not a bare
        # except, so that interrupting the kernel still works)
        failed_tickers.append(ticker)

print('Trial 2 failed for', len(failed_tickers), 'of', len(ticker_list), 'tickers')

CPU times: user 1min 7s, sys: 1.13 s, total: 1min 8s
Wall time: 1min 8s


In [27]:
%%time
def _flatten_report(report):
    """Flatten one classification-report dict into (column names, values).

    Per-class sections contribute '<section>-<metric>' for every metric;
    'macro avg' and 'weighted avg' contribute every metric except 'support';
    'accuracy' contributes a single column named 'accuracy'.
    """
    names, values = [], []
    for section, entry in report.items():
        if section == 'accuracy':
            # accuracy is stored as one scalar, not as a dict of metrics
            names.append(section)
            values.append(entry)
            continue
        for metric, value in entry.items():
            if section in ('macro avg', 'weighted avg') and metric == 'support':
                continue
            names.append(section + '-' + metric)
            values.append(value)
    return names, values


# One row per ticker, in ticker_list order. Tickers without a usable report get
# a row of NaN, so `matrix` always has len(ticker_list) rows and stays aligned
# with ticker_list (previously such tickers were silently skipped).
column_list = []
report_rows = []
for ticker in ticker_list:
    try:
        names, vector_values = _flatten_report(
            stock_object_dictionary['{0}'.format(ticker)].accuracy_report)
    except Exception:
        # no report stored for this ticker (e.g. the attribute is still None)
        report_rows.append(None)
        continue

    if not names:
        report_rows.append(None)
        continue
    if not column_list:
        # the first usable report defines the column layout
        column_list = names
    # a report with a different layout cannot share the matrix columns
    report_rows.append(vector_values if names == column_list else None)

# build the matrix once instead of concatenating inside the loop
missing_row = [np.nan] * len(column_list)
matrix = np.array([missing_row if row is None else row for row in report_rows], dtype=float)

CPU times: user 4.59 s, sys: 21.7 ms, total: 4.62 s
Wall time: 4.62 s


In [38]:
# inspect the flattened report matrix built in the previous cell
matrix

array([[0.6       , 0.14634146, 0.23529412, ..., 0.55979535, 0.53012048,
        0.45064555],
       [0.46987952, 1.        , 0.63934426, ..., 0.22078676, 0.46987952,
        0.30041477],
       [0.55555556, 0.97826087, 0.70866142, ..., 0.53078983, 0.55421687,
        0.41561278],
       ...,
       [0.60416667, 0.63043478, 0.61702128, ..., 0.56409925, 0.56626506,
        0.56485517],
       [0.56164384, 0.93181818, 0.7008547 , ..., 0.62665456, 0.57831325,
        0.50578872],
       [0.55737705, 0.79069767, 0.65384615, ..., 0.57353707, 0.56626506,
        0.54083829]])

In [46]:
# tabulate the flattened reports: rows labelled by ticker, columns by metric name
stock_report_df = pd.DataFrame(
    np.array(matrix),
    index=ticker_list,
    columns=column_list,
)
stock_report_df

Unnamed: 0,class 0-precision,class 0-recall,class 0-f1-score,class 0-support,class 1-precision,class 1-recall,class 1-f1-score,class 1-support,accuracy,macro avg-precision,macro avg-recall,macro avg-f1-score,weighted avg-precision,weighted avg-recall,weighted avg-f1-score
A,0.600000,0.146341,0.235294,41.0,0.520548,0.904762,0.660870,42.0,0.530120,0.560274,0.525552,0.448082,0.559795,0.530120,0.450646
AA,0.469880,1.000000,0.639344,39.0,0.000000,0.000000,0.000000,44.0,0.469880,0.234940,0.500000,0.319672,0.220787,0.469880,0.300415
AACG,0.555556,0.978261,0.708661,46.0,0.500000,0.027027,0.051282,37.0,0.554217,0.527778,0.502644,0.379972,0.530790,0.554217,0.415613
AAIC,0.493976,1.000000,0.661290,41.0,0.000000,0.000000,0.000000,42.0,0.493976,0.246988,0.500000,0.330645,0.244012,0.493976,0.326661
AAL,0.445783,1.000000,0.616667,37.0,0.000000,0.000000,0.000000,46.0,0.445783,0.222892,0.500000,0.308333,0.198723,0.445783,0.274900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZUO,0.333333,0.361111,0.346667,36.0,0.477273,0.446809,0.461538,47.0,0.409639,0.405303,0.403960,0.404103,0.414841,0.409639,0.411715
ZVO,0.469136,0.974359,0.633333,39.0,0.500000,0.022727,0.043478,44.0,0.469880,0.484568,0.498543,0.338406,0.485498,0.469880,0.320639
ZYME,0.604167,0.630435,0.617021,46.0,0.514286,0.486486,0.500000,37.0,0.566265,0.559226,0.558461,0.558511,0.564099,0.566265,0.564855
ZYNE,0.561644,0.931818,0.700855,44.0,0.700000,0.179487,0.285714,39.0,0.578313,0.630822,0.555653,0.493284,0.626655,0.578313,0.505789


In [None]:
# TODO: unfinished cell. The statement below had no colon and no body, so it was
# a SyntaxError that stopped "Restart & Run All"; it is commented out until the
# intended loop body is written.
# for i in stock_report_df

In [50]:
# rank the tickers from highest to lowest test accuracy
stock_report_df.sort_values(by='accuracy', ascending=False)

Unnamed: 0,class 0-precision,class 0-recall,class 0-f1-score,class 0-support,class 1-precision,class 1-recall,class 1-f1-score,class 1-support,accuracy,macro avg-precision,macro avg-recall,macro avg-f1-score,weighted avg-precision,weighted avg-recall,weighted avg-f1-score
NAV-PD,0.963855,1.0,0.981595,80.0,0.0,0.0,0.0,3.0,0.963855,0.481928,0.5,0.490798,0.929017,0.963855,0.946116
WSO-B,0.926829,0.987013,0.955975,77.0,0.0,0.0,0.0,6.0,0.915663,0.463415,0.493506,0.477987,0.85983,0.915663,0.886868
TAP-A,0.853659,1.0,0.921053,70.0,1.0,0.076923,0.142857,13.0,0.855422,0.926829,0.538462,0.531955,0.876579,0.855422,0.799167
CMCTP,0.855422,1.0,0.922078,71.0,0.0,0.0,0.0,12.0,0.855422,0.427711,0.5,0.461039,0.731746,0.855422,0.788765
BIO-B,0.855422,1.0,0.922078,71.0,0.0,0.0,0.0,12.0,0.855422,0.427711,0.5,0.461039,0.731746,0.855422,0.788765
GJR,0.855422,1.0,0.922078,71.0,0.0,0.0,0.0,12.0,0.855422,0.427711,0.5,0.461039,0.731746,0.855422,0.788765
OBAS,0.858974,0.957143,0.905405,70.0,0.4,0.153846,0.222222,13.0,0.831325,0.629487,0.555495,0.563814,0.787087,0.831325,0.798401
MOG-B,0.831325,1.0,0.907895,69.0,0.0,0.0,0.0,14.0,0.831325,0.415663,0.5,0.453947,0.691102,0.831325,0.754756
RDIB,0.829268,1.0,0.906667,68.0,1.0,0.066667,0.125,15.0,0.831325,0.914634,0.533333,0.515833,0.860123,0.831325,0.765402
STZ-B,0.807229,1.0,0.893333,67.0,0.0,0.0,0.0,16.0,0.807229,0.403614,0.5,0.446667,0.651619,0.807229,0.721124


### c. Trial 3: No Shuffle (i.e. No Stratify) - 70/30 - scaled features

In [None]:
%%time
# Tickers whose fit/evaluation raised are collected here instead of being
# silently ignored; the loop itself stays best-effort.
# NOTE(review): results are written to the same attributes as Trial 1
# (confusion_matrix / accuracy_report), so a ticker that fails here keeps
# whatever an earlier trial stored there.
failed_tickers = []

for ticker in ticker_list:

    try:
        # all rows of this ticker (indexed by date)
        ticker_df = df_concat.loc[ticker]

        # features dataframe, without classification column
        # (drop returns a new frame, so the slice of df_concat is never modified)
        features_df = ticker_df.drop('label', axis=1)

        # classification array
        classification_array = ticker_df['label'].values

        # scaling data
        # NOTE(review): the scaler is fitted on the whole series before the
        # train/test split, so test rows contribute to the scaling statistics.
        df_sc_scaled = StandardScaler().fit_transform(features_df)

        shuffle_value = False # no shuffle, because of this we cannot stratify our label
        stratify_value = None
        train_size = 0.70
        random_state = None

        stock_entry = stock_object_dictionary['{0}'.format(ticker)]
        stock_entry.confusion_matrix, stock_entry.accuracy_report = logistic_reg(
            df_sc_scaled, classification_array, train_size, shuffle_value, stratify_value, random_state)

    except Exception:
        # keep going, but remember which ticker failed (Exception, not a bare
        # except, so that interrupting the kernel still works)
        failed_tickers.append(ticker)

print('Trial 3 failed for', len(failed_tickers), 'of', len(ticker_list), 'tickers')

### d. Trial 4: Shuffle&Stratify - 70/30 - scaled features

In [None]:
%%time
# Tickers whose fit/evaluation raised are collected here instead of being
# silently ignored; the loop itself stays best-effort.
# NOTE(review): results are written to the same attributes as Trial 2
# (confusion_matrix2 / accuracy_report2), so a ticker that fails here keeps
# whatever an earlier trial stored there.
failed_tickers = []

for ticker in ticker_list:

    try:
        # all rows of this ticker (indexed by date)
        ticker_df = df_concat.loc[ticker]

        # features dataframe, without classification column
        # (drop returns a new frame, so the slice of df_concat is never modified)
        features_df = ticker_df.drop('label', axis=1)

        # classification array
        classification_array = ticker_df['label'].values

        # scaling data
        # NOTE(review): the scaler is fitted on the whole series before the
        # train/test split, so test rows contribute to the scaling statistics.
        df_sc_scaled = StandardScaler().fit_transform(features_df)

        shuffle_value = True # shuffling is required in order to stratify on the label
        stratify_value = classification_array # stratify fashion
        train_size = 0.70
        random_state = 42 # fixed seed so the shuffled split is reproducible across runs

        stock_entry = stock_object_dictionary['{0}'.format(ticker)]
        stock_entry.confusion_matrix2, stock_entry.accuracy_report2 = logistic_reg(
            df_sc_scaled, classification_array, train_size, shuffle_value, stratify_value, random_state)

    except Exception:
        # keep going, but remember which ticker failed (Exception, not a bare
        # except, so that interrupting the kernel still works)
        failed_tickers.append(ticker)

print('Trial 4 failed for', len(failed_tickers), 'of', len(ticker_list), 'tickers')

In [37]:
stock = 'ZYXI'

# Print, for the chosen stock, the confusion matrix and the classification
# report of the first result slot, then the same pair for the second slot.
# Each value is preceded by a banner: blank line, rule, title, rule, blank line.
for matrix_attr, report_attr in (('confusion_matrix', 'accuracy_report'),
                                 ('confusion_matrix2', 'accuracy_report2')):
    for banner_title, attr_name in (
            ('================   Confusion Matrix   ================', matrix_attr),
            ('=====================   Report   =====================', report_attr)):
        for banner_line in ('',
                            '======================================================',
                            banner_title,
                            '======================================================',
                            ''):
            print(banner_line)

        # confusion matrix or classification report, depending on attr_name
        print(getattr(stock_object_dictionary['{0}'.format(stock)], attr_name))



[[34  9]
 [27 13]]


{'class 0': {'precision': 0.5573770491803278, 'recall': 0.7906976744186046, 'f1-score': 0.6538461538461539, 'support': 43}, 'class 1': {'precision': 0.5909090909090909, 'recall': 0.325, 'f1-score': 0.41935483870967744, 'support': 40}, 'accuracy': 0.5662650602409639, 'macro avg': {'precision': 0.5741430700447094, 'recall': 0.5578488372093023, 'f1-score': 0.5366004962779156, 'support': 83}, 'weighted avg': {'precision': 0.573537069290575, 'recall': 0.5662650602409639, 'f1-score': 0.5408382911297798, 'support': 83}}


[[23 20]
 [23 17]]


{'class 0': {'precision': 0.5, 'recall': 0.5348837209302325, 'f1-score': 0.5168539325842696, 'support': 43}, 'class 1': {'precision': 0.4594594594594595, 'recall': 0.425, 'f1-score': 0.44155844155844154, 'support': 40}, 'accuracy': 0.4819277108433735, 'macro avg': {'precision': 0.4797297297297297, 'recall': 0.4799418604651162, 'f1-score': 0.47920618707135554, 'support': 83}, 'weighted avg': {'precision': 0.4804623901009443, 'recall