# Collaborative Clustering

## Importing Libraries

In [59]:
import math
import numpy as np
import pandas as pd
import warnings
# import pandas_datareader as web
#from sklearn.tree import DecisionTreeRegressor
#from sklearn.linear_mode
#import LinearRegression
#from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras import Model
from keras.layers import Dense, LSTM, Dropout, Concatenate
from keras.optimizers import Adam
import matplotlib.pyplot as plt

#plt.style.use('fivethirtyeight')
#plt.style.use('fivethirtyeight')
plt.style.use('seaborn-v0_8-dark')

In [60]:
warnings.filterwarnings('ignore')

### Importing Stocks dataframe

In [61]:
demo_df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/AmazonStock_Sentiment_6M.csv');
demo_df

Unnamed: 0,date,open,low,high,volume,close,Name,Date,Positve,Negative,Neutral,Total,NeutralPos,NeutralNeg
0,10/2/2017,964.00,952.1201,967.305,2415846,959.19,AMZN,10/2/2017,37.088734,7.577268,55.333998,1003,37.088734,14.260219
1,10/3/2017,958.00,950.3700,963.690,2643484,957.10,AMZN,10/3/2017,38.159879,11.085973,50.754148,1326,38.159879,22.348774
2,10/4/2017,954.21,954.0500,967.790,2460721,965.45,AMZN,10/4/2017,36.168582,10.727969,53.103448,1305,36.168582,19.641470
3,10/5/2017,970.00,969.6400,981.510,3119487,980.85,AMZN,10/5/2017,47.939017,7.227555,44.833427,1771,47.939017,24.411076
4,10/6/2017,975.64,975.6400,995.750,3719840,989.58,AMZN,10/6/2017,34.628045,11.586570,53.785385,1519,34.628045,19.818134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,3/5/2018,1494.24,1481.0000,1525.380,5233934,1523.61,AMZN,3/5/2018,38.274182,7.794015,53.931802,1437,38.274182,15.879162
105,3/6/2018,1533.20,1528.0000,1542.130,4561718,1537.64,AMZN,3/6/2018,45.234604,6.818182,47.947214,1364,45.234604,20.887916
106,3/7/2018,1526.52,1522.5100,1545.900,4174123,1545.00,AMZN,3/7/2018,38.167260,9.786477,52.046263,1124,38.167260,19.757162
107,3/8/2018,1550.00,1545.2500,1554.880,3512528,1551.86,AMZN,3/8/2018,37.221728,8.726625,54.051647,1123,37.221728,16.691926


### Importing general user's decisions in the past

In [62]:
df_decisions_list = []
df_decisions_list.append({'df': pd.read_csv('./amazon_stock_trades.csv'), 'comp': 'AMZN'})
df_decisions_list.append({'df': pd.read_csv('./apple_stock_trades.csv'), 'comp': 'AAPL'})
df_decisions_list.append({'df': pd.read_csv('./cisco_stock_trades.csv'), 'comp': 'CSCO'})
df_decisions_list.append({'df': pd.read_csv('./ibm_stock_trades.csv'), 'comp': 'IBM'})
df_decisions_list.append({'df': pd.read_csv('./jnj_stock_trades.csv'), 'comp': 'JNJ'})
df_decisions_list.append({'df': pd.read_csv('./jnpr_stock_trades.csv'), 'comp': 'JNPR'})
df_decisions_list.append({'df': pd.read_csv('./msft_stock_trades.csv'), 'comp': 'MSFT'})
df_decisions_list.append({'df': pd.read_csv('./orcl_stock_trades.csv'), 'comp': 'ORCL'})
df_decisions_list.append({'df': pd.read_csv('./pfizer_stock_trades.csv'), 'comp': 'PFIZER'})
df_decisions_list.append({'df': pd.read_csv('./tgt_stock_trades.csv'), 'comp': 'TGT'})

## Normalising the decisions based on companies

In [63]:
# scale each df 
from sklearn.preprocessing import MinMaxScaler
columns_to_exclude = ['Sector of stock', 'Buy/Sell/Keep']
columns_to_normalize = ['Current Value of Stock', 'Expected Value of Stock', 'Error in expected value of stock', 'Percentage of portfolio', 'Percentage of sector', 'Positive', 'Negative', 'Neutral', 'Total', 'Neutral_Pos', 'Neutral_Neg']
scalers_decisions = []
for i in range(len(df_decisions_list)):
    scaler = MinMaxScaler()
    normalized_values = scaler.fit_transform(df_decisions_list[i]['df'][columns_to_normalize])
    normalized_df = pd.DataFrame(normalized_values, columns=columns_to_normalize)
    scalers_decisions.append({'scaler': scaler, 'comp': df_decisions_list[i]['comp']})

    # Concatenate the normalized DataFrame with the excluded column
    df_decisions_list[i]['df'] = pd.concat([normalized_df, df_decisions_list[i]['df'][columns_to_exclude]], axis=1)

In [64]:
scalers_decisions

[{'scaler': MinMaxScaler(), 'comp': 'AMZN'},
 {'scaler': MinMaxScaler(), 'comp': 'AAPL'},
 {'scaler': MinMaxScaler(), 'comp': 'CSCO'},
 {'scaler': MinMaxScaler(), 'comp': 'IBM'},
 {'scaler': MinMaxScaler(), 'comp': 'JNJ'},
 {'scaler': MinMaxScaler(), 'comp': 'JNPR'},
 {'scaler': MinMaxScaler(), 'comp': 'MSFT'},
 {'scaler': MinMaxScaler(), 'comp': 'ORCL'},
 {'scaler': MinMaxScaler(), 'comp': 'PFIZER'},
 {'scaler': MinMaxScaler(), 'comp': 'TGT'}]

### Importing 6 months stock data of each company

In [65]:
# scalers for stock data

# create main_df for each company stock data
main_df = []
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/AmazonStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'AMZN'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/AAPLStock_sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'AAPL'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/CSCOStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'CSCO'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/IBMStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'IBM'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/JNJStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'JNJ'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/JNPRStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'JNPR'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/MSFTStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'MSFT'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/ORCLStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'ORCL'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/PfizerStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'PFIZER'})
main_df.append({'df': pd.read_csv('./Combining Stock and Twitter Data/MyDrive/TGTStock_Sentiment_6M.csv').drop(['date','Date','Name'],axis=1), 'comp': 'TGT'})


In [66]:
# creating the diff and exp column
for i in main_df:
    i['df']['diff']=i['df']['high']-i['df']['low']
for i in main_df:
    i['df']['exp']=(i['df']['open']+i['df']['close'])/2

### Normalising the stocks data by company

In [67]:
scalers_stock = []
for i in range(len(main_df)):
    scaler = MinMaxScaler(feature_range=(0, 1))
    main_df[i]['df'][main_df[i]['df'].columns] = scaler.fit_transform(main_df[i]['df'][main_df[i]['df'].columns])
    scalers_stock.append({'scaler': scaler, 'comp': main_df[i]['comp']})

In [68]:
dataset = []
for i in df_decisions_list:
    dataset.append(i['df'].values)
dataset = np.concatenate(dataset, axis = 0)
print(dataset)

[[0.01690875490077537 0.0 0.0573411800211275 ... 0.2889154068034872 'IT'
  'Keep']
 [0.006545881621444094 0.003842394418416717 0.06051003470650517 ...
  0.4996072102743184 'IT' 'Keep']
 [0.0 0.030124035188243692 0.04640108646446504 ... 0.42908697933553197
  'IT' 'Buy']
 ...
 [0.8417116742833404 0.7736249734550866 0.9428571428571434 ...
  0.37337504235759883 'Consumer Discretionary' 'Keep']
 [0.8201080182800169 0.6982374177107671 0.2527472527472538 ...
  0.5026178016456893 'Consumer Discretionary' 'Sell']
 [0.6875778977980893 0.6704183478445533 0.44395604395604266 ...
  0.3476663784289607 'Consumer Discretionary' 'Keep']]


### Concatenating the stocks data to a single CSV

In [69]:
import csv
with open('stock_decisions.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Current Value of Stock', 'Expected Value of Stock', 'Error in expected value of stock', 'Percentage of portfolio', 'Percentage of sector', 'Positive', 'Negative', 'Neutral', 'Total', 'Neutral_Pos', 'Neutral_Neg', 'Sector of stock','Buy/Sell/Keep'])
    writer.writerows(dataset)

In [70]:
df_decisions = pd.read_csv('./stock_decisions.csv');
df_decisions.head()

Unnamed: 0,Current Value of Stock,Expected Value of Stock,Error in expected value of stock,Percentage of portfolio,Percentage of sector,Positive,Negative,Neutral,Total,Neutral_Pos,Neutral_Neg,Sector of stock,Buy/Sell/Keep
0,0.016909,0.0,0.057341,0.747695,0.353394,0.634112,0.393651,0.209993,0.363674,0.354623,0.288915,IT,Keep
1,0.006546,0.003842,0.06051,0.875622,0.132926,0.65947,0.618285,0.282692,0.270249,0.399351,0.499607,IT,Keep
2,0.0,0.030124,0.046401,0.038742,0.334798,0.612328,0.595365,0.277965,0.318172,0.316199,0.429087,IT,Buy
3,0.027272,0.042233,0.108571,0.334854,0.926111,0.890981,0.371262,0.382849,0.149471,0.807705,0.553326,IT,Sell
4,0.037013,0.058251,0.039837,0.16537,0.620705,0.575857,0.650335,0.326131,0.332083,0.25187,0.433689,IT,Buy


In [71]:
df_decisions.drop('Sector of stock', axis=1, inplace=True)

## Taking Input from the user

In [72]:
input = []

todays_date = '1/5/2018'
list_of_stocks = []
list_of_stocks.append({'Percentage of portfolio': 0.5, 'Percentage of sector': 15.56, 'Sector of Stock': 'IT', 'comp': 'AMZN'});
list_of_stocks.append({'Percentage of portfolio': 4.5, 'Percentage of sector': 7.8, 'Sector of Stock': 'IT', 'comp': 'MSFT'});
list_of_stocks.append({'Percentage of portfolio': 7.5, 'Percentage of sector': 18.26, 'Sector of Stock': 'Health', 'comp': 'AAPL'});
list_of_stocks.append({'Percentage of portfolio': 16.5, 'Percentage of sector': 10.26, 'Sector of Stock': 'Health', 'comp': 'PFIZER'});
list_of_stocks

[{'Percentage of portfolio': 0.5,
  'Percentage of sector': 15.56,
  'Sector of Stock': 'IT',
  'comp': 'AMZN'},
 {'Percentage of portfolio': 4.5,
  'Percentage of sector': 7.8,
  'Sector of Stock': 'IT',
  'comp': 'MSFT'},
 {'Percentage of portfolio': 7.5,
  'Percentage of sector': 18.26,
  'Sector of Stock': 'Health',
  'comp': 'AAPL'},
 {'Percentage of portfolio': 16.5,
  'Percentage of sector': 10.26,
  'Sector of Stock': 'Health',
  'comp': 'PFIZER'}]

In [73]:
import tensorflow as tf
from tensorflow.keras.models import load_model

## Initialsing the LSTM Model

In [74]:
from tensorflow.keras.losses import MeanSquaredLogarithmicError, MeanSquaredError
from keras.layers import Dense, LSTM, Dropout, Concatenate, BatchNormalization
from tensorflow.keras.saving import register_keras_serializable
@register_keras_serializable(package="MyLoss")
def MyLoss(y_true, y_pred):
    msle = MeanSquaredLogarithmicError()(y_true[:,0], y_pred[:,0])
    mse = MeanSquaredError()(y_true[:,1],y_pred[:,1])
    return  msle + mse

@register_keras_serializable(package='MyModel')
class MyModel(Model):
    def __init__(self, **kwargs):
        super(MyModel, self).__init__()
        self.lstm1 = LSTM(units=50, return_sequences=True,
                          input_shape=[None,30,13])
        self.bn1 = BatchNormalization()
        self.dropout1 = Dropout(0.2)

        self.lstm2 = LSTM(units=60, return_sequences=True)
        self.bn2 = BatchNormalization()
        self.dropout2 = Dropout(0.3)

        self.lstm3 = LSTM(units=70, return_sequences=True)
        self.bn3 = BatchNormalization()
        self.dropout3 = Dropout(0.4)

        self.lstm4 = LSTM(units=80)
        self.bn4 = BatchNormalization()
        self.dropout4 = Dropout(0.5)

        self.dense1_1 = Dense(20)
        self.dense2_1 = Dense(1)

        self.dense1_2 = Dense(20)
        self.dense2_2 = Dense(1)
        self.concat = Concatenate(axis=-1)

    def call(self, inputs):
        enc = self.lstm1(inputs)
        enc = self.bn1(enc)
        enc = self.dropout1(enc)

        enc = self.lstm2(enc)
        enc = self.bn2(enc)
        enc = self.dropout2(enc)

        enc = self.lstm3(enc)
        enc = self.bn3(enc)
        enc = self.dropout3(enc)

        enc = self.lstm4(enc)
        enc = self.bn4(enc)
        enc = self.dropout4(enc)

        out1 = self.dense1_1(enc)
        out1 = self.dense2_1(out1)

        out2 = self.dense1_2(enc)
        out2 = self.dense2_2(out2)

        out = self.concat([out1, out2])
        return out

## Function to build the next day vector

In [75]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import numpy as np
def makeNextDayVector(filtered_df, todays_date, stock, comp_name):
    
    
    filtered_df['diff']=filtered_df['high']-filtered_df['low']
    filtered_df['exp']=(filtered_df['open']+filtered_df['close'])/2
    
    
    loaded_model = load_model('model.keras')
    
    curr_row = filtered_df[filtered_df['Date'] == todays_date]
    
    curr_row.drop(['date', 'Date', 'Name'], axis=1, inplace=True)
    filtered_df.drop(['date', 'Date', 'Name'], axis=1, inplace=True)
    
    curr_stock_value = curr_row['open'].iloc[0]
    
    # find scaler
    scaler = MinMaxScaler()
    for dict in scalers_stock:
        if dict['comp'] == comp_name:
            scaler = dict['scaler']
            break
    
    # normalise the df
    filtered_df[filtered_df.columns] = scaler.transform(filtered_df[filtered_df.columns])

    predicted_values = pd.DataFrame()
    
    predicted_values[['open', 'high', 'low', 'volume', 'close', 'Positve', 'Negative','Neutral', 'Total', 'NeutralPos', 'NeutralNeg']] = 0
    
    # predict the stock price
    predicted_values[['diff', 'exp']] = loaded_model.predict(np.array([filtered_df.values]).astype(float))
    
    predicted_values[predicted_values.columns] = scaler.inverse_transform(predicted_values)
    
    
    
    predicted_values.drop(['open', 'high', 'low', 'volume', 'close', 'Positve', 'Negative','Neutral', 'Total', 'NeutralPos', 'NeutralNeg'], axis=1, inplace=True)
    
    
    expected_value = predicted_values['exp'].iloc[0]
    error = abs(predicted_values['diff']).iloc[0]
    
    percen_portfolio = stock['Percentage of portfolio']
    percen_sector = stock['Percentage of sector']
    sector = stock['Sector of Stock']
    
    positives = curr_row['Positve'].iloc[0]
    negatives = curr_row['Negative'].iloc[0]
    total = curr_row['Total'].iloc[0]
    neutral = curr_row['Neutral'].iloc[0]
    neutral_pos = curr_row['NeutralPos'].iloc[0]
    neutral_neg = curr_row['NeutralNeg'].iloc[0]
    
    
    # create the next day vector
    stock_vect = []
    stock_vect.append(curr_stock_value)
    stock_vect.append(expected_value)
    stock_vect.append(error)
    stock_vect.append(percen_portfolio)
    stock_vect.append(percen_sector)
    stock_vect.append(positives)
    stock_vect.append(negatives)
    stock_vect.append(total)
    stock_vect.append(neutral)
    stock_vect.append(neutral_pos)
    stock_vect.append(neutral_neg)
    
    return stock_vect
    

In [76]:
import logging
logging.basicConfig(level=logging.INFO)

### Making the next day vectors for the inputs

In [77]:
next_vectors = []
for i in range(len(list_of_stocks)):
    if(list_of_stocks[i]['comp'] == 'AMZN'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/AmazonStock_Sentiment_6M.csv');
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'AMZN'), 'comp': 'AMZN'})
    elif(list_of_stocks[i]['comp'] == 'AAPL'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/AAPLStock_sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'AAPL'), 'comp': 'AAPL'})
    elif(list_of_stocks[i]['comp'] == 'MSFT'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/MSFTStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'MSFT'), 'comp': 'MSFT'})
    elif(list_of_stocks[i]['comp'] == 'PFIZER'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/PfizerStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'PFIZER'), 'comp': 'PFIZER'})
    elif(list_of_stocks[i]['comp'] == 'CSCO'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/CSCOStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'CSCO'), 'comp': 'CSCO'})
    elif(list_of_stocks[i]['comp'] == 'IBM'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/IBMStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'IBM'), 'comp': 'IBM'})
    elif(list_of_stocks[i]['comp'] == 'JNJ'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/JNJStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'JNJ'), 'comp': 'JNJ'})
    elif(list_of_stocks[i]['comp'] == 'JNPR'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/JNPRStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'JNPR'), 'comp': 'JNPR'})    
    elif(list_of_stocks[i]['comp'] == 'ORCL'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/ORCLStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'ORCL'), 'comp': 'ORCL'})
    elif(list_of_stocks[i]['comp'] == 'TGT'):
        df = pd.read_csv('./Combining Stock and Twitter Data/MyDrive/TGTStock_Sentiment_6M.csv')
        filtered_df = df[df['Date'].replace('/', '') <= todays_date.replace('/', '')].tail(30)
        next_vectors.append({'next_day_vector': makeNextDayVector(filtered_df, todays_date, list_of_stocks[i], 'TGT'), 'comp': 'TGT'})


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 609ms/step


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 628ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 562ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 545ms/step


In [78]:
next_vectors[0]['next_day_vector']

[1217.51,
 1440.2086984586717,
 54.31271990358817,
 0.5,
 15.56,
 52.54403131,
 6.653620352,
 1022,
 40.80234834,
 52.54403131,
 27.86822061]

## Pre Processing stock decision df

In [79]:
df_decisions.head()

Unnamed: 0,Current Value of Stock,Expected Value of Stock,Error in expected value of stock,Percentage of portfolio,Percentage of sector,Positive,Negative,Neutral,Total,Neutral_Pos,Neutral_Neg,Buy/Sell/Keep
0,0.016909,0.0,0.057341,0.747695,0.353394,0.634112,0.393651,0.209993,0.363674,0.354623,0.288915,Keep
1,0.006546,0.003842,0.06051,0.875622,0.132926,0.65947,0.618285,0.282692,0.270249,0.399351,0.499607,Keep
2,0.0,0.030124,0.046401,0.038742,0.334798,0.612328,0.595365,0.277965,0.318172,0.316199,0.429087,Buy
3,0.027272,0.042233,0.108571,0.334854,0.926111,0.890981,0.371262,0.382849,0.149471,0.807705,0.553326,Sell
4,0.037013,0.058251,0.039837,0.16537,0.620705,0.575857,0.650335,0.326131,0.332083,0.25187,0.433689,Buy


### Breaking down into buy, sell and keep dataframes

In [80]:
buy_df = df_decisions[df_decisions['Buy/Sell/Keep'] == 'Buy']

sell_df = df_decisions[df_decisions['Buy/Sell/Keep'] == 'Sell']

keep_df = df_decisions[df_decisions['Buy/Sell/Keep'] == 'Keep']

buy_df.drop(columns=['Buy/Sell/Keep'], inplace=True)
sell_df.drop(columns=['Buy/Sell/Keep'], inplace=True)
keep_df.drop(columns=['Buy/Sell/Keep'], inplace=True)

In [81]:
keep_df

Unnamed: 0,Current Value of Stock,Expected Value of Stock,Error in expected value of stock,Percentage of portfolio,Percentage of sector,Positive,Negative,Neutral,Total,Neutral_Pos,Neutral_Neg
0,0.016909,0.000000,0.057341,0.747695,0.353394,0.634112,0.393651,0.209993,0.363674,0.354623,0.288915
1,0.006546,0.003842,0.060510,0.875622,0.132926,0.659470,0.618285,0.282692,0.270249,0.399351,0.499607
5,0.067410,0.057948,0.091527,0.163465,0.959393,0.496957,0.459654,0.264011,0.460825,0.112700,0.191714
6,0.073335,0.059970,0.023276,0.379841,0.060957,0.552564,0.356246,0.273914,0.445859,0.210784,0.168752
7,0.064008,0.069635,0.077863,0.330739,0.747575,0.608680,0.618348,0.294846,0.313993,0.309765,0.443775
...,...,...,...,...,...,...,...,...,...,...,...
1059,0.777316,0.826290,0.457143,0.564369,0.700073,0.576933,0.578050,0.158835,0.380911,0.420627,0.404228
1062,0.903199,0.928010,0.604396,0.570575,0.228781,0.670365,0.294132,0.184211,0.440210,0.548578,0.204860
1064,0.895305,0.850074,0.470330,0.173859,0.665147,0.383629,0.419840,0.137218,0.614365,0.347657,0.205120
1067,0.841712,0.773625,0.942857,0.456054,0.807529,0.416400,0.683732,0.274436,0.461552,0.200784,0.373375


In [82]:
next_vectors

[{'next_day_vector': [1217.51,
   1440.2086984586717,
   54.31271990358817,
   0.5,
   15.56,
   52.54403131,
   6.653620352,
   1022,
   40.80234834,
   52.54403131,
   27.86822061],
  'comp': 'AMZN'},
 {'next_day_vector': [87.66,
   92.82440862655642,
   1.8587952208518992,
   4.5,
   7.8,
   39.01581722,
   11.42355009,
   569,
   49.56063269,
   41.36386314,
   11.42355009],
  'comp': 'MSFT'},
 {'next_day_vector': [173.44,
   167.11862921042442,
   2.707145949840536,
   7.5,
   18.26,
   48.41208366,
   9.295120062,
   1291,
   42.29279628,
   48.41208366,
   17.84049339],
  'comp': 'AAPL'},
 {'next_day_vector': [36.82,
   36.48794911816716,
   0.7796059048175816,
   16.5,
   10.26,
   36.4640884,
   3.867403315,
   181,
   59.66850829,
   36.4640884,
   9.16082135],
  'comp': 'PFIZER'}]

## Normalising the next vectors

In [83]:
for i in next_vectors:
    scaler = MinMaxScaler()
    for dict in scalers_decisions:
        if dict['comp'] == i['comp']:
            scaler = dict['scaler']
            break
    
    normalized_array = scaler.transform([i['next_day_vector']])
    normalized_list = normalized_array.flatten()
    i['next_day_vector'] = normalized_list

In [84]:
next_vectors

[{'next_day_vector': array([ 0.45475742,  0.81340574,  0.36662683, -0.01902568,  0.70143324,
          1.        ,  0.33451766,  0.21426964,  0.0672408 ,  1.        ,
          0.64337853]),
  'comp': 'AMZN'},
 {'next_day_vector': array([0.62833487, 0.89597133, 0.25807905, 0.93068418, 0.16783359,
         0.32479996, 0.30247938, 0.11842981, 0.61712824, 0.43254837,
         0.21429368]),
  'comp': 'MSFT'},
 {'next_day_vector': array([0.76961927, 0.52680659, 0.20900822, 1.56984983, 0.88343687,
         0.78368612, 0.23917044, 0.30149413, 0.26098711, 0.65903313,
         0.44507208]),
  'comp': 'AAPL'},
 {'next_day_vector': array([0.54958678, 0.47837624, 0.23793885, 3.65981051, 0.34478131,
         0.83804257, 0.21897318, 0.16624685, 0.30819748, 0.75803476,
         0.33570377]),
  'comp': 'PFIZER'}]

## Applying cosine similarity between Next day stock vector and user decision vectors

In [85]:
# similarty 
from sklearn.metrics.pairwise import cosine_similarity

buy_score = []
sell_score = []
keep_score = []

for i in range(len(next_vectors)):
    buy_score_similarity = 0
    buy_score_similarity = cosine_similarity(buy_df.values, [next_vectors[i]['next_day_vector']])
    total_cos_buy_score = np.average(buy_score_similarity)
    buy_score.append({'buyscore': total_cos_buy_score, 'comp': next_vectors[i]['comp']})
    
    
    sell_score_similarity = 0
    sell_score_similarity = cosine_similarity(sell_df.values, [next_vectors[i]['next_day_vector']])
    
    total_cos_sell_score = np.average(sell_score_similarity)
    sell_score.append({'sellscore': total_cos_sell_score, 'comp': next_vectors[i]['comp']})
    
    keep_score_similarity = 0
    keep_score_similarity = cosine_similarity(keep_df.values, [next_vectors[i]['next_day_vector']])
    total_cos_keep_score = np.average(keep_score_similarity)
    keep_score.append({'keepscore': total_cos_keep_score, 'comp': next_vectors[i]['comp']})



## Storing buy, sell and keep scores of each company

In [86]:
company_scores = {}

# Merge the scores for each company
for score_list, score_type in zip([buy_score, sell_score, keep_score], ['buy', 'sell', 'keep']):
    for score in score_list:
        comp = score['comp']
        score_value = score[score_type + 'score']
        if comp not in company_scores:
            company_scores[comp] = {'buy': None, 'sell': None, 'keep': None}
        company_scores[comp][score_type] = score_value

print(company_scores)

{'AMZN': {'buy': 0.7281041427036561, 'sell': 0.7144008918293004, 'keep': 0.702791886932548}, 'MSFT': {'buy': 0.8083289970965074, 'sell': 0.8071396442731366, 'keep': 0.8072339977922515}, 'AAPL': {'buy': 0.7951848092609144, 'sell': 0.7967859540532392, 'keep': 0.7969255168678114}, 'PFIZER': {'buy': 0.5899800920254605, 'sell': 0.5948927891429138, 'keep': 0.6011937564883656}}


## Recommending which stocks to buy, sell and keep

In [87]:
buy_basket = []
sell_basket = []
keep_basket = []

for company, scores in company_scores.items():
    buy_score = scores['buy']
    sell_score = scores['sell']
    keep_score = scores['keep']
    
    if buy_score is not None and (sell_score is None or buy_score > sell_score) and (keep_score is None or buy_score > keep_score):
        buy_basket.append(company)
    elif sell_score is not None and (buy_score is None or sell_score > buy_score) and (keep_score is None or sell_score > keep_score):
        sell_basket.append(company)
    elif keep_score is not None and (buy_score is None or keep_score > buy_score) and (sell_score is None or keep_score > sell_score):
        keep_basket.append(company)

print("Buy Stocks:", buy_basket)
print("Sell Stocks:", sell_basket)
print("Keep Stocks:", keep_basket)



Buy Stocks: ['AMZN', 'MSFT']
Sell Stocks: []
Keep Stocks: ['AAPL', 'PFIZER']
