In [1]:
# Loading Dependencies
from path import Path
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

#load VADER
analyzer = SentimentIntensityAnalyzer()

In [2]:
# Loading Data
data = Path('Resources/reddit.csv')
reddit_df = pd.read_csv(data)
reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,body,date
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,,2021-01-29 00:40:34
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,,2021-01-29 08:06:23
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,LEAVE ROBINHOOD. They dont deserve to make mon...,2021-01-29 00:49:11
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,,2021-01-28 08:15:35
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,,2021-01-28 11:57:32


In [3]:
# Checking DTypes
reddit_df.dtypes

title           object
score            int64
subreddit       object
url             object
num_comments     int64
body            object
date            object
dtype: object

In [4]:
# Look at subreddit value counts
subreddit = reddit_df.subreddit.value_counts()
subreddit

investing         987
stocks            985
wallstreetbets    953
Name: subreddit, dtype: int64

In [5]:
# Look at body value counts
body = reddit_df.body.value_counts()
body

Please use this thread to discuss your portfolio, learn of other stock tickers, and help out users by giving constructive criticism.\n\nWhy quarterly?  Public companies report earnings quarterly; many investors take this as an opportunity to rebalance their portfolios.  We highly recommend you do some reading:  A list of [relevant posts & book recommendations.](https://www.reddit.com/r/stocks/wiki/index#wiki_relevant_posts_.26amp.3B_book_recommendations)\n\nYou can find stocks on your own by using a scanner like your broker's or [Finviz.](https://finviz.com/screener.ashx)  To help further, here's a list of [relevant websites.](https://www.reddit.com/r/stocks/wiki/index#wiki_relevant_websites.2Fapps)\n\nIf you don't have a broker yet, see our [list of brokers](https://www.reddit.com/r/stocks/wiki/index#wiki_brokers_for_investing) or search old posts.  If you haven't started investing or trading yet, then setup your [paper trading.](https://www.reddit.com/r/stocks/wiki/index#wiki_is_ther

In [6]:
# Create SQL engine

from sqlalchemy import create_engine
engine = create_engine('sqlite://', echo=False)

In [7]:
# Read reddit_df dataframe to new SQL database. Currently set to replace on each run, until we can decide how 
# we want it to append with new CSV info in the future.

reddit_df.to_sql('reddit_raw_db', con=engine, if_exists='replace')
engine.execute("SELECT * FROM reddit_raw_db").fetchmany(size=20)

[(0, 'UPVOTE so everyone sees we got SUPPORT', 265029, 'wallstreetbets', 'https://i.redd.it/sgoqy8nyt2e61.png', 11825, None, '2021-01-29 00:40:34'),
 (1, 'GME YOLO update — Jan 28 2021', 230844, 'wallstreetbets', 'https://i.redd.it/opzucppb15e61.png', 23532, None, '2021-01-29 08:06:23'),
 (2, 'CLASS ACTION AGAINST ROBINHOOD. Allowing people to only sell is the definition of market manipulation. A class action must be started, Robinhood has made plenty of money off selling info about our trades to the hedge funds to be able to pay out a little for causing people to loose money now', 204920, 'wallstreetbets', 'https://www.reddit.com/r/wallstreetbets/comments/l6x130/class_action_against_robinhood_allowing_people_to/', 18318, 'LEAVE ROBINHOOD. They dont deserve to make money off us after the millions they caused in losses. It might take a couple of days, but send Robinhood to the ground and GME to the moon.', '2021-01-29 00:49:11'),
 (3, 'GME YOLO update — Jan 27 2021 ---------------------

In [8]:
# Remove the 'body' column.
reddit_df.drop(['body'], axis=1, inplace=True)
reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,date
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32


In [9]:
# Remove rows that have at least 1 null value.
reddit_df.dropna()

Unnamed: 0,title,score,subreddit,url,num_comments,date
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32
...,...,...,...,...,...,...
2920,DID WE MISS THE BOTTOM?! How are people this i...,348,stocks,https://www.reddit.com/r/stocks/comments/g1m6u...,283,2020-04-15 17:03:23
2921,Favorite Solar Stock(s)?,348,stocks,https://www.reddit.com/r/stocks/comments/jmct3...,176,2020-11-02 10:46:49
2922,"""NIO forms battery asset company""",352,stocks,https://www.reddit.com/r/stocks/comments/id8z3...,102,2020-08-20 23:02:48
2923,Amazon is building a $1.5 billion hub for its ...,349,stocks,https://www.reddit.com/r/stocks/comments/5rghg...,145,2017-02-02 03:35:04


In [10]:
# Checking Sentiment Scores
def sentiment_analyzer_scores(sentence):
    score = analyzer.polarity_scores(sentence)
    print("{:-<40} {}".format(sentence, str(score)))
    
sentiment_analyzer_scores('UPVOTE so everyone sees we got SUPPORT')

UPVOTE so everyone sees we got SUPPORT-- {'neg': 0.0, 'neu': 0.636, 'pos': 0.364, 'compound': 0.5319}


In [11]:
#Add VADER metrics to dataframe
reddit_df['compound'] = [analyzer.polarity_scores(v)['compound'] for v in reddit_df['title']]

reddit_df['neg'] = [analyzer.polarity_scores(v)['neg'] for v in reddit_df['title']]

reddit_df['neu'] = [analyzer.polarity_scores(v)['neu'] for v in reddit_df['title']]

reddit_df['pos'] = [analyzer.polarity_scores(v)['pos'] for v in reddit_df['title']]

reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34,0.5319,0.0,0.636,0.364
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23,0.4278,0.0,0.679,0.321
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11,-0.5994,0.107,0.893,0.0
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35,0.4278,0.0,0.841,0.159
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32,0.6369,0.0,0.794,0.206


In [12]:
reddit_df.to_csv('Resources/reddit2.csv')

In [13]:
reddit_groups = reddit_df.groupby("subreddit")

In [14]:
# Grouping Vader Scores for each Subreddit
reddit_groups.mean()

Unnamed: 0_level_0,score,num_comments,compound,neg,neu,pos
subreddit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
investing,1779.078014,424.946302,-0.009373,0.080139,0.842621,0.077234
stocks,1405.035533,311.643655,0.038948,0.060024,0.852797,0.087179
wallstreetbets,29348.439664,2753.270724,0.046143,0.066779,0.834423,0.098805


In [15]:
reddit_df = reddit_df[reddit_df["subreddit"]!="stocks"]
reddit_df.tail()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
1935,does anyone here have friends and family still...,898,investing,https://www.reddit.com/r/investing/comments/ef...,609,2019-12-26 05:47:26,-0.0258,0.143,0.717,0.139
1936,Jobs growth soars in November as payrolls surg...,905,investing,https://www.reddit.com/r/investing/comments/e6...,496,2019-12-07 00:41:05,0.3818,0.0,0.776,0.224
1937,"Yale economists argue that ""the most financial...",894,investing,https://www.reddit.com/r/investing/comments/en...,400,2020-01-13 05:35:24,0.046,0.1,0.792,0.108
1938,"Amazon earnings beat: $6.04 per share, vs. $5....",897,investing,https://www.reddit.com/r/investing/comments/al...,152,2019-02-01 08:04:51,0.4404,0.0,0.642,0.358
1939,U.S. stocks plunge after report that former na...,890,investing,https://www.reddit.com/r/investing/comments/7g...,377,2017-12-02 03:34:52,0.34,0.0,0.893,0.107


In [16]:
# Changing Subreddit values to be 1 if from WSB and 0 if not.
reddit_df['subreddit'] = reddit_df['subreddit'].apply(lambda x:1 if x == "wallstreetbets" else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [17]:
reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
0,UPVOTE so everyone sees we got SUPPORT,265029,1,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34,0.5319,0.0,0.636,0.364
1,GME YOLO update — Jan 28 2021,230844,1,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23,0.4278,0.0,0.679,0.321
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,1,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11,-0.5994,0.107,0.893,0.0
3,GME YOLO update — Jan 27 2021 ----------------...,185949,1,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35,0.4278,0.0,0.841,0.159
4,Can we all take a moment and appreciate the Mo...,184517,1,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32,0.6369,0.0,0.794,0.206


In [18]:
reddit_df.tail()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
1935,does anyone here have friends and family still...,898,0,https://www.reddit.com/r/investing/comments/ef...,609,2019-12-26 05:47:26,-0.0258,0.143,0.717,0.139
1936,Jobs growth soars in November as payrolls surg...,905,0,https://www.reddit.com/r/investing/comments/e6...,496,2019-12-07 00:41:05,0.3818,0.0,0.776,0.224
1937,"Yale economists argue that ""the most financial...",894,0,https://www.reddit.com/r/investing/comments/en...,400,2020-01-13 05:35:24,0.046,0.1,0.792,0.108
1938,"Amazon earnings beat: $6.04 per share, vs. $5....",897,0,https://www.reddit.com/r/investing/comments/al...,152,2019-02-01 08:04:51,0.4404,0.0,0.642,0.358
1939,U.S. stocks plunge after report that former na...,890,0,https://www.reddit.com/r/investing/comments/7g...,377,2017-12-02 03:34:52,0.34,0.0,0.893,0.107


In [19]:
# Read the cleaned and VADER modified reddit_df dataframe to new SQL database. Currently set to replace on each run, 
# until we can decide how we want it to append with new CSV info in the future.


reddit_df.to_sql('reddit_cleaned_with_VADER_db', con=engine, if_exists='replace')
engine.execute("SELECT * FROM reddit_cleaned_with_VADER_db").fetchmany(size=20)

[(0, 'UPVOTE so everyone sees we got SUPPORT', 265029, 1, 'https://i.redd.it/sgoqy8nyt2e61.png', 11825, '2021-01-29 00:40:34', 0.5319, 0.0, 0.636, 0.364),
 (1, 'GME YOLO update — Jan 28 2021', 230844, 1, 'https://i.redd.it/opzucppb15e61.png', 23532, '2021-01-29 08:06:23', 0.4278, 0.0, 0.679, 0.321),
 (2, 'CLASS ACTION AGAINST ROBINHOOD. Allowing people to only sell is the definition of market manipulation. A class action must be started, Robinhood has made plenty of money off selling info about our trades to the hedge funds to be able to pay out a little for causing people to loose money now', 204920, 1, 'https://www.reddit.com/r/wallstreetbets/comments/l6x130/class_action_against_robinhood_allowing_people_to/', 18318, '2021-01-29 00:49:11', -0.5994, 0.107, 0.893, 0.0),
 (3, 'GME YOLO update — Jan 27 2021 --------------------------------------- guess i need 102 characters in title now', 185949, 1, 'https://i.redd.it/a309gkm5yxd61.png', 15495, '2021-01-28 08:15:35', 0.4278, 0.0, 0.841, 

In [20]:
# Read from DB into a dataframe for ML model

reddit_cleaned_with_VADER_df = pd.read_sql("reddit_cleaned_with_VADER_db", con=engine)

In [21]:
reddit_cleaned_with_VADER_df.head()

Unnamed: 0,index,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
0,0,UPVOTE so everyone sees we got SUPPORT,265029,1,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34,0.5319,0.0,0.636,0.364
1,1,GME YOLO update — Jan 28 2021,230844,1,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23,0.4278,0.0,0.679,0.321
2,2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,1,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11,-0.5994,0.107,0.893,0.0
3,3,GME YOLO update — Jan 27 2021 ----------------...,185949,1,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35,0.4278,0.0,0.841,0.159
4,4,Can we all take a moment and appreciate the Mo...,184517,1,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32,0.6369,0.0,0.794,0.206


In [22]:
# Creating X and Y sets
y = reddit_cleaned_with_VADER_df["subreddit"]
X = reddit_cleaned_with_VADER_df.drop(columns=["index","subreddit","title","url","date","score","num_comments"])

In [23]:
# Breaking sets into train and test.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    stratify=y)
X_train.shape

(1455, 4)

In [24]:
y.value_counts()

0    987
1    953
Name: subreddit, dtype: int64

In [25]:
# NOT NEEDED WHEN TAKING OUT SCORE AND NUM_COMMENTS
# create scaler instances
#scaler = StandardScaler()

# fit scaler
#X_scaler = scaler.fit(X_train)

# scale data
#X_train_scaled = X_scaler.transform(X_train)
#X_test_scaled = X_scaler.transform(X_test)

In [26]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=200,
                                random_state=1)

In [27]:
classifier.fit(X_train, y_train)
#classifier.fit(X_train_scaled, y_train)

LogisticRegression(max_iter=200, random_state=1)

In [28]:
y_pred = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)
results.head(20)

Unnamed: 0,Prediction,Actual
0,0,0
1,0,0
2,0,1
3,1,1
4,0,0
5,0,1
6,0,0
7,0,0
8,1,1
9,0,1


In [29]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))

0.5092783505154639


# Optimization Attempt #1 - Add Additional Hidden Layers

In [30]:
#X_train_scaled.shape[1]
X_train.shape[1]


4

In [31]:
# define model
number_input_features = X_train.shape[1]
#number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30
hidden_nodes_layer3 = 20

nn = tf.keras.models.Sequential()

# first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='relu'))

# second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'))

#third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation='relu'))

# output layer
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# check structure of model
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 80)                400       
_________________________________________________________________
dense_1 (Dense)              (None, 30)                2430      
_________________________________________________________________
dense_2 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 21        
Total params: 3,471
Trainable params: 3,471
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [33]:
# Train the model
fit_model = nn.fit(X_train, y_train, epochs=100)
#fit_model = nn.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [34]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
#model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

16/16 - 0s - loss: 0.6390 - accuracy: 0.6206
Loss: 0.6389980912208557, Accuracy: 0.6206185817718506


# Optimization Attempt #2: Adding Neurons in Layers & tanh for relu

In [35]:
# define model
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 90
hidden_nodes_layer2 = 40
hidden_nodes_layer3 = 30


nn = tf.keras.models.Sequential()

# first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='tanh'))

# second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='tanh'))

#third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation='tanh'))

# output layer
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# check structure of model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 90)                450       
_________________________________________________________________
dense_5 (Dense)              (None, 40)                3640      
_________________________________________________________________
dense_6 (Dense)              (None, 30)                1230      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 31        
Total params: 5,351
Trainable params: 5,351
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [37]:
# Train the model
fit_model = nn.fit(X_train, y_train, epochs=63)

Epoch 1/63
Epoch 2/63
Epoch 3/63
Epoch 4/63
Epoch 5/63
Epoch 6/63
Epoch 7/63
Epoch 8/63
Epoch 9/63
Epoch 10/63
Epoch 11/63
Epoch 12/63
Epoch 13/63
Epoch 14/63
Epoch 15/63
Epoch 16/63
Epoch 17/63
Epoch 18/63
Epoch 19/63
Epoch 20/63
Epoch 21/63
Epoch 22/63
Epoch 23/63
Epoch 24/63
Epoch 25/63
Epoch 26/63
Epoch 27/63
Epoch 28/63
Epoch 29/63
Epoch 30/63
Epoch 31/63
Epoch 32/63
Epoch 33/63
Epoch 34/63
Epoch 35/63
Epoch 36/63
Epoch 37/63
Epoch 38/63
Epoch 39/63
Epoch 40/63
Epoch 41/63
Epoch 42/63
Epoch 43/63
Epoch 44/63
Epoch 45/63
Epoch 46/63
Epoch 47/63
Epoch 48/63
Epoch 49/63
Epoch 50/63
Epoch 51/63
Epoch 52/63
Epoch 53/63
Epoch 54/63
Epoch 55/63
Epoch 56/63
Epoch 57/63
Epoch 58/63
Epoch 59/63
Epoch 60/63
Epoch 61/63
Epoch 62/63
Epoch 63/63


In [38]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

16/16 - 0s - loss: 0.6497 - accuracy: 0.6124
Loss: 0.6496789455413818, Accuracy: 0.6123711466789246


### Optimization Attempt #3: Change Activation Function w/ Callback

In [39]:
# define checkpoint path and filenames
import os
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [40]:
# define model
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30
hidden_nodes_layer3 = 20


nn = tf.keras.models.Sequential()

# first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='relu'))

# second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'))

# output layer
nn.add(tf.keras.layers.Dense(units=1, activation='tanh'))

# check structure of model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 80)                400       
_________________________________________________________________
dense_9 (Dense)              (None, 30)                2430      
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 31        
Total params: 2,861
Trainable params: 2,861
Non-trainable params: 0
_________________________________________________________________


In [41]:
from tensorflow.keras.callbacks import ModelCheckpoint

# compile model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# create callback that saves weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=1000)

# train model
fit_model = nn.fit(X_train,y_train,epochs=100,callbacks=[cp_callback])

# evaluate model using test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
 1/46 [..............................] - ETA: 0s - loss: 7.2304 - accuracy: 0.5312
Epoch 00022: saving model to checkpoints/weights.22.hdf5
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
 1/46 [..............................] - ETA: 0s - loss: 6.7484 - accuracy: 0.5625
Epoch 00044: saving model to checkpoints/weights.44.hdf5
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
E

Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
 1/46 [..............................] - ETA: 0s - loss: 10.1226 - accuracy: 0.3438
Epoch 00087: saving model to checkpoints/weights.87.hdf5
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
16/16 - 0s - loss: 7.5694 - accuracy: 0.5093
Loss: 7.569356441497803, Accuracy: 0.5092783570289612
