In [26]:
# Loading Dependencies
from path import Path
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

#load VADER
analyzer = SentimentIntensityAnalyzer()

In [2]:
# Loading Data
data = Path('Resources/reddit.csv')
reddit_df = pd.read_csv(data)
reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,body,date
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,,2021-01-29 00:40:34
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,,2021-01-29 08:06:23
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,LEAVE ROBINHOOD. They dont deserve to make mon...,2021-01-29 00:49:11
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,,2021-01-28 08:15:35
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,,2021-01-28 11:57:32


In [3]:
# Checking DTypes
reddit_df.dtypes

title           object
score            int64
subreddit       object
url             object
num_comments     int64
body            object
date            object
dtype: object

In [4]:
# Look at subreddit value counts
subreddit = reddit_df.subreddit.value_counts()
subreddit

investing         987
stocks            985
wallstreetbets    953
Name: subreddit, dtype: int64

In [5]:
# Look at body value counts
body = reddit_df.body.value_counts()
body

Please use this thread to discuss your portfolio, learn of other stock tickers, and help out users by giving constructive criticism.\r\n\r\nWhy quarterly?  Public companies report earnings quarterly; many investors take this as an opportunity to rebalance their portfolios.  We highly recommend you do some reading:  A list of [relevant posts & book recommendations.](https://www.reddit.com/r/stocks/wiki/index#wiki_relevant_posts_.26amp.3B_book_recommendations)\r\n\r\nYou can find stocks on your own by using a scanner like your broker's or [Finviz.](https://finviz.com/screener.ashx)  To help further, here's a list of [relevant websites.](https://www.reddit.com/r/stocks/wiki/index#wiki_relevant_websites.2Fapps)\r\n\r\nIf you don't have a broker yet, see our [list of brokers](https://www.reddit.com/r/stocks/wiki/index#wiki_brokers_for_investing) or search old posts.  If you haven't started investing or trading yet, then setup your [paper trading.](https://www.reddit.com/r/stocks/wiki/index#

In [6]:
# Remove the 'body' column.
reddit_df.drop(['body'], axis=1, inplace=True)
reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,date
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32


In [7]:
# Remove rows that have at least 1 null value.
reddit_df.dropna()

Unnamed: 0,title,score,subreddit,url,num_comments,date
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32
...,...,...,...,...,...,...
2920,DID WE MISS THE BOTTOM?! How are people this i...,348,stocks,https://www.reddit.com/r/stocks/comments/g1m6u...,283,2020-04-15 17:03:23
2921,Favorite Solar Stock(s)?,348,stocks,https://www.reddit.com/r/stocks/comments/jmct3...,176,2020-11-02 10:46:49
2922,"""NIO forms battery asset company""",352,stocks,https://www.reddit.com/r/stocks/comments/id8z3...,102,2020-08-20 23:02:48
2923,Amazon is building a $1.5 billion hub for its ...,349,stocks,https://www.reddit.com/r/stocks/comments/5rghg...,145,2017-02-02 03:35:04


In [8]:
# Checking Sentiment Scores
def sentiment_analyzer_scores(sentence):
    score = analyzer.polarity_scores(sentence)
    print("{:-<40} {}".format(sentence, str(score)))
    
sentiment_analyzer_scores('UPVOTE so everyone sees we got SUPPORT')

UPVOTE so everyone sees we got SUPPORT-- {'neg': 0.0, 'neu': 0.636, 'pos': 0.364, 'compound': 0.5319}


In [9]:
#Add VADER metrics to dataframe
reddit_df['compound'] = [analyzer.polarity_scores(v)['compound'] for v in reddit_df['title']]

reddit_df['neg'] = [analyzer.polarity_scores(v)['neg'] for v in reddit_df['title']]

reddit_df['neu'] = [analyzer.polarity_scores(v)['neu'] for v in reddit_df['title']]

reddit_df['pos'] = [analyzer.polarity_scores(v)['pos'] for v in reddit_df['title']]

reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
0,UPVOTE so everyone sees we got SUPPORT,265029,wallstreetbets,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34,0.5319,0.0,0.636,0.364
1,GME YOLO update — Jan 28 2021,230844,wallstreetbets,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23,0.4278,0.0,0.679,0.321
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11,-0.5994,0.107,0.893,0.0
3,GME YOLO update — Jan 27 2021 ----------------...,185949,wallstreetbets,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35,0.4278,0.0,0.841,0.159
4,Can we all take a moment and appreciate the Mo...,184517,wallstreetbets,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32,0.6369,0.0,0.794,0.206


In [10]:
reddit_groups = reddit_df.groupby("subreddit")

In [11]:
# Grouping Vader Scores for each Subreddit
reddit_groups.mean()

Unnamed: 0_level_0,score,num_comments,compound,neg,neu,pos
subreddit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
investing,1779.078014,424.946302,-0.009373,0.080139,0.842621,0.077234
stocks,1405.035533,311.643655,0.038948,0.060024,0.852797,0.087179
wallstreetbets,29348.439664,2753.270724,0.046143,0.066779,0.834423,0.098805


In [12]:
reddit_df = reddit_df[reddit_df["subreddit"]!="stocks"]
reddit_df.tail()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
1935,does anyone here have friends and family still...,898,investing,https://www.reddit.com/r/investing/comments/ef...,609,2019-12-26 05:47:26,-0.0258,0.143,0.717,0.139
1936,Jobs growth soars in November as payrolls surg...,905,investing,https://www.reddit.com/r/investing/comments/e6...,496,2019-12-07 00:41:05,0.3818,0.0,0.776,0.224
1937,"Yale economists argue that ""the most financial...",894,investing,https://www.reddit.com/r/investing/comments/en...,400,2020-01-13 05:35:24,0.046,0.1,0.792,0.108
1938,"Amazon earnings beat: $6.04 per share, vs. $5....",897,investing,https://www.reddit.com/r/investing/comments/al...,152,2019-02-01 08:04:51,0.4404,0.0,0.642,0.358
1939,U.S. stocks plunge after report that former na...,890,investing,https://www.reddit.com/r/investing/comments/7g...,377,2017-12-02 03:34:52,0.34,0.0,0.893,0.107


In [13]:
# Changing Subreddit values to be 1 if from WSB and 0 if not.
reddit_df['subreddit'] = reddit_df['subreddit'].apply(lambda x:1 if x == "wallstreetbets" else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [14]:
reddit_df.head()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
0,UPVOTE so everyone sees we got SUPPORT,265029,1,https://i.redd.it/sgoqy8nyt2e61.png,11825,2021-01-29 00:40:34,0.5319,0.0,0.636,0.364
1,GME YOLO update — Jan 28 2021,230844,1,https://i.redd.it/opzucppb15e61.png,23532,2021-01-29 08:06:23,0.4278,0.0,0.679,0.321
2,CLASS ACTION AGAINST ROBINHOOD. Allowing peopl...,204920,1,https://www.reddit.com/r/wallstreetbets/commen...,18318,2021-01-29 00:49:11,-0.5994,0.107,0.893,0.0
3,GME YOLO update — Jan 27 2021 ----------------...,185949,1,https://i.redd.it/a309gkm5yxd61.png,15495,2021-01-28 08:15:35,0.4278,0.0,0.841,0.159
4,Can we all take a moment and appreciate the Mo...,184517,1,https://www.reddit.com/r/wallstreetbets/commen...,7105,2021-01-28 11:57:32,0.6369,0.0,0.794,0.206


In [15]:
reddit_df.tail()

Unnamed: 0,title,score,subreddit,url,num_comments,date,compound,neg,neu,pos
1935,does anyone here have friends and family still...,898,0,https://www.reddit.com/r/investing/comments/ef...,609,2019-12-26 05:47:26,-0.0258,0.143,0.717,0.139
1936,Jobs growth soars in November as payrolls surg...,905,0,https://www.reddit.com/r/investing/comments/e6...,496,2019-12-07 00:41:05,0.3818,0.0,0.776,0.224
1937,"Yale economists argue that ""the most financial...",894,0,https://www.reddit.com/r/investing/comments/en...,400,2020-01-13 05:35:24,0.046,0.1,0.792,0.108
1938,"Amazon earnings beat: $6.04 per share, vs. $5....",897,0,https://www.reddit.com/r/investing/comments/al...,152,2019-02-01 08:04:51,0.4404,0.0,0.642,0.358
1939,U.S. stocks plunge after report that former na...,890,0,https://www.reddit.com/r/investing/comments/7g...,377,2017-12-02 03:34:52,0.34,0.0,0.893,0.107


In [None]:
reddit_df

In [16]:
# Creating X and Y sets
y = reddit_df["subreddit"]
X = reddit_df.drop(columns=["subreddit","title","url","date","num_comments","score"])

In [17]:
# Breaking sets into train and test.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=1, 
                                                    stratify=y)
X_train.shape

(1455, 4)

In [18]:
y.value_counts()

0    987
1    953
Name: subreddit, dtype: int64

In [19]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=200,
                                random_state=1)

In [20]:
classifier.fit(X_train, y_train)

LogisticRegression(max_iter=200, random_state=1)

In [24]:
y_pred = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)
print(results)

     Prediction  Actual
0             0       0
1             0       0
2             0       1
3             1       1
4             0       0
..          ...     ...
480           0       0
481           0       1
482           0       0
483           0       0
484           1       1

[485 rows x 2 columns]


In [25]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))

0.5092783505154639


# Optimization Attempt #1 - Add Additional Hidden Layers

In [33]:
X_train.shape[1]

4

In [34]:
# define model
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30
hidden_nodes_layer3 = 20

nn = tf.keras.models.Sequential()

# first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='relu'))

# second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'))

#third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation='relu'))

# output layer
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# check structure of model
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 80)                400       
_________________________________________________________________
dense_1 (Dense)              (None, 30)                2430      
_________________________________________________________________
dense_2 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 21        
Total params: 3,471
Trainable params: 3,471
Non-trainable params: 0
_________________________________________________________________


In [35]:
# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [36]:
# Train the model
fit_model = nn.fit(X_train, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [39]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

16/16 - 0s - loss: 0.6389 - accuracy: 0.6351
Loss: 0.6389114856719971, Accuracy: 0.6350515484809875


# Optimization Attempt #2: Adding Neurons in Layers

In [40]:
# define model
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 90
hidden_nodes_layer2 = 30


nn = tf.keras.models.Sequential()

# first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='relu'))

# second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'))

# output layer
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# check structure of model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 90)                450       
_________________________________________________________________
dense_5 (Dense)              (None, 30)                2730      
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 31        
Total params: 3,211
Trainable params: 3,211
Non-trainable params: 0
_________________________________________________________________


In [41]:
# Compile the model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [42]:
# Train the model
fit_model = nn.fit(X_train, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [43]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

16/16 - 0s - loss: 0.6476 - accuracy: 0.6082
Loss: 0.6476240754127502, Accuracy: 0.6082473993301392


# Optmization Attempt #3 - Add Epochs

In [44]:
# define model
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='relu'))

# second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'))

# output layer
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# check structure of model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 80)                400       
_________________________________________________________________
dense_8 (Dense)              (None, 30)                2430      
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 31        
Total params: 2,861
Trainable params: 2,861
Non-trainable params: 0
_________________________________________________________________


In [45]:
#compile model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [46]:
# train model
fit_model = nn.fit(X_train, y_train, epochs=150)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


In [47]:
# evaluate model using test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

16/16 - 0s - loss: 0.6367 - accuracy: 0.6103
Loss: 0.6367443203926086, Accuracy: 0.6103093028068542


# Optimization Attempt #4: Change Activation Function w/ Callback

In [49]:
# define checkpoint path and filenames
import os
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

In [50]:
# define model
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# first hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation='relu'))

# second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation='relu'))

# output layer
nn.add(tf.keras.layers.Dense(units=1, activation='tanh'))

# check structure of model
nn.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 80)                400       
_________________________________________________________________
dense_11 (Dense)             (None, 30)                2430      
_________________________________________________________________
dense_12 (Dense)             (None, 1)                 31        
Total params: 2,861
Trainable params: 2,861
Non-trainable params: 0
_________________________________________________________________


In [53]:
from tensorflow.keras.callbacks import ModelCheckpoint

# compile model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# create callback that saves weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=1000)

# train model
fit_model = nn.fit(X_train,y_train,epochs=100,callbacks=[cp_callback])

# evaluate model using test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
 1/46 [..............................] - ETA: 0s - loss: 0.6954 - accuracy: 0.6562
Epoch 00022: saving model to checkpoints\weights.22.hdf5
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
 1/46 [..............................] - ETA: 0s - loss: 0.5992 - accuracy: 0.6562
Epoch 00044: saving model to checkpoints\weights.44.hdf5
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
E

Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
 1/46 [..............................] - ETA: 0s - loss: 0.6292 - accuracy: 0.6250
Epoch 00087: saving model to checkpoints\weights.87.hdf5
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
16/16 - 0s - loss: 0.6669 - accuracy: 0.6165
Loss: 0.6669048070907593, Accuracy: 0.6164948344230652
