In [23]:
import pandas as pd
import numpy as np
import pyodbc
import sklearn as sk
import tensorflow as tf
import json

In [24]:
# Setup SQL
global con, cur
with open(r"sql_config.json") as f:
    sql_config = json.load(f)
con = pyodbc.connect(
    "Driver={SQL Server Native Client 11.0};"
    f"Server={sql_config['ip']},{sql_config['port']};"
    f"Database={sql_config['database']};"
    f"UID={sql_config['uid']};"
    f"PWD={sql_config['password']}"
)
cur = con.cursor()

In [25]:
# Pull all data from FatTrimmerData into df
df = pd.DataFrame()
sql = "SELECT * FROM FatTrimmerData;"
for chunk in pd.read_sql(sql, con, chunksize=10000):
    df = pd.concat([df, chunk])



In [26]:
# Remove any rows where DeltaTime is -1
df = df[df.DeltaTime != -1]
# Drop any duplicate rows incase scraping makes a mistake
df.drop_duplicates(inplace=True)
# Convert NewItems bool -> int
df["NewItems"] = df["NewItems"].astype(int)

In [27]:
# Create independent variables
x = df.drop(columns=["NewItems"])
# Create dependent variables
y = df["NewItems"]

In [28]:
df.head(10)

Unnamed: 0,NewItems,PageNumber,DeltaTime
4,0,274,4161000
5,0,1054,3501000
8,0,1326,7058000
9,0,114,4357000
11,0,660,2173000
12,0,1237,2170000
13,0,1001,3125000
14,0,560,6307000
16,0,81,4851000
19,0,1784,1663000


In [29]:
# How many gpus can tensorflow see
print(f"num physical devices: {len(tf.config.list_physical_devices('GPU'))}")

num physical devices: 1


In [30]:
from sklearn.model_selection import train_test_split
# Split data into training and testing (80-20 split)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [31]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense

# Crate a sequential model with 3 layers, each layer is named after an anime waifu
# Units describe the dimensionality of the layer output
# Activation is the function responsible for computing the output of the node
# TODO describe input shape bs
model = Sequential([
    Dense(units=1024, input_shape=(x_train.shape[1],), activation='sigmoid', name="Zero"),
    Dense(units=2048, activation='sigmoid', name="Rem"),
    Dense(units=1, activation='sigmoid', name="Megumin")
])

In [32]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Zero (Dense)                (None, 1024)              3072      
                                                                 
 Rem (Dense)                 (None, 2048)              2099200   
                                                                 
 Megumin (Dense)             (None, 1)                 2049      
                                                                 
Total params: 2,104,321
Trainable params: 2,104,321
Non-trainable params: 0
_________________________________________________________________


In [33]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# POINT OF NO RETURN

Below is the code that runs the network, be careful.

(see: 'I, Robot' starring Will Smith)

In [38]:

print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"x_train size: {x_train.size}")
print(f"y_train size: {y_train.size}")
# Epochs describe amount of times to iterate over x and y, more -> longer exec.
model.fit(x_train, y_train, epochs=10000,batch_size=25000)
model.evaluate(x_test, y_test)

x_train shape: (164203, 2)
y_train shape: (164203,)
x_train size: 328406
y_train size: 164203
Epoch 1/10000
Epoch 2/10000
Epoch 3/10000
Epoch 4/10000
Epoch 5/10000
Epoch 6/10000
Epoch 7/10000
Epoch 8/10000
Epoch 9/10000
Epoch 10/10000
Epoch 11/10000
Epoch 12/10000
Epoch 13/10000
Epoch 14/10000
Epoch 15/10000
Epoch 16/10000
Epoch 17/10000
Epoch 18/10000
Epoch 19/10000
Epoch 20/10000
Epoch 21/10000
Epoch 22/10000
Epoch 23/10000
Epoch 24/10000
Epoch 25/10000
Epoch 26/10000
Epoch 27/10000
Epoch 28/10000
Epoch 29/10000
Epoch 30/10000
Epoch 31/10000
Epoch 32/10000
Epoch 33/10000
Epoch 34/10000
Epoch 35/10000
Epoch 36/10000
Epoch 37/10000
Epoch 38/10000
Epoch 39/10000
Epoch 40/10000
Epoch 41/10000
Epoch 42/10000
Epoch 43/10000
Epoch 44/10000
Epoch 45/10000
Epoch 46/10000
Epoch 47/10000
Epoch 48/10000
Epoch 49/10000
Epoch 50/10000
Epoch 51/10000
Epoch 52/10000
Epoch 53/10000
Epoch 54/10000
Epoch 55/10000
Epoch 56/10000
Epoch 57/10000
Epoch 58/10000
Epoch 59/10000
Epoch 60/10000
Epoch 61/10000


KeyboardInterrupt: 