In [1]:
import pandas as pd
import numpy as np
import pyodbc
import sklearn as sk
import tensorflow as tf
import json

In [2]:
# Set up the SQL Server connection.
# Connection settings (ip, port, database, uid, password) are read from
# sql_config.json rather than being written into the notebook.
# The file is decoded as UTF-8 explicitly so that non-ASCII values are read
# the same way regardless of the platform's default encoding.
with open(r"sql_config.json", encoding="utf-8") as f:
    sql_config = json.load(f)
# `con` and `cur` are ordinary module-level names; a `global` statement has no
# effect at the top level of a cell, so none is used here.
con = pyodbc.connect(
    "Driver={SQL Server Native Client 11.0};"
    f"Server={sql_config['ip']},{sql_config['port']};"
    f"Database={sql_config['database']};"
    f"UID={sql_config['uid']};"
    f"PWD={sql_config['password']}"
)
# Cursor for issuing ad-hoc statements on the same connection.
cur = con.cursor()

In [3]:
# Pull all data from FatTrimmerData into df.
# The result set is streamed in 10,000-row chunks. The chunks are collected in
# a list and concatenated once, instead of re-copying the ever-growing frame on
# every iteration (which is quadratic in the number of chunks).
sql = "SELECT * FROM FatTrimmerData;"
chunks = list(pd.read_sql(sql, con, chunksize=10000))
# ignore_index=True renumbers the rows 0..n-1 so row labels are unique across
# chunks. If the iterator yields nothing, fall back to an empty DataFrame
# because pd.concat raises on an empty list.
df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()



In [4]:
# Clean the raw rows in a single non-mutating chain. Nothing is modified in
# place, so no step writes into a filtered slice of the original frame (the
# usual trigger for pandas' SettingWithCopyWarning), and the cell can be
# re-run safely.
df = (
    df[df.DeltaTime != -1]  # remove any rows where DeltaTime is -1
    .drop_duplicates()  # drop duplicate rows in case scraping made a mistake
    .assign(NewItems=lambda frame: frame["NewItems"].astype(int))  # bool -> int
)

In [5]:
# Target (dependent variable): the NewItems column on its own
y = df.loc[:, "NewItems"]
# Features (independent variables): every remaining column
x = df.drop("NewItems", axis="columns")

In [6]:
# Preview the first ten rows of the cleaned frame
df.iloc[:10]

Unnamed: 0,NewItems,PageNumber,DeltaTime
4,0,274,4161000
5,0,1054,3501000
8,0,1326,7058000
9,0,114,4357000
11,0,660,2173000
12,0,1237,2170000
13,0,1001,3125000
14,0,560,6307000
16,0,81,4851000
19,0,1784,1663000


In [7]:
# Report how many GPU devices TensorFlow has detected
print("num physical devices: {}".format(len(tf.config.list_physical_devices("GPU"))))

num physical devices: 0


In [8]:
from sklearn.model_selection import train_test_split
# Split data into training and testing (80-20 split).
# A fixed random_state pins the shuffle, so train/test membership (and every
# metric computed from it) is reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [9]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense

# Build a sequential network of three fully connected layers, added one at a
# time. Each layer is named after an anime character.
#   units       - dimensionality of the layer's output
#   activation  - function that computes each node's output
#   input_shape - shape of a single sample: a 1-tuple holding the number of
#                 columns in x_train; only the first layer needs it
# NOTE(review): no feature scaling is visible before this point, so the first
# sigmoid layer appears to receive raw column values - confirm that is intended.
model = Sequential()
model.add(Dense(256, activation='sigmoid', name="Zero", input_shape=(x_train.shape[1],)))
model.add(Dense(256, activation='sigmoid', name="Rem"))
model.add(Dense(1, activation='sigmoid', name="Megumin"))

In [10]:
# Print each layer's name, output shape and parameter count, plus the totals
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Zero (Dense)                (None, 256)               768       
                                                                 
 Rem (Dense)                 (None, 256)               65792     
                                                                 
 Megumin (Dense)             (None, 1)                 257       
                                                                 
Total params: 66,817
Trainable params: 66,817
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Configure training: binary cross-entropy loss, the Adam optimizer, and
# accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# POINT OF NO RETURN

The cell below trains the network. Be careful: training can take a long time to run.

(see: 'I, Robot' starring Will Smith)

In [13]:

# Sanity-check the dimensions of the training data before fitting
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"x_train size: {x_train.size}")
print(f"y_train size: {y_train.size}")

# Stop once the validation loss has not improved for 10 consecutive epochs and
# roll back to the best weights seen, so the run finishes on its own instead of
# having to be interrupted by hand.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True
)
# Epochs describe the number of passes over x and y; more -> longer execution.
# With early stopping in place, 1000 is only an upper bound.
# validation_split holds out 10% of the training data to compute val_loss;
# the test set is left untouched for the final evaluation.
model.fit(
    x_train,
    y_train,
    epochs=1000,
    validation_split=0.1,
    callbacks=[early_stop],
)
# Loss and accuracy on the held-out test set (displayed as the cell output)
model.evaluate(x_test, y_test)

x_train shape: (162716, 2)
y_train shape: (162716,)
x_train size: 325432
y_train size: 162716
Epoch 1/1000

KeyboardInterrupt: 