In [489]:
import sklearn.preprocessing
import tensorboard
import tensorflow as tf
import numpy as np
import keras.backend
import datetime

from packaging import version
from linkmethods import get_target, get_backlink_views
from keras.layers import *

In [490]:
print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, "This notebook requires TensorFlow 2.0 or above."

TensorFlow version:  2.11.0


In [491]:
tensorboard.__version__

'2.11.0'

In [492]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [493]:
article = "Data_Science"

In [494]:
df_main = get_target(article)

Directory already exists
reading in the file


In [495]:
bl_views = get_backlink_views(article, get_target(article))

Directory already exists
reading in the file
            views
timestamp        
2015070100    643
2015070200    573
2015070300    410
2015070400    199
2015070500    334
...           ...
2022062700    262
2022062800    241
2022062900    191
2022063000    226
2022070100    204

[2558 rows x 1 columns]
            views
timestamp        
2015070100    111
2015070200     78
2015070300     72
2015070400     59
2015070500     72
...           ...
2022062700     80
2022062800     66
2022062900     90
2022063000     89
2022070100     84

[2558 rows x 1 columns]
            views
timestamp        
2015070100    0.0
2015070200    0.0
2015070300    0.0
2015070400    0.0
2015070500    0.0
...           ...
2022062700    8.0
2022062800    8.0
2022062900    8.0
2022063000   23.0
2022070100   10.0

[2558 rows x 1 columns]
            views
timestamp        
2015070100    0.0
2015070200    0.0
2015070300    0.0
2015070400    0.0
2015070500    0.0
...           ...
2022062700    1.0
2022062800    0.

In [496]:
cutoff = -512

In [497]:
data = np.array(bl_views)[:,cutoff:-1]
print(data)
data.shape

[[463. 340. 374. ... 241. 191. 226.]
 [142. 143. 120. ...  66.  90.  89.]
 [  8.   3.   8. ...   8.   8.  23.]
 ...
 [208. 249. 270. ... 155. 190. 169.]
 [205. 185. 177. ... 157. 194. 134.]
 [ 10.   1.   1. ...  13.   4.   4.]]


(44, 511)

In [498]:
target = np.array(df_main.views, dtype=float)[cutoff:-1]
target = np.array([np.copy(target) for _ in range((data.shape[0]))])
print(target)
target.shape

[[219. 151. 186. ... 165. 217. 202.]
 [219. 151. 186. ... 165. 217. 202.]
 [219. 151. 186. ... 165. 217. 202.]
 ...
 [219. 151. 186. ... 165. 217. 202.]
 [219. 151. 186. ... 165. 217. 202.]
 [219. 151. 186. ... 165. 217. 202.]]


(44, 511)

In [499]:
def smape(tftarget, tfforecast):
    denominator = tf.reduce_sum(tf.abs(tftarget)+tf.abs(tfforecast))
    if tf.reduce_sum(tftarget)+tf.reduce_sum(tfforecast) == 0:
        return tf.cast(1.0,tf.float64)
    result = (2/len(tftarget)) * tf.cast(tf.reduce_sum(tf.abs(tftarget - tfforecast))/denominator,tf.float64)

    return result

In [500]:
# Define the model.
model = keras.Sequential()

# 1 dense hidden layer with 250 nodes ~90+ acc
model.add(Input(shape=-cutoff-1)) # Input tensor
model.add(Dense(units=-cutoff-1)) # hidden layer 1,
model.add(Activation(activation="sigmoid"))

model.compile(
    optimizer='Adam',
    loss=smape,
    metrics=['accuracy'])

log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
model.summary()

AttributeError: module 'datetime' has no attribute 'now'

In [None]:
keras.utils.plot_model(model, "my_first_model.png")

In [None]:
predata = sklearn.preprocessing.normalize(data,norm="l1")
pretarget = sklearn.preprocessing.normalize(target,norm="l1")
predata

In [None]:
from sklearn.model_selection import train_test_split

# Separate the test data
x, x_test, y, y_test = train_test_split(predata, pretarget, test_size=0.15, shuffle=True)

# Split the remaining data to train and validation
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, shuffle=True)

In [None]:
x_train.shape

In [None]:
y_train.shape

In [None]:
# Train the model.
early_stop = keras.callbacks.EarlyStopping(monitor="accuracy",patience=150, restore_best_weights=True)
history = model.fit(
    x_train,
    y_train,
    batch_size=4,
    epochs=150,
    validation_data=(x_val, y_val),
    callbacks=early_stop)

In [None]:
from matplotlib import pyplot as plt
def display_learning_curves(history):
    fig, axs = plt.subplots(2, 2, figsize=(19, 11))

    axs[0,0].plot(history.history["loss"])
    axs[0,0].legend(["train", "test"], loc="upper right")
    axs[0,0].set_xlabel("Epochs")
    axs[0,0].set_ylabel("Loss")

    axs[0,1].plot(history.history["accuracy"])
    axs[0,1].legend(["train", "test"], loc="upper right")
    axs[0,1].set_xlabel("Epochs")
    axs[0,1].set_ylabel("Accuracy")

    axs[1,0].plot(history.history["val_loss"])
    axs[1,0].legend(["train", "test"], loc="upper right")
    axs[1,0].set_xlabel("Epochs")
    axs[1,0].set_ylabel("val_loss")

    axs[1,1].plot(history.history["val_accuracy"])
    axs[1,1].legend(["train", "test"], loc="upper right")
    axs[1,1].set_xlabel("Epochs")
    axs[1,1].set_ylabel("val_Accuracy")
    plt.show()

In [None]:
display_learning_curves(history)

In [None]:
weights = model.get_weights()
print(sum(history.history['loss']) / len(history.history['loss']))
weights