**Notes on the YouTube video: https://www.youtube.com/watch?v=c0k-YLQGKjY**

In [None]:
# Nice imports
import os
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Build the dataset location: <home>/HeroysundBridge-ML/combined.parquet
user_home = os.path.expanduser("~")
relative_path = 'HeroysundBridge-ML'
target_folder = os.path.join(user_home, relative_path)

file_name = 'combined.parquet'
file_path = os.path.join(target_folder, file_name)

# Report whether the dataset is present before later cells try to read it
if not os.path.exists(file_path):
    print(f"File '{file_name}' not found in '{target_folder}'.")
else:
    print(f"Found '{file_name}' at: {file_path}")


In [None]:
# Read the parquet dataset at `file_path` into a DataFrame and display it.
df = pd.read_parquet(path=file_path)
df

**Setting up the index (first column) correctly as datetimes**

In [None]:
# Parse the 'Date' column with the year-month-day-hour layout and use the
# resulting timestamps as the row index.
parsed_dates = pd.to_datetime(df['Date'], format='%Y%m%d%H')
df.index = parsed_dates
df

In [None]:
# Draw the index values against their row position; gaps or jumps in the
# timestamps show up as kinks in the line.
fig, ax = plt.subplots()
ax.plot(df.index)

# Axis labels and title
ax.set_xlabel('Rows of the DataFrame')
ax.set_ylabel('Date')
ax.set_title('Time Series Plot')

# Render the figure
plt.show()

In [None]:
# Pick the single column that is windowed and fed to the model below.
temp = df.loc[:, 'Point_1_N_mean']


In [None]:
def df_to_X_y(df, window_size = 5):
    """Turn an ordered sequence into sliding-window samples for one-step-ahead forecasting.

    Sample ``i`` of ``X`` holds the ``window_size`` consecutive readings that
    start at position ``i``; ``y[i]`` is the reading immediately after that
    window.

    Parameters
    ----------
    df : pandas.Series, pandas.DataFrame or array-like
        Ordered observations. Objects exposing ``to_numpy()`` are converted
        with it; anything else goes through ``np.asarray``.
    window_size : int, default 5
        Number of past readings in each sample. Must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        For 1-D input of length ``n``: shape ``(max(n - window_size, 0), window_size, 1)``.
        For 2-D input the column axis is kept after the singleton axis.
    y : numpy.ndarray
        For 1-D input of length ``n``: shape ``(max(n - window_size, 0),)``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    values = df.to_numpy() if hasattr(df, "to_numpy") else np.asarray(df)

    # Clamp at zero so an input shorter than the window yields empty arrays
    # that still carry the (samples, window, 1) layout.
    n_samples = max(len(values) - window_size, 0)

    # Row i of `positions` is [i, i+1, ..., i+window_size-1]; indexing with it
    # gathers every window in one vectorised step instead of a Python loop.
    positions = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = np.expand_dims(values[positions], axis=2)

    # Copy so the labels never alias the caller's underlying data buffer.
    y = values[window_size:].copy()
    return X, y

In [None]:
# Number of past readings handed to the model for each prediction.
WINDOW_SIZE = 5

# Build the windowed inputs and their next-step targets, then show both shapes.
X, y = df_to_X_y(temp, window_size=WINDOW_SIZE)
(X.shape, y.shape)

In [None]:
# Chronological split boundaries: rows before TRAIN_END are used for training,
# rows up to VALID_END for validation, and everything after for testing.
TRAIN_END = 20000
VALID_END = 25000

X_train, X_valid, X_test = X[:TRAIN_END], X[TRAIN_END:VALID_END], X[VALID_END:]
y_train, y_valid, y_test = y[:TRAIN_END], y[TRAIN_END:VALID_END], y[VALID_END:]

(X_train.shape, X_valid.shape, X_test.shape)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import Adam

# Seed TensorFlow's global random generator before any layer is created so
# that weight initialisation is repeatable from one kernel restart to the next.
tf.random.set_seed(42)

# Small LSTM regressor: takes a window of WINDOW_SIZE scalar readings and
# outputs a single value (the next reading).
model1 = Sequential([
    InputLayer((WINDOW_SIZE, 1)),
    LSTM(64),
    Dense(8, activation='relu'),
    Dense(1, activation='linear'),
])
model1.summary()

# Mean squared error is both the training loss and the reported metric.
# The learning rate is stated explicitly (1e-4) rather than left at Adam's
# default, so the model is configured the same way whether or not it is
# compiled again before training.
model1.compile(optimizer=Adam(learning_rate=0.0001), loss=mean_squared_error, metrics=['mse'])


In [None]:
# Checkpoint callback: writes the model under <target_folder>/model1/ and,
# with save_best_only, only overwrites it when the monitored value improves.
checkpoint_path = os.path.join(target_folder, 'model1/')
cp = ModelCheckpoint(checkpoint_path, save_best_only=True)

# (Re)compile with a small learning rate before training.
slow_adam = Adam(learning_rate=0.0001)
model1.compile(optimizer=slow_adam, loss=mean_squared_error, metrics=['mse'])

In [None]:
# Train for 4 epochs, evaluating on the validation split after each epoch and
# letting the checkpoint callback save the model.
model1.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=4,
    callbacks=[cp],
)

In [None]:
from tensorflow.keras.models import load_model

# Location the checkpoint callback saved to: <target_folder>/model1/
model_file_path = os.path.join(target_folder, 'model1/')

# Replace the in-memory model with the checkpointed one read back from disk
model1 = load_model(model_file_path)


In [None]:
# Predict on the training windows and flatten the model output to 1-D so it
# lines up column-wise with the true targets.
train_predicitons = model1.predict(X_train).flatten()
train_results = pd.DataFrame({
    'Train Predictions': train_predicitons,
    'Actuals': y_train,
})
train_results

In [None]:
# Overlay predictions on the true values for the whole training range.
plt.plot(train_results['Actuals'], label='Actuals')
plt.plot(train_results['Train Predictions'], label='Predictions')
# The labels above are only rendered once a legend is requested.
plt.legend()

In [None]:
# Zoom in on rows 50-149 so individual predictions can be compared by eye.
train_window = train_results.iloc[50:150]
plt.plot(train_window['Actuals'], label='Actuals')
plt.plot(train_window['Train Predictions'], label='Predictions')
plt.legend()

**But we don't really care how it did on the training set, since the model has already seen that data.**

In [None]:
# Predict on the validation windows and pair the flattened output with the
# true targets.
val_predictions = model1.predict(X_valid).flatten()
val_results = pd.DataFrame({
    'Validation Predictions': val_predictions,
    'Actuals': y_valid,
})
val_results

In [None]:
# Compare predictions with the true values on validation rows 1000-1499.
val_window = val_results.iloc[1000:1500]
plt.plot(val_window['Actuals'], label='Actuals')
plt.plot(val_window['Validation Predictions'], label='Predictions')
plt.legend()

In [None]:
# Count the GPU devices TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


In [None]:
# Count the CPU devices TensorFlow can see.
cpu_devices = tf.config.list_physical_devices('CPU')
print("Num CPUs Available: ", len(cpu_devices))
