In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input

In [2]:
from api_keys import mongo_username, mongo_password
import pymongo

In [3]:
mongo_connection_string = f'mongodb+srv://{mongo_username}:{mongo_password}@cluster0.9gjuly6.mongodb.net/'
mongo_client = pymongo.MongoClient(mongo_connection_string)
mongo_db = mongo_client.properties
weather_collection = mongo_db.weather_data

In [4]:
all_data_cursor = weather_collection.find()
df = pd.DataFrame(list(all_data_cursor))

columns_to_drop = ['heating_degree_days', 'min_temperature', 'station_name', 'cooling_degree_days', 
                   'local_month', 'local_day', 'local_year', 'total_precipitation', 'snow_on_ground', 
                   'mean_temperature', 'total_snow', 'total_rain']

df_cleaned = df.drop(columns=columns_to_drop)

In [5]:
df_cleaned.sort_values('local_date', inplace=True)
df_cleaned.reset_index(drop=True, inplace=True)


n_steps = 365

def prepare_lstm_data(data, n_steps):
    X, y = [], []
    for i in range(len(data) - n_steps):
        end_ix = i + n_steps
        seq_x = data['max_temperature'].values[i:end_ix]
        seq_y = data['max_temperature'].values[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [6]:
X, y = prepare_lstm_data(df_cleaned, n_steps)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y.reshape(-1, 1)).flatten()

In [7]:
# Split data into training and validation sets
split_ratio = 0.8
split_idx = int(len(X_scaled) * split_ratio)
X_train, X_valid = X_scaled[:split_idx], X_scaled[split_idx:]
y_train, y_valid = y_scaled[:split_idx], y_scaled[split_idx:]

In [8]:
y_train = y_train.reshape(-1, 1)
y_valid = y_valid.reshape(-1, 1)

In [9]:
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")


X_train shape: (7280, 365), y_train shape: (7280, 1)


In [10]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, 1)))
model.add(Dense(1))

  super().__init__(**kwargs)


In [11]:
model.compile(optimizer='adam', loss='mse')


In [12]:
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_valid, y_valid), verbose=1)


Epoch 1/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 70ms/step - loss: 0.0535 - val_loss: 0.0057
Epoch 2/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 70ms/step - loss: 0.0065 - val_loss: 0.0053
Epoch 3/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 71ms/step - loss: 0.0062 - val_loss: 0.0052
Epoch 4/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 66ms/step - loss: 0.0065 - val_loss: 0.0050
Epoch 5/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 66ms/step - loss: 0.0058 - val_loss: 0.0048
Epoch 6/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 69ms/step - loss: 0.0060 - val_loss: 0.0050
Epoch 7/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 69ms/step - loss: 0.0059 - val_loss: 0.0047
Epoch 8/10
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 68ms/step - loss: 0.0058 - val_loss: 0.0048
Epoch 9/10
[1m228/228[