In [None]:
n

# Recurrent Neural Networks
You should build an end-to-end machine learning pipeline using a recurrent neural network model. In particular, you should do the following:
- Load the `jena climate` dataset using [Pandas](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html). You can find this dataset in the [keras repository](https://keras.io/examples/timeseries/timeseries_weather_forecasting/).
- Split the dataset into training, validation, and test sets. Note that you cannot split time series using [Scikit-Learn](https://keras.io/examples/timeseries/timeseries_weather_forecasting/).
- Build an end-to-end machine learning pipeline, including a [recurrent neural network](https://keras.io/examples/timeseries/timeseries_weather_forecasting/) model.
- Optimize your pipeline by validating your design decisions.
- Test the best pipeline on the test set and report various [evaluation metrics](https://scikit-learn.org/0.15/modules/model_evaluation.html).  
- Check the documentation to identify the most important hyperparameters, attributes, and methods of the model. Use them in practice.

In [None]:
'''
1. load the dataset
2.take a sample of the data, for exapmle you can take a mesurements that belong to 12:00 every day
3.SPLITING THE DATA into training validation and test sets (not randomly, timewise manner)
4. you need to take the class creat sequencws of length N(for example 10)
5.design and train your model with LSTM/GRU
6.make prediction on the test and calculate R2 '''

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import keras

keep oin mind that C-born library is better than matplotlib

In [3]:
from zipfile import ZipFile

uri = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"
zip_path = keras.utils.get_file(origin=uri, fname="jena_climate_2009_2016.csv.zip")
zip_file = ZipFile(zip_path)
zip_file.extractall()
csv_path = "jena_climate_2009_2016.csv"

df = pd.read_csv(csv_path)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip
[1m13568290/13568290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [6]:
df.head()

Unnamed: 0,Date Time,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
0,01.01.2009 00:10:00,996.52,-8.02,265.4,-8.9,93.3,3.33,3.11,0.22,1.94,3.12,1307.75,1.03,1.75,152.3
1,01.01.2009 00:20:00,996.57,-8.41,265.01,-9.28,93.4,3.23,3.02,0.21,1.89,3.03,1309.8,0.72,1.5,136.1
2,01.01.2009 00:30:00,996.53,-8.51,264.91,-9.31,93.9,3.21,3.01,0.2,1.88,3.02,1310.24,0.19,0.63,171.6
3,01.01.2009 00:40:00,996.51,-8.31,265.12,-9.07,94.2,3.26,3.07,0.19,1.92,3.08,1309.19,0.34,0.5,198.0
4,01.01.2009 00:50:00,996.51,-8.27,265.15,-9.04,94.1,3.27,3.08,0.19,1.92,3.09,1309.0,0.32,0.63,214.3


In [8]:
df.sort_values('Date Time', inplace=True)

In [10]:
df.shape

(420551, 15)

In [15]:
# Convert the 'Date Time' column to datetime objects
df['Date Time'] = pd.to_datetime(df['Date Time'], format="%d.%m.%Y %H:%M:%S")

In [19]:
#filter rows = 12:00
noon_df = df[df['Date Time'].dt.hour == 12].copy()

In [20]:
# Reset index for cleanliness
noon_df.reset_index(drop=True, inplace=True)

In [23]:
import numpy as np

In [24]:
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

In [26]:
# Extract the 'value' column as a numpy array
values = noon_df['T (degC)'].values

In [27]:
# Define sequence length (e.g., 30 days)
seq_length = 30

In [28]:
# Create sequences
X, y = create_sequences(values, seq_length)

In [29]:
# Reshape X for RNN input (samples, timesteps, features)
X = X.reshape((X.shape[0], X.shape[1], 1))

In [30]:
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [31]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [32]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(seq_length, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=2)

  super().__init__(**kwargs)


Epoch 1/50
438/438 - 9s - 20ms/step - loss: 52.2524 - val_loss: 8.3708
Epoch 2/50
438/438 - 7s - 16ms/step - loss: 7.7313 - val_loss: 7.9716
Epoch 3/50
438/438 - 6s - 13ms/step - loss: 7.1406 - val_loss: 7.2460
Epoch 4/50
438/438 - 11s - 25ms/step - loss: 6.6197 - val_loss: 7.3936
Epoch 5/50
438/438 - 8s - 17ms/step - loss: 6.5179 - val_loss: 7.1376
Epoch 6/50
438/438 - 9s - 19ms/step - loss: 6.4665 - val_loss: 6.9148
Epoch 7/50
438/438 - 10s - 23ms/step - loss: 6.2643 - val_loss: 6.7928
Epoch 8/50
438/438 - 7s - 16ms/step - loss: 6.2989 - val_loss: 6.7949
Epoch 9/50
438/438 - 6s - 13ms/step - loss: 6.1573 - val_loss: 6.4099
Epoch 10/50
438/438 - 10s - 23ms/step - loss: 6.0373 - val_loss: 6.4701
Epoch 11/50
438/438 - 7s - 17ms/step - loss: 5.9881 - val_loss: 6.4408
Epoch 12/50
438/438 - 9s - 20ms/step - loss: 5.9864 - val_loss: 6.4732
Epoch 13/50
438/438 - 7s - 16ms/step - loss: 5.9216 - val_loss: 6.6493
Epoch 14/50
438/438 - 11s - 24ms/step - loss: 5.7929 - val_loss: 6.6616
Epoch 15/5

<keras.src.callbacks.history.History at 0x78f01ef34c10>

In [33]:
mse = model.evaluate(X_test, y_test)
print(f'Test MSE: {mse}')

[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 6.4935
Test MSE: 6.488713264465332


In [34]:
# Optionally, make predictions
predictions = model.predict(X_test)

[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step


In [38]:
#Integrate into an End-to-End Pipeline
def build_pipeline(df, seq_length=30, train_ratio=0.8):
    # Filter data for noon
    noon_df = df[df['Date Time'].dt.hour == 12].copy()
    values = noon_df['T (degC)'].values

    # Create sequences
    X, y = create_sequences(values, seq_length)
    X = X.reshape((X.shape[0], X.shape[1], 1))

    # Split into train/test
    train_size = int(train_ratio * len(X))
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Build and train model
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(seq_length, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), verbose=2)

    # Evaluate
    mse = model.evaluate(X_test, y_test)
    print(f'Test MSE: {mse}')

    return model

# Usage
model = build_pipeline(df)

Epoch 1/50
438/438 - 9s - 21ms/step - loss: 23.3261 - val_loss: 8.6264
Epoch 2/50
438/438 - 9s - 20ms/step - loss: 7.8655 - val_loss: 7.5475
Epoch 3/50
438/438 - 10s - 23ms/step - loss: 7.0613 - val_loss: 7.2429
Epoch 4/50
438/438 - 7s - 16ms/step - loss: 6.7564 - val_loss: 6.8576
Epoch 5/50
438/438 - 10s - 23ms/step - loss: 6.4495 - val_loss: 6.9673
Epoch 6/50
438/438 - 6s - 13ms/step - loss: 6.3956 - val_loss: 6.7741
Epoch 7/50
438/438 - 12s - 27ms/step - loss: 6.1911 - val_loss: 6.4996
Epoch 8/50
438/438 - 7s - 16ms/step - loss: 6.0788 - val_loss: 6.4869
Epoch 9/50
438/438 - 6s - 14ms/step - loss: 6.0514 - val_loss: 6.2299
Epoch 10/50
438/438 - 6s - 15ms/step - loss: 5.9170 - val_loss: 6.1471
Epoch 11/50
438/438 - 10s - 22ms/step - loss: 5.8264 - val_loss: 6.1433
Epoch 12/50
438/438 - 10s - 23ms/step - loss: 5.8133 - val_loss: 6.0646
Epoch 13/50
438/438 - 10s - 23ms/step - loss: 5.7435 - val_loss: 6.2304
Epoch 14/50
438/438 - 11s - 25ms/step - loss: 5.7079 - val_loss: 6.1214
Epoch 1