In [1]:
import matplotlib.pyplot as plt
import csv
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

## Load and preprocess the data

We will use data from past 7 days to predict next day temperature.
Since every feature has values with varying ranges, we do normalization to confine feature values to a range of [0, 1] before training a neural network. We do this by subtracting the mean and dividing by the standard deviation of each feature.

80 % of the data will be used to train the model, i.e. 3650 * 0.8 rows. split_fraction can be changed to alter this percentage.

In [11]:
import pandas as pd
df = pd.read_csv('daily-min-temperatures.csv')
df.head()

Unnamed: 0,Date,Temp
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8


In [12]:
df["Date"]

0       1981-01-01
1       1981-01-02
2       1981-01-03
3       1981-01-04
4       1981-01-05
           ...    
3645    1990-12-27
3646    1990-12-28
3647    1990-12-29
3648    1990-12-30
3649    1990-12-31
Name: Date, Length: 3650, dtype: object

##### B

In [16]:
from sklearn.model_selection import train_test_split

# Normalize the temperatures
df['Temp'] = (df['Temp'] - df['Temp'].mean()) / df['Temp'].std()

X_train, X_test, y_train, y_test = train_test_split(df['Date'], df['Temp'], test_size=0.2, random_state=42)

print(y_train.std())
print(y_train.mean())

0.9958783604892041
-0.0026863443783818773


##### C

In [27]:
import torch
from torch.utils.data import Dataset

class TempSequenceDataset(Dataset):
    def __init__(self, temps, sequence_length=7):
        self.sequence_length = sequence_length
        self.temps = torch.tensor(temps.values, dtype=torch.float32)
        self.samples = []
        self.targets = []

        for i in range(len(self.temps) - sequence_length):
            self.samples.append(self.temps[i:i+sequence_length])    # sequence  
            self.targets.append(self.temps[i+sequence_length])      # target

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx], self.targets[idx]
    

# Only pass in the normalized temperature column
train_dataset = TempSequenceDataset(df['Temp'][:int(len(df)*0.8)])
test_dataset = TempSequenceDataset(df['Temp'][int(len(df)*0.8):])
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256)

train_dataset[0]

(tensor([2.3386, 1.6509, 1.8719, 0.8405, 1.1352, 1.1352, 1.1352]),
 tensor(1.5281))

##### D

In [35]:
import torch.optim as optim

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size=1, hidden_dim=128, num_layers=1):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        last_out = lstm_out[:, -1, :]  # use last time step output
        out = self.fc(last_out)
        return out.squeeze()

# Initialize model, loss, optimizer
model = LSTMModel().to(device)


##### E

In [33]:
# Training loop
epochs = 100
learning_rate = 0.001

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss / len(train_loader):.4f}")

# Optional: Evaluate on test set
model.eval()
with torch.no_grad():
    test_losses = []
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        output = model(X_batch)
        loss = criterion(output, y_batch)
        test_losses.append(loss.item())

    print(f"Test Loss: {sum(test_losses) / len(test_losses):.4f}")

Epoch 1/100 - Loss: 0.8821
Epoch 2/100 - Loss: 0.5781
Epoch 3/100 - Loss: 0.5046
Epoch 4/100 - Loss: 0.4843
Epoch 5/100 - Loss: 0.4700
Epoch 6/100 - Loss: 0.4754
Epoch 7/100 - Loss: 0.4579
Epoch 8/100 - Loss: 0.4497
Epoch 9/100 - Loss: 0.4466
Epoch 10/100 - Loss: 0.4422
Epoch 11/100 - Loss: 0.4303
Epoch 12/100 - Loss: 0.4202
Epoch 13/100 - Loss: 0.4037
Epoch 14/100 - Loss: 0.3884
Epoch 15/100 - Loss: 0.3925
Epoch 16/100 - Loss: 0.3868
Epoch 17/100 - Loss: 0.3801
Epoch 18/100 - Loss: 0.3781
Epoch 19/100 - Loss: 0.3758
Epoch 20/100 - Loss: 0.3726
Epoch 21/100 - Loss: 0.3744
Epoch 22/100 - Loss: 0.3685
Epoch 23/100 - Loss: 0.3708
Epoch 24/100 - Loss: 0.3742
Epoch 25/100 - Loss: 0.3717
Epoch 26/100 - Loss: 0.3714
Epoch 27/100 - Loss: 0.3724
Epoch 28/100 - Loss: 0.3726
Epoch 29/100 - Loss: 0.3685
Epoch 30/100 - Loss: 0.3748
Epoch 31/100 - Loss: 0.3661
Epoch 32/100 - Loss: 0.3691
Epoch 33/100 - Loss: 0.3702
Epoch 34/100 - Loss: 0.3743
Epoch 35/100 - Loss: 0.3764
Epoch 36/100 - Loss: 0.3670
E

##### F

In [36]:
model = LSTMModel(num_layers=2).to(device)
# Training loop
epochs = 100
learning_rate = 0.001

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss / len(train_loader):.4f}")

# Optional: Evaluate on test set
model.eval()
with torch.no_grad():
    test_losses = []
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        output = model(X_batch)
        loss = criterion(output, y_batch)
        test_losses.append(loss.item())

    print(f"Test Loss: {sum(test_losses) / len(test_losses):.4f}")

Epoch 1/100 - Loss: 0.8684
Epoch 2/100 - Loss: 0.5510
Epoch 3/100 - Loss: 0.5105
Epoch 4/100 - Loss: 0.4992
Epoch 5/100 - Loss: 0.4896
Epoch 6/100 - Loss: 0.4798
Epoch 7/100 - Loss: 0.4745
Epoch 8/100 - Loss: 0.4567
Epoch 9/100 - Loss: 0.4399
Epoch 10/100 - Loss: 0.4162
Epoch 11/100 - Loss: 0.3856
Epoch 12/100 - Loss: 0.3728
Epoch 13/100 - Loss: 0.3725
Epoch 14/100 - Loss: 0.3664
Epoch 15/100 - Loss: 0.3759
Epoch 16/100 - Loss: 0.3862
Epoch 17/100 - Loss: 0.3744
Epoch 18/100 - Loss: 0.3689
Epoch 19/100 - Loss: 0.3703
Epoch 20/100 - Loss: 0.3755
Epoch 21/100 - Loss: 0.3719
Epoch 22/100 - Loss: 0.3734
Epoch 23/100 - Loss: 0.3724
Epoch 24/100 - Loss: 0.3703
Epoch 25/100 - Loss: 0.3706
Epoch 26/100 - Loss: 0.3711
Epoch 27/100 - Loss: 0.3680
Epoch 28/100 - Loss: 0.3770
Epoch 29/100 - Loss: 0.3662
Epoch 30/100 - Loss: 0.3719
Epoch 31/100 - Loss: 0.3712
Epoch 32/100 - Loss: 0.3721
Epoch 33/100 - Loss: 0.3715
Epoch 34/100 - Loss: 0.3623
Epoch 35/100 - Loss: 0.3710
Epoch 36/100 - Loss: 0.3697
E

In [37]:
model = LSTMModel(num_layers=3).to(device)
# Training loop
epochs = 100
learning_rate = 0.001

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss / len(train_loader):.4f}")

# Optional: Evaluate on test set
model.eval()
with torch.no_grad():
    test_losses = []
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        output = model(X_batch)
        loss = criterion(output, y_batch)
        test_losses.append(loss.item())

    print(f"Test Loss: {sum(test_losses) / len(test_losses):.4f}")

Epoch 1/100 - Loss: 0.8964
Epoch 2/100 - Loss: 0.5672
Epoch 3/100 - Loss: 0.5283
Epoch 4/100 - Loss: 0.5213
Epoch 5/100 - Loss: 0.5166
Epoch 6/100 - Loss: 0.5071
Epoch 7/100 - Loss: 0.4969
Epoch 8/100 - Loss: 0.4846
Epoch 9/100 - Loss: 0.4754
Epoch 10/100 - Loss: 0.4346
Epoch 11/100 - Loss: 0.4036
Epoch 12/100 - Loss: 0.3841
Epoch 13/100 - Loss: 0.3764
Epoch 14/100 - Loss: 0.3753
Epoch 15/100 - Loss: 0.3732
Epoch 16/100 - Loss: 0.3741
Epoch 17/100 - Loss: 0.3767
Epoch 18/100 - Loss: 0.3734
Epoch 19/100 - Loss: 0.3748
Epoch 20/100 - Loss: 0.3835
Epoch 21/100 - Loss: 0.3728
Epoch 22/100 - Loss: 0.3669
Epoch 23/100 - Loss: 0.3679
Epoch 24/100 - Loss: 0.3697
Epoch 25/100 - Loss: 0.3674
Epoch 26/100 - Loss: 0.3665
Epoch 27/100 - Loss: 0.3677
Epoch 28/100 - Loss: 0.3621
Epoch 29/100 - Loss: 0.3668
Epoch 30/100 - Loss: 0.3627
Epoch 31/100 - Loss: 0.3636
Epoch 32/100 - Loss: 0.3653
Epoch 33/100 - Loss: 0.3681
Epoch 34/100 - Loss: 0.3672
Epoch 35/100 - Loss: 0.3643
Epoch 36/100 - Loss: 0.3670
E

The loss gets worse with more layers

##### G

In [38]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim

# Load and normalize the dataset
df = pd.read_csv('daily-min-temperatures.csv')
df['Temp'] = (df['Temp'] - df['Temp'].mean()) / df['Temp'].std()

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Custom Dataset
class TempSequenceDataset(Dataset):
    def __init__(self, temps, sequence_length=7, t=1):  # t-day ahead prediction
        self.sequence_length = sequence_length
        self.t = t
        self.temps = torch.tensor(temps.values, dtype=torch.float32)
        self.samples = []
        self.targets = []

        for i in range(len(self.temps) - sequence_length - t + 1):
            self.samples.append(self.temps[i:i+sequence_length])
            self.targets.append(self.temps[i+sequence_length + t - 1])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx].unsqueeze(1), self.targets[idx]  # (seq_len, 1)


# Split and create datasets
train_dataset = TempSequenceDataset(df['Temp'][:int(len(df)*0.8)])
test_dataset = TempSequenceDataset(df['Temp'][int(len(df)*0.8):])

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256)

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size=1, hidden_dim=128, num_layers=3):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_dim, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        last_out = lstm_out[:, -1, :]  # last time step
        out = self.fc(last_out)
        return out.squeeze()

def train_and_evaluate(t):
    print(f"\n=== Predicting {t}-day ahead ===")
    train_dataset = TempSequenceDataset(df['Temp'][:int(len(df)*0.8)], t=t)
    test_dataset = TempSequenceDataset(df['Temp'][int(len(df)*0.8):], t=t)

    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=256)

    model = LSTMModel().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(30):  # Reduce for quicker testing
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/30 - Loss: {total_loss / len(train_loader):.4f}")

    # Evaluate
    model.eval()
    with torch.no_grad():
        test_losses = []
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            output = model(X_batch)
            loss = criterion(output, y_batch)
            test_losses.append(loss.item())
        print(f"Test Loss for t={t}: {sum(test_losses) / len(test_losses):.4f}")

for t in [1, 3, 5]:
    train_and_evaluate(t)



=== Predicting 1-day ahead ===
Epoch 1/30 - Loss: 0.8754
Epoch 2/30 - Loss: 0.5777
Epoch 3/30 - Loss: 0.5423
Epoch 4/30 - Loss: 0.5220
Epoch 5/30 - Loss: 0.5198
Epoch 6/30 - Loss: 0.5064
Epoch 7/30 - Loss: 0.5032
Epoch 8/30 - Loss: 0.4948
Epoch 9/30 - Loss: 0.4708
Epoch 10/30 - Loss: 0.4569
Epoch 11/30 - Loss: 0.4176
Epoch 12/30 - Loss: 0.3961
Epoch 13/30 - Loss: 0.3791
Epoch 14/30 - Loss: 0.3745
Epoch 15/30 - Loss: 0.3727
Epoch 16/30 - Loss: 0.3725
Epoch 17/30 - Loss: 0.3758
Epoch 18/30 - Loss: 0.3715
Epoch 19/30 - Loss: 0.3712
Epoch 20/30 - Loss: 0.3775
Epoch 21/30 - Loss: 0.3748
Epoch 22/30 - Loss: 0.3778
Epoch 23/30 - Loss: 0.3722
Epoch 24/30 - Loss: 0.3696
Epoch 25/30 - Loss: 0.3723
Epoch 26/30 - Loss: 0.3686
Epoch 27/30 - Loss: 0.3660
Epoch 28/30 - Loss: 0.3671
Epoch 29/30 - Loss: 0.3767
Epoch 30/30 - Loss: 0.3681
Test Loss for t=1: 0.3085

=== Predicting 3-day ahead ===
Epoch 1/30 - Loss: 0.9242
Epoch 2/30 - Loss: 0.5881
Epoch 3/30 - Loss: 0.5591
Epoch 4/30 - Loss: 0.5507
Epoch

The loss gets worse as you try to predict more days in the future