# Import libraries

In [2]:
import math
import argparse
import torch
import torch.nn as nn

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [3]:
torch.manual_seed(1)  # reproducibility

<torch._C.Generator at 0x7fa9863c78>

# Training settings

In [4]:
parser = argparse.ArgumentParser()

In [5]:
parser.add_argument('--data', default='data/weather.csv', type=str)
parser.add_argument('--history', default=8, type=int, help='sequence history (in hours)')
parser.add_argument('--horizon', default=1, type=int, help='forecasting horizon (in hours)')
parser.add_argument('--split', default=0.8, type=float, help='train/test dataset split')
parser.add_argument('--scaler', default='minmax', choices=['none', 'minmax', 'standard'], help='dataset preprocessing scaler to use')
parser.add_argument('--lr', '--learning-rate', default=0.05, type=float, help='learning rate')
parser.add_argument('--epochs', default=1000, type=int, help='number of training epochs')

_StoreAction(option_strings=['--epochs'], dest='epochs', nargs=None, const=None, default=1000, type=<class 'int'>, choices=None, help='number of training epochs', metavar=None)

In [6]:
args = parser.parse_args(args=[])

In [7]:
args = parser.parse_args(args=[])
print(args)

Namespace(data='data/weather.csv', epochs=1000, history=8, horizon=1, lr=0.05, scaler='minmax', split=0.8)


# Load data

In [8]:
# load data
print(f"loading {args.data}")
df = pd.read_csv(args.data, parse_dates=[0])

loading data/weather.csv


In [9]:
df = df[['temperature']]
print(df)

       temperature
0        52.592390
1        52.588712
2        52.585034
3        52.581356
4        52.577678
...            ...
40619    55.922000
40620    58.496000
40621    59.486000
40622    59.900000
40623    58.748000

[40624 rows x 1 columns]


# Preprocess data

In [10]:
# pre-process data
if args.scaler == 'minmax':
    scaler = MinMaxScaler(feature_range=(-1, 1))
elif args.scaler == 'standard':
    scaler = StandardScaler()
else:
    scaler = None

if scaler:
    data = scaler.fit_transform(df.values)
else:
    data = df.values
    
print(data)
print(data.shape)

[[0.27535059]
 [0.27528451]
 [0.27521844]
 ...
 [0.39919159]
 [0.40662894]
 [0.38593371]]
(40624, 1)


# Create PyTorch datasets

In [11]:
# create PyTorch datasets
def to_pytorch(array):
    return torch.from_numpy(array).type(torch.FloatTensor).cuda()

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
    
def unscale(array, resize=None):
    if not scaler: return array
    if len(array.shape) == 0: array = array.reshape(-1, 1)
    if len(array.shape) == 1: array = np.expand_dims(array, 0)
    #if resize: array = np.concatenate((np.zeros((resize[0], resize[1]-1)), array), axis=1)
    array = scaler.inverse_transform(array)
    #return array[:,-1] if resize else array
    return array
    
def generate_sequences(data, sequence_length):
    if sequence_length == 1:
        return np.expand_dims(data,1)
    seq = []
    for index in range(len(data) - sequence_length): 
        seq.append(data[index : index + sequence_length]) 
    return np.array(seq)
            
def create_dataset(data, history, horizon):
    # shift the data by the forecast length
    x = data[:-horizon,:]
    y = np.roll(data[:,-1],-horizon,axis=0)[:-horizon]
    
    # generate sequences
    x = generate_sequences(x, history)
    y = generate_sequences(y, history)[:,-1]
    
    # cast to pytorch tensors
    x = to_pytorch(x)
    y = to_pytorch(y).unsqueeze(dim=-1)
    
    return x, y

train_split = int(len(data) * args.split)

x_train, y_train = create_dataset(data[:train_split,:], args.history, args.horizon)
x_test, y_test = create_dataset(data[train_split:,:], args.history, args.horizon)
 
print('x_train', x_train.shape)
print('y_train', y_train.shape)

print('x_test', x_test.shape)
print('y_test', y_test.shape)

x_train torch.Size([32490, 8, 1])
y_train torch.Size([32490, 1])
x_test torch.Size([8116, 8, 1])
y_test torch.Size([8116, 1])


# Create a model

In [12]:
# create model
class GRU(nn.Module):
    def __init__(self, input_dim=1, output_dim=1, hidden_dim=32, num_layers=2):
        super(GRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.0)
        self.fc1 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_().cuda()
        # UserWarning: RNN module weights are not part of single contiguous chunk of memory
        #self.gru.flatten_parameters()   
        x, (hn) = self.gru(x, (h0.detach()))
        x = self.fc1(x[:, -1, :]) 
        return x

net = GRU(x_train.shape[-1], 1).cuda()

# Create a loss function

In [13]:
# create loss function and solver
criterion = torch.nn.MSELoss().cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)  
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 250, 0.5)

def RMSE(y_pred, y):
    return math.sqrt((np.square(unscale(to_numpy(y_pred)) - unscale(to_numpy(y)))).mean(axis=0).item())

# Train a model

In [14]:
# train
MAX_EPOCHS = 200  # or any cap you'd like
args.epochs = min(args.epochs, MAX_EPOCHS)

for epoch in range(args.epochs):
    net.train()
    
    y_pred = net(x_train)
    train_loss = criterion(y_pred, y_train)
    train_rmse = RMSE(y_pred, y_train)
    
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    scheduler.step()
    
    net.eval()
    
    with torch.no_grad():
        y_pred = net(x_test)
        test_loss = criterion(y_pred, y_test)
        test_rmse = RMSE(y_pred, y_test)
        #unscaled_loss = unscale(np.array(loss.item())).item()
        #unscaled_test_loss = unscale(np.array(test_loss.item())).item()
        print(f"Epoch {epoch:03d}  LR={scheduler.get_last_lr()[0]}  train_loss={train_loss:.8f}  test_loss={test_loss:.8f}  train_rmse={train_rmse:.8f}  test_err={test_rmse:.8f}")


Epoch 000  LR=0.05  train_loss=0.05489146  test_loss=1.43279862  train_rmse=13.04171223  test_err=66.63076752
Epoch 001  LR=0.05  train_loss=1.33856404  test_loss=0.14409821  train_rmse=64.40236069  test_err=21.13058488
Epoch 002  LR=0.05  train_loss=0.15103596  test_loss=0.08149632  train_rmse=21.63328226  test_err=15.89099997
Epoch 003  LR=0.05  train_loss=0.08006199  test_loss=0.05095008  train_rmse=15.75053865  test_err=12.56477419
Epoch 004  LR=0.05  train_loss=0.04128540  test_loss=0.04803786  train_rmse=11.31046576  test_err=12.20040007
Epoch 005  LR=0.05  train_loss=0.03166641  test_loss=0.06226980  train_rmse=9.90562076  test_err=13.89059809
Epoch 006  LR=0.05  train_loss=0.04089048  test_loss=0.05386540  train_rmse=11.25623979  test_err=12.91924498
Epoch 007  LR=0.05  train_loss=0.03482650  test_loss=0.04029753  train_rmse=10.38812455  test_err=11.17432911
Epoch 008  LR=0.05  train_loss=0.03045428  test_loss=0.04020616  train_rmse=9.71418562  test_err=11.16165364
Epoch 009  L

# Print out actual vs predicted values   

In [15]:
# print out actual vs predicted values     
#x_test = to_numpy(x_test)
y_test = to_numpy(y_test)
y_pred = to_numpy(y_pred)
 
if scaler:
    #x_test = unscale(x_test)
    y_test = unscale(y_test, x_test.shape)
    y_pred = unscale(y_pred, x_test.shape)

print('')
#print('x_test', x_test)
print('y_test', y_test)
print('y_pred', y_pred)



y_test [[40.82    ]
 [43.07    ]
 [44.852   ]
 ...
 [58.496   ]
 [59.485996]
 [59.899998]]
y_pred [[40.78715 ]
 [41.264645]
 [43.668705]
 ...
 [57.370827]
 [59.271862]
 [59.845745]]


In [17]:
import matplotlib
matplotlib.use('Agg')  # Use non-GUI backend
import matplotlib.pyplot as plt
import numpy as np

# Flatten the arrays (if needed)
y_test_flat = y_test.flatten()
y_pred_flat = y_pred.flatten()

# Generate index or time steps for the x-axis
x = np.arange(len(y_test_flat))

# Plot
plt.figure(figsize=(12, 6))
plt.plot(x, y_test_flat, label='Actual', color='blue')
plt.plot(x, y_pred_flat, label='Predicted', color='orange')
plt.title('Actual vs Predicted Values')
plt.xlabel('Time Step / Sample Index')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.tight_layout()

# Save the figure
plt.savefig('actual_vs_predicted.png')  # You can change the filename and format
plt.close()  # Close the figure to free memory


In [4]:
!zip -r ../Group17_Assignment.zip .


  adding: data/ (stored 0%)
  adding: data/.ipynb_checkpoints/ (stored 0%)
  adding: data/.ipynb_checkpoints/solar_power-checkpoint.csv (deflated 62%)
  adding: data/.ipynb_checkpoints/solar_power-checkpoint.jpg (deflated 14%)
  adding: data/.ipynb_checkpoints/shuttle-checkpoint.csv (deflated 72%)
  adding: data/weather.jpg (deflated 10%)
  adding: data/solar_power.jpg (deflated 14%)
  adding: data/weather_temperature.jpg (deflated 14%)
  adding: data/weather.csv (deflated 76%)
  adding: data/solar_power.csv (deflated 62%)
  adding: data/shuttle.csv (deflated 72%)
  adding: Module-1-Lab-solar-power-pred.py (deflated 44%)
  adding: .ipynb_checkpoints/ (stored 0%)
  adding: .ipynb_checkpoints/Module-1-Lab-space-shuttle-class-checkpoint.ipynb (deflated 75%)
  adding: .ipynb_checkpoints/Module-1-Lab-weather-pred-hand-coded-checkpoint.ipynb (deflated 76%)
  adding: .ipynb_checkpoints/Module-1-Lab-weather-pred-checkpoint.ipynb (deflated 37%)
  adding: .ipynb_checkpoints/Module-1-Lab-solar-po