In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import datetime
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [2]:
training_data = pd.read_csv("organized_data_training.csv")

In [3]:
print(training_data)

        day_of_week  hour_of_day  normalized_energy  average_energy       t-3  \
0               6.0          3.0           0.234568        0.076454  0.114212   
1               6.0          4.0           0.234568        0.072333  0.132798   
2               6.0          5.0           0.234568        0.071824  0.148183   
3               6.0          6.0           0.172840        0.077041  0.158114   
4               6.0          7.0           0.160494        0.092937  0.162235   
...             ...          ...                ...             ...       ...   
196921          5.0         16.0           0.009524        0.122585 -0.110656   
196922          5.0         17.0           0.006349        0.141208 -0.098425   
196923          5.0         18.0           0.009524        0.138830 -0.097711   
196924          5.0         19.0           0.006349        0.142863 -0.113061   
196925          5.0         20.0           0.006349        0.149307 -0.134859   

             t-2       t-1 

In [4]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(3, 1)   # input layer with 3 inputs and output layer with 1 output

    def forward(self, x):
        x = self.fc1(x)   # pass the input through the linear layer
        x = torch.tanh(x)  # apply the tanh activation function to the output of the linear layer
        return x

In [5]:
net = Net()

In [23]:
training_data_array = training_data.values

In [24]:
train_x = training_data_array[:1000, 4:7]
train_y = training_data_array[:1000,-1:]

In [25]:
train_x

array([[ 0.1142123 ,  0.13279826,  0.14818289],
       [ 0.13279826,  0.14818289,  0.15811366],
       [ 0.14818289,  0.15811366,  0.16223509],
       ...,
       [ 0.04348886,  0.02785388,  0.04091381],
       [ 0.02785388,  0.04091381,  0.05025492],
       [ 0.04091381,  0.05025492, -0.04244261]])

In [26]:
train_x = torch.tensor(train_x, dtype=torch.float32)
train_y = torch.tensor(train_y, dtype=torch.float32)

In [27]:
dataset = TensorDataset(train_x, train_y)

# create a PyTorch dataloader
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [28]:
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

In [29]:
num_epochs = 10

for epoch in range(num_epochs):
    for batch_x, batch_y in dataloader:
        # forward pass
        pred_y = net(batch_x)

        # compute the loss
        loss = loss_fn(pred_y, batch_y)

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # print the epoch and loss
    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")

Epoch 1: Loss = 0.0586
Epoch 2: Loss = 0.0151
Epoch 3: Loss = 0.0139
Epoch 4: Loss = 0.0055
Epoch 5: Loss = 0.0248
Epoch 6: Loss = 0.0083
Epoch 7: Loss = 0.0016
Epoch 8: Loss = 0.0059
Epoch 9: Loss = 0.0137
Epoch 10: Loss = 0.0040


In [34]:
prediction = net.forward(train_x)

In [36]:
prediction.detach().numpy()

array([[ 1.19710401e-01],
       [ 1.27793759e-01],
       [ 1.31556839e-01],
       [ 1.31979674e-01],
       [ 1.10283792e-01],
       [ 6.19655102e-02],
       [ 6.50787503e-02],
       [ 8.50349292e-02],
       [ 1.08745046e-01],
       [ 1.32132486e-01],
       [ 1.04431927e-01],
       [ 1.07868426e-01],
       [ 9.48083028e-02],
       [ 1.04075946e-01],
       [ 9.86773446e-02],
       [ 8.66094232e-02],
       [ 8.62819031e-02],
       [ 1.75026521e-01],
       [ 2.77613223e-01],
       [ 2.02083409e-01],
       [ 1.48318350e-01],
       [ 7.04932362e-02],
       [ 1.02336392e-01],
       [ 1.20446838e-01],
       [ 1.32615238e-01],
       [ 1.38896585e-01],
       [ 1.41266793e-01],
       [ 1.21918574e-01],
       [ 8.11835006e-02],
       [ 7.33847246e-02],
       [ 7.33672455e-02],
       [ 6.92920461e-02],
       [ 9.50543135e-02],
       [ 9.53116044e-02],
       [ 1.03657790e-01],
       [ 1.03394210e-01],
       [ 1.15207620e-01],
       [ 1.31930023e-01],
       [ 7.6

In [8]:
test_data = pd.read_csv("HW4_Test_Data.csv")

In [9]:
test_data

Unnamed: 0,TestTime,TestBldg
0,9/7/14 00:00,0.094276
1,9/7/14 01:00,0.037037
2,9/7/14 02:00,0.040404
3,9/7/14 03:00,0.037037
4,9/7/14 04:00,0.037037
...,...,...
163,9/13/14 19:00,0.164983
164,9/13/14 20:00,0.235690
165,9/13/14 21:00,0.185185
166,9/13/14 22:00,0.168350


In [14]:
size = len(test_data)
random_data = np.random.rand(size, 3)
time_splited_data = pd.DataFrame(random_data, columns=[ 'day_of_week', 'hour_of_day',"normalized_energy"])
for i in range(len(test_data)):
    temp = test_data.loc[i]
    date_string = temp["TestTime"]
    date_obj = datetime.datetime.strptime(date_string, '%m/%d/%y %H:%M')
    day_of_week = date_obj.weekday()
    hour_of_day = date_obj.hour
    value = test_data.loc[i,"TestBldg"]
    time_splited_data.loc[i] = [day_of_week,hour_of_day, value ]

In [15]:
sorted_time_splited_data = time_splited_data.sort_values(by=['day_of_week', 'hour_of_day'], ascending=[True,True])

In [16]:
sorted_time_splited_data = sorted_time_splited_data.reset_index(drop=True)
sorted_time_splited_data

Unnamed: 0,day_of_week,hour_of_day,normalized_energy
0,0.0,0.0,0.188552
1,0.0,1.0,0.084175
2,0.0,2.0,0.057239
3,0.0,3.0,0.040404
4,0.0,4.0,0.040404
...,...,...,...
163,6.0,19.0,0.094276
164,6.0,20.0,0.151515
165,6.0,21.0,0.175084
166,6.0,22.0,0.151515
