In [None]:
import os
import pandas as pd

# 1. Data Load

- setting the data path
- load the data with **pandas**

In [None]:
# Build the CSV location step by step: data root -> k-gas folder -> file.
data_dir = '../data/'
k_gas_dir = os.path.join(data_dir, 'k-gas')  # k-gas data directory
k_gas_csv_path = os.path.join(k_gas_dir, 'Gas sales with temperature.csv')

# Read the whole CSV into a DataFrame.
k_gas_df = pd.read_csv(k_gas_csv_path)

## show the data

In [None]:
# Quick overview of the loaded table; `end='\n\n'` leaves one blank line
# between the sections.
print('k-gas data shape: ', k_gas_df.shape, end='\n\n')
print('k-gas data columns: ', k_gas_df.columns, end='\n\n')
print('k-gas data head: ', k_gas_df.head())

# 2. Split the data based on **your purpose**

- This dataset is sequential data, so we need to split the data based on `Year`.

- So, the rows are split by `Year`: years before 2018 for training, 2018–2019 for validation, and 2020 for testing.

In [None]:
# Create train, validation, and test sets
year_col = k_gas_df['Year']  # the column every split below is keyed on

train_df = k_gas_df.loc[year_col < 2018]              # rows with Year before 2018
val_df = k_gas_df.loc[year_col.isin([2018, 2019])]    # rows with Year 2018 or 2019
test_df = k_gas_df.loc[year_col == 2020]              # rows with Year equal to 2020

# 3. Make the Model

- import torch library

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

- We will use an **MLP** (multi-layer perceptron) model.

In [None]:
# Define the MLP model
class My_MLP(nn.Module):
    """Two-layer perceptron: Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        input_size: number of input features fed to the first linear layer.
        hidden_size: width of the hidden layer (also the BatchNorm1d feature count).
        output_size: number of values produced by the second linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are registered in the order fc1, bn1, fc2.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply fc1, batch norm and ReLU, then project with fc2."""
        hidden = torch.relu(self.bn1(self.fc1(x)))
        return self.fc2(hidden)

- set the **hyper-parameters**

In [None]:
# Input columns fed to the model (order defines the feature order).
features = [
    'Gangwondo',
    'Seoul',
    'Gyeonggido',
    'Incheon',
    'Gyeongsangnamdo',
    'Gyeongsangbukdo',
    'Gwangju',
    'Daegu',
    'Daejeon',
    'Busan',
    'Sejong',
    'Ulsan',
    'Jeollanamdo',
    'Jeollabukdo',
    'Jeju',
    'Chungcheongnamdo',
    'Chungcheongbukdo',
]

In [None]:
input_size = len(features)  # number of input features (one per column in `features`)
hidden_size = 50  # width of the MLP hidden layer
output_size = 1  # output size
learning_rate = 0.001  # step size passed to the optimizer
batch_size = 8  # mini-batch size passed to the DataLoaders

- Check your `My_MLP` model

In [None]:
# Instantiate the network from the hyper-parameters defined above.
model = My_MLP(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

In [None]:
# Print the module's text representation to inspect its registered layers.
print(model)

- Loss and Optimizer

In [None]:
# Mean-squared-error objective.
criterion = nn.MSELoss()

# Adam updates every parameter of `model` with the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# 4. Data Pre-processing (Custom Dataset)

- A Year is 12 months, so I need to split the data based on `Month`.

- I split the data based on `Month`.

In [None]:
from torch.utils.data import TensorDataset, DataLoader

- For each split, we extract the feature columns as inputs and `Temperature` as the target.

In [None]:
# Create train, validation, and test sets
target_col = 'Temperature'  # column the model is trained to predict

# For each split: feature matrix from `features`, target vector from `target_col`.
x_train, y_train = train_df[features].values, train_df[target_col].values
x_val, y_val = val_df[features].values, val_df[target_col].values
x_test, y_test = test_df[features].values, test_df[target_col].values

## Convert the data to **torch tensor**.

In [None]:
# Copy the training arrays into float32 tensors.
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

In [None]:
# Report the shapes of the training tensors.
for shape_label, shown_tensor in (
    ('X_train_tensor shape: ', X_train_tensor),
    ('Y_train_tensor shape: ', y_train_tensor),
):
    print(shape_label, shown_tensor.shape)

In [None]:
# Copy the validation arrays into float32 tensors.
X_val_tensor = torch.tensor(x_val, dtype=torch.float32)
Y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

In [None]:
# Report the shapes of the validation tensors.
for shape_label, shown_tensor in (
    ('X_val_tensor shape: ', X_val_tensor),
    ('Y_val_tensor shape: ', Y_val_tensor),
):
    print(shape_label, shown_tensor.shape)

In [None]:
# Copy the test arrays into float32 tensors.
X_test_tensor = torch.tensor(x_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [None]:
# Report the shapes of the test tensors.
for shape_label, shown_tensor in (
    ('X_test_tensor shape: ', X_test_tensor),
    ('Y_test_tensor shape: ', Y_test_tensor),
):
    print(shape_label, shown_tensor.shape)

# 5. Data Loader

- Check the data shape.

In [None]:
# Show the input tensor shape of every split side by side.
for shape_label, shown_tensor in (
    ('X_train_tensor shape: ', X_train_tensor),
    ('X_val_tensor shape: ', X_val_tensor),
    ('X_test_tensor shape: ', X_test_tensor),
):
    print(shape_label, shown_tensor.shape)

## Create TensorDatasets

- We will use **TensorDataset** directly, because the DataFrame has already been converted to tensors and we only need to feed them in *mini-batches*.

- In practice, however, you would usually write a **custom Dataset** class, such as `GasDataset`.

In [None]:
# Pair each split's inputs with its targets, in train / val / test order.
train_data, val_data, test_data = (
    TensorDataset(split_inputs, split_targets)
    for split_inputs, split_targets in (
        (X_train_tensor, y_train_tensor),
        (X_val_tensor, Y_val_tensor),
        (X_test_tensor, Y_test_tensor),
    )
)

## Create DataLoader

In [None]:
# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

# 6. Training and Evaluation

In [None]:
# Training
num_epochs = 1000  # number of passes over the training set
log_interval = 100  # validate and print losses every `log_interval` epochs

for epoch in range(num_epochs):
    model.train()  # once per epoch is enough; eval() below is undone here
    train_loss_sum, train_count = 0.0, 0
    for inputs, labels in train_loader:
        batch_n = inputs.size(0)
        if batch_n < 2:
            # BatchNorm1d raises in training mode when a batch holds a single
            # sample, so a trailing batch of one is skipped instead of crashing.
            continue
        optimizer.zero_grad()
        y_pred = model(inputs)
        # Squeeze only the last axis: a bare squeeze() would also drop the
        # batch axis of a one-sample batch and mismatch `labels`.
        loss = criterion(y_pred.squeeze(-1), labels)
        loss.backward()
        optimizer.step()
        # `criterion` is a per-batch mean; weight by batch size so the epoch
        # figure is a true per-sample mean.
        train_loss_sum += loss.item() * batch_n
        train_count += batch_n

    # Evaluation
    if (epoch+1) % log_interval == 0:
        model.eval()
        val_loss_sum, val_count = 0.0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                batch_n = inputs.size(0)
                y_pred_val = model(inputs)
                val_loss = criterion(y_pred_val.squeeze(-1), labels)
                val_loss_sum += val_loss.item() * batch_n
                val_count += batch_n
        # Report NaN rather than dividing by zero when a split yields no samples.
        epoch_train_loss = train_loss_sum / train_count if train_count else float('nan')
        epoch_val_loss = val_loss_sum / val_count if val_count else float('nan')
        print(f'Epoch {epoch+1} | Train Loss: {epoch_train_loss} | Validation Loss: {epoch_val_loss}')



In [None]:
# Testing
model.eval()  # batch-norm uses its running statistics during evaluation
test_loss_sum, test_count = 0.0, 0
with torch.no_grad():
    for inputs, labels in test_loader:
        batch_n = inputs.size(0)
        y_pred_test = model(inputs)
        # Squeeze only the last axis so a one-sample batch keeps its batch
        # axis and still lines up with `labels`.
        test_loss = criterion(y_pred_test.squeeze(-1), labels)
        # Weight each batch mean by its size so a smaller final batch does not
        # skew the overall mean.
        test_loss_sum += test_loss.item() * batch_n
        test_count += batch_n
# Report NaN rather than dividing by zero when the test split is empty.
mean_test_loss = test_loss_sum / test_count if test_count else float('nan')
print(f'Test Loss: {mean_test_loss}')