In [6]:
import pandas as pd
import numpy as np
from scipy.fft import fft, ifft
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [7]:
def preprocess(file_path, column_names, input_size, output_size):
    """Load CSV columns and cut each one into non-overlapping (input, output) windows.

    The selected columns are standardized together with ``StandardScaler``
    (fit on every row of the file). Each column is then split independently
    into consecutive, non-overlapping chunks of ``input_size + output_size``
    samples: the first ``input_size`` samples of a chunk are the input window
    and the remaining ``output_size`` samples are the output window. Trailing
    rows that do not fill a whole chunk are dropped.

    Args:
        file_path: Path of the CSV file, handed to ``pd.read_csv``.
        column_names: List of column labels to extract from the file.
        input_size: Number of samples in each input window (must be >= 0).
        output_size: Number of samples in each output window (must be >= 0).

    Returns:
        Tuple ``(inputs, outputs)`` of 2-D NumPy arrays with shapes
        ``(n_columns * n_windows, input_size)`` and
        ``(n_columns * n_windows, output_size)``. Rows are ordered column by
        column, and by window position within each column. When the file is
        shorter than one chunk the arrays have zero rows but keep their
        second dimension.

    Raises:
        ValueError: If either size is negative or both sizes are zero.
    """
    if input_size < 0 or output_size < 0:
        raise ValueError("input_size and output_size must be non-negative")
    window = input_size + output_size
    if window == 0:
        raise ValueError("input_size + output_size must be greater than zero")

    df = pd.read_csv(file_path)
    data = df[column_names].values
    # NOTE(review): the scaler is fit on all rows, including the samples that
    # end up in output windows -- confirm this is acceptable for evaluation.
    normalized = StandardScaler().fit_transform(data)

    num_rows, num_cols = normalized.shape
    num_windows = num_rows // window

    # (rows, cols) -> (cols, windows, window): element [c, j, k] is the k-th
    # sample of the j-th chunk of column c. Flattening the first two axes
    # yields the column-major window ordering described in the docstring.
    usable = normalized[: num_windows * window]
    chunks = usable.T.reshape(num_cols, num_windows, window)
    chunks = chunks.reshape(num_cols * num_windows, window)

    inputs = chunks[:, :input_size].copy()
    outputs = chunks[:, input_size:].copy()

    return inputs, outputs

# Smoke test on a small sample file: windows of 3 input samples followed by
# 2 output samples, taken from three columns.
train_inputs, train_outputs = preprocess(
    file_path='/kaggle/input/electricity/sample.csv',
    column_names=['column1', 'column2', 'column3'],
    input_size=3,
    output_size=2,
)

print("Train Inputs:\n", train_inputs)
print("Train Outputs:\n", train_outputs)

Train Inputs:
 [[-1.5666989  -1.21854359 -0.87038828]
 [ 0.17407766  0.52223297  0.87038828]
 [-1.5666989  -1.21854359 -0.87038828]
 [ 0.17407766  0.52223297  0.87038828]
 [-1.5666989  -1.21854359 -0.87038828]
 [ 0.17407766  0.52223297  0.87038828]]
Train Outputs:
 [[-0.52223297 -0.17407766]
 [ 1.21854359  1.5666989 ]
 [-0.52223297 -0.17407766]
 [ 1.21854359  1.5666989 ]
 [-0.52223297 -0.17407766]
 [ 1.21854359  1.5666989 ]]


In [10]:
def apply_transform(signal):
    """Encode each row of ``signal`` as its FFT, real parts followed by imaginary parts.

    Args:
        signal: Array with at least two dimensions; the FFT is taken along axis 1.

    Returns:
        Real-valued array whose axis 1 is twice as long as the input's:
        the real components of the spectrum come first, the imaginary
        components second.
    """
    spectrum = np.fft.fft(signal, axis=1)
    # Stack [Re | Im] side by side so the result can be used as a real-valued target.
    return np.hstack((spectrum.real, spectrum.imag))

def apply_inverse(combined):
    """Decode a ``[real | imag]`` spectrum layout back into a time-domain signal.

    Args:
        combined: 2-D array whose first half of columns holds the real parts
            of a spectrum and whose second half holds the imaginary parts.

    Returns:
        The real component of the inverse FFT taken along axis 1. Any
        imaginary remainder of the inverse transform is discarded.
    """
    split_at = combined.shape[1] // 2
    re = combined[:, :split_at]
    im = combined[:, split_at:]
    recovered = np.fft.ifft(re + 1j * im, axis=1)
    return recovered.real

# Round-trip sanity check: encode the sample output windows in the frequency
# domain, then decode them again.
tmp = apply_transform(train_outputs)
print(tmp)
# The decoded values are expected to match train_outputs printed earlier.
print(apply_inverse(tmp))

[[-0.69631062 -0.34815531  0.          0.        ]
 [ 2.7852425  -0.34815531  0.          0.        ]
 [-0.69631062 -0.34815531  0.          0.        ]
 [ 2.7852425  -0.34815531  0.          0.        ]
 [-0.69631062 -0.34815531  0.          0.        ]
 [ 2.7852425  -0.34815531  0.          0.        ]]
[[-0.52223297 -0.17407766]
 [ 1.21854359  1.5666989 ]
 [-0.52223297 -0.17407766]
 [ 1.21854359  1.5666989 ]
 [-0.52223297 -0.17407766]
 [ 1.21854359  1.5666989 ]]


In [11]:
class FullyConnectedNN(nn.Module):
    """Plain multilayer perceptron with ReLU activations between linear layers.

    The network always contains at least one hidden layer: an input
    projection to ``hidden_units`` followed by ReLU, then
    ``hidden_layers - 1`` further ``hidden_units -> hidden_units`` layers
    (each followed by ReLU), and finally a linear projection to
    ``output_size`` with no activation.
    """

    def __init__(self, input_size, output_size, hidden_layers, hidden_units):
        """Store the configuration and build the layer stack in ``self.model``.

        Args:
            input_size: Number of features fed into the first linear layer.
            output_size: Number of values produced by the last linear layer.
            hidden_layers: Requested number of hidden layers; values of 1 or
                less still produce a single hidden layer.
            hidden_units: Width of every hidden layer.
        """
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers = hidden_layers
        self.hidden_units = hidden_units

        # Input width of each hidden block: the first one consumes the raw
        # features, every later one consumes the previous hidden activations.
        block_inputs = [input_size] + [hidden_units] * (hidden_layers - 1)

        stack = []
        for in_width in block_inputs:
            stack += [nn.Linear(in_width, hidden_units), nn.ReLU()]
        stack.append(nn.Linear(hidden_units, output_size))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential layer stack and return the result."""
        return self.model(x)


In [15]:
def train_model(model, train_inputs, train_outputs, val_inputs, val_outputs, num_epochs=20, learning_rate=0.001, batch_size=32):
    """Fit ``model`` with Adam on an MSE objective and report losses periodically.

    The model is moved to the module-level ``device`` (which must be defined
    before this function is called) and trained in place. Every fifth epoch,
    starting with the first, the sample-weighted mean training and
    validation losses are printed.

    Args:
        model: ``nn.Module`` to optimize; it is modified in place.
        train_inputs: Training features, convertible with ``torch.tensor``.
        train_outputs: Training targets, convertible with ``torch.tensor``.
        val_inputs: Validation features, convertible with ``torch.tensor``.
        val_outputs: Validation targets, convertible with ``torch.tensor``.
        num_epochs: Number of passes over the training data.
        learning_rate: Learning rate passed to Adam.
        batch_size: Mini-batch size for both data loaders.

    Returns:
        None.
    """

    def _make_loader(features, targets, shuffle):
        # Wrap a (features, targets) pair as float32 tensors in a DataLoader.
        dataset = TensorDataset(torch.tensor(features).float(), torch.tensor(targets).float())
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    loss_fn = nn.MSELoss()
    adam = optim.Adam(model.parameters(), lr=learning_rate)

    train_loader = _make_loader(train_inputs, train_outputs, shuffle=True)
    val_loader = _make_loader(val_inputs, val_outputs, shuffle=False)

    model.to(device)

    for epoch in range(num_epochs):
        # ---- optimization pass over the training set ----
        model.train()
        train_total = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            adam.zero_grad()
            batch_loss = loss_fn(model(batch_x), batch_y)
            batch_loss.backward()
            adam.step()

            # Weight by batch size so the epoch figure is a per-sample mean
            # even when the last batch is smaller.
            train_total += batch_loss.item() * batch_x.size(0)
        train_loss = train_total / len(train_loader.dataset)

        # ---- evaluation pass over the validation set (no gradients) ----
        model.eval()
        val_total = 0.0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)
                val_total += loss_fn(model(batch_x), batch_y).item() * batch_x.size(0)
        val_loss = val_total / len(val_loader.dataset)

        if not epoch % 5:
            print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

In [13]:
def predict(model, inputs):
    """Run ``model`` on ``inputs`` without gradients and return a NumPy array.

    The model is switched to evaluation mode, and the inputs are converted to
    a float32 tensor on the module-level ``device`` (which must already be
    defined).

    Args:
        model: ``nn.Module`` to evaluate.
        inputs: Input features, convertible with ``torch.tensor``.

    Returns:
        The model output copied to the CPU as a NumPy array.
    """
    model.eval()
    batch = torch.tensor(inputs).float().to(device)

    with torch.no_grad():
        predictions = model(batch)

    return predictions.cpu().numpy()

def measure_mse(actual, predicted):
    """Return the mean squared error between ``actual`` and ``predicted``.

    Thin wrapper that delegates to ``mean_squared_error`` with ``actual`` as
    the reference values and ``predicted`` as the estimates.
    """
    return mean_squared_error(actual, predicted)

In [23]:
def pipeline(file_path='/kaggle/input/electricity/ETTm1.csv',
             train_columns=('HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL'),
             val_columns=('LULL',),
             test_columns=('OT',),
             input_size=1000,
             output_size=50,
             hidden_layers=5,
             hidden_units=128,
             num_epochs=100):
    """Train a frequency-domain forecaster on CSV columns and report its test MSE.

    Windows are cut from separate column groups for training, validation and
    testing. The network maps an input window to the FFT coefficients (real
    parts followed by imaginary parts) of the window that follows it.
    Predictions on the test windows are converted back to the time domain
    and compared with the true output windows.

    Calling ``pipeline()`` with no arguments reproduces the original
    hard-coded experiment.

    Args:
        file_path: CSV file read for all three splits.
        train_columns: Column labels used to build training windows.
        val_columns: Column labels used to build validation windows.
        test_columns: Column labels used to build test windows.
        input_size: Number of samples in each input window.
        output_size: Number of samples in each predicted window; the network
            emits ``2 * output_size`` values.
        hidden_layers: Hidden-layer count passed to ``FullyConnectedNN``.
        hidden_units: Hidden-layer width passed to ``FullyConnectedNN``.
        num_epochs: Number of training epochs.

    Returns:
        None. The MSE, the predictions and the true test windows are printed.
    """
    #Read and preprocess CSV files
    # list(...) matters: indexing a DataFrame with a tuple is treated as a
    # single key rather than a selection of several columns.
    train_inputs, train_outputs = preprocess(file_path, list(train_columns), input_size, output_size)
    val_inputs, val_outputs = preprocess(file_path, list(val_columns), input_size, output_size)
    test_inputs, test_outputs = preprocess(file_path, list(test_columns), input_size, output_size)

    #Calculate the attribute vectors for all the train outputs
    train_outputs_attr = apply_transform(train_outputs)
    val_outputs_attr = apply_transform(val_outputs)

    # Initialize the model; the output layer holds real and imaginary parts,
    # hence twice the window length.
    model = FullyConnectedNN(input_size, 2 * output_size, hidden_layers, hidden_units)

    # Train the model
    train_model(model, train_inputs, train_outputs_attr, val_inputs, val_outputs_attr, num_epochs=num_epochs)

    #Apply the model to make inferences on the test data inputs
    predicted_outputs = predict(model, test_inputs)

    #Apply the inverse DFT on the predicted outputs to get back in time domain
    result = apply_inverse(predicted_outputs)

    #Measure MSE between predicted and actual outputs
    mse = measure_mse(test_outputs, result)
    print(f'Average MSE: {mse:.4f}')
    print(result)
    print(test_outputs)
 
# Prefer the GPU when one is visible to PyTorch; train_model and predict read
# this module-level name, so it must be set before pipeline() runs.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")
pipeline()

Using device: cuda
Epoch 1/100, Training Loss: 22.2620, Validation Loss: 22.5418
Epoch 6/100, Training Loss: 9.4286, Validation Loss: 6.1188
Epoch 11/100, Training Loss: 6.4682, Validation Loss: 6.3694
Epoch 16/100, Training Loss: 5.0529, Validation Loss: 7.7710
Epoch 21/100, Training Loss: 3.5383, Validation Loss: 6.7647
Epoch 26/100, Training Loss: 2.9137, Validation Loss: 6.7992
Epoch 31/100, Training Loss: 2.2711, Validation Loss: 7.5745
Epoch 36/100, Training Loss: 2.1567, Validation Loss: 7.0464
Epoch 41/100, Training Loss: 1.9291, Validation Loss: 6.9409
Epoch 46/100, Training Loss: 2.7101, Validation Loss: 7.8915
Epoch 51/100, Training Loss: 1.6516, Validation Loss: 6.9124
Epoch 56/100, Training Loss: 1.4637, Validation Loss: 6.6464
Epoch 61/100, Training Loss: 1.5834, Validation Loss: 6.7873
Epoch 66/100, Training Loss: 1.2970, Validation Loss: 6.4707
Epoch 71/100, Training Loss: 1.2275, Validation Loss: 6.5852
Epoch 76/100, Training Loss: 1.3201, Validation Loss: 6.5520
Epoch