In [None]:
import pandas as pd
import numpy as np
from scipy.fft import fft, ifft
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.cuda.amp import autocast, GradScaler
from sklearn.model_selection import train_test_split

In [None]:
def preprocess(file_path, column_names, input_size, output_size, skip):
    """Cut each selected CSV column into (input, target) sliding windows.

    Every selected column is treated as an independent univariate series. A
    window pair starts every ``skip`` rows: the first ``input_size`` values are
    the input and the ``output_size`` values that follow are the target. Pairs
    from all columns are stacked, column after column. Values are used exactly
    as read from the file; no scaling is applied.

    Args:
        file_path: Path of the CSV file, handed to ``pd.read_csv``.
        column_names: List of column labels to extract.
        input_size: Number of consecutive values in each input window.
        output_size: Number of consecutive values in each target window.
        skip: Step, in rows, between the starts of consecutive windows.

    Returns:
        Tuple ``(inputs, outputs)`` of arrays shaped ``(n_windows, input_size)``
        and ``(n_windows, output_size)``. When the series is too short for a
        single window, ``n_windows`` is 0 and the arrays keep their second
        dimension.

    Raises:
        ValueError: If ``input_size``, ``output_size`` or ``skip`` is below 1.
    """
    if input_size < 1 or output_size < 1 or skip < 1:
        raise ValueError(
            "input_size, output_size and skip must all be >= 1, got "
            f"input_size={input_size}, output_size={output_size}, skip={skip}"
        )

    data = pd.read_csv(file_path)[column_names].values
    window_size = input_size + output_size

    inputs = []
    outputs = []
    for col_data in data.T:
        # Last valid start is the one whose target window ends on the final row.
        for start in range(0, len(col_data) - window_size + 1, skip):
            split = start + input_size
            inputs.append(col_data[start:split])
            outputs.append(col_data[split:start + window_size])

    if not inputs:
        # np.array([]) would collapse to shape (0,); keep the window widths so
        # downstream code that indexes axis 1 still works.
        return (
            np.empty((0, input_size), dtype=data.dtype),
            np.empty((0, output_size), dtype=data.dtype),
        )

    return np.array(inputs), np.array(outputs)

# Quick look at the windowing on a small sample file:
# 3-value inputs, 3-value targets, a new window every 2 rows.
train_inputs, train_outputs = preprocess(
    file_path='/kaggle/input/electricity/sample.csv',
    column_names=['column1', 'column2', 'column3'],
    input_size=3,
    output_size=3,
    skip=2,
)

print("Train Inputs:\n", train_inputs)
print("Train Outputs:\n", train_outputs)

In [None]:
import numpy as np

def apply_transform(signal):
    """Encode each row of ``signal`` as DFT magnitudes followed by DFT phases.

    Args:
        signal: Array transformed with ``np.fft.fft`` along axis 1.

    Returns:
        Array made by joining, along axis 1, the absolute values of the
        spectrum and then its angles (as returned by ``np.angle``), so axis 1
        is twice as long as in ``signal``.
    """
    spectrum = np.fft.fft(signal, axis=1)
    return np.concatenate([np.abs(spectrum), np.angle(spectrum)], axis=1)

def apply_inverse(combined):
    """Rebuild time-domain rows from a magnitude/phase encoding.

    Args:
        combined: Array whose first half along axis 1 holds magnitudes and
            whose remaining columns hold the matching phases.

    Returns:
        Real part of the inverse FFT (along axis 1) of the complex spectrum
        ``magnitude * (cos(phase) + 1j * sin(phase))``; any imaginary part of
        the inverse is discarded.
    """
    n_bins = combined.shape[1] // 2
    amplitudes, angles = combined[:, :n_bins], combined[:, n_bins:]
    spectrum = amplitudes * np.cos(angles) + 1j * (amplitudes * np.sin(angles))
    return np.real(np.fft.ifft(spectrum, axis=1))


# Round-trip check on the sample windows: encode the targets as
# magnitude/phase, print the encoding, then decode it again.
# For real-valued input the second printout should reproduce `train_outputs`
# up to floating-point error.
tmp = apply_transform(train_outputs)
print(tmp)
print(apply_inverse(tmp))

In [None]:
class FullyConnectedNN(nn.Module):
    """Multilayer perceptron: a Linear/ReLU hidden stack and a linear output layer."""

    def __init__(self, input_size, output_size, hidden_layers, hidden_units):
        """Build the layer stack and keep the configuration as attributes.

        Args:
            input_size: Number of features fed to the first linear layer.
            output_size: Number of features produced by the last linear layer.
            hidden_layers: Requested number of hidden Linear+ReLU pairs; one
                pair is always created, even for values below 1.
            hidden_units: Width shared by every hidden layer.
        """
        super().__init__()

        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers = hidden_layers
        self.hidden_units = hidden_units

        # Consecutive entries give (in_features, out_features) of each hidden
        # linear layer: input -> hidden, then hidden -> hidden repeated.
        widths = [input_size] + [hidden_units] * (1 + max(hidden_layers - 1, 0))

        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.extend([nn.Linear(fan_in, fan_out), nn.ReLU()])
        # No activation after the output projection.
        stack.append(nn.Linear(hidden_units, output_size))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the sequential layer stack and return the result."""
        return self.model(x)


In [None]:
def train_model(model, train_inputs, train_outputs, val_inputs, val_outputs, num_epochs=20, learning_rate=0.001, batch_size=32, device=None):
    """Fit ``model`` with Adam on an MSE objective and report validation loss.

    Args:
        model: ``nn.Module`` to train; it is moved to ``device`` in place.
        train_inputs: Array-like of training features, one sample per row.
        train_outputs: Array-like of training targets, one sample per row.
        val_inputs: Array-like of validation features.
        val_outputs: Array-like of validation targets.
        num_epochs: Number of passes over the training set.
        learning_rate: Learning rate given to ``optim.Adam``.
        batch_size: Mini-batch size for both data loaders.
        device: ``torch.device`` (or device string) to train on. When omitted,
            CUDA is used if ``torch.cuda.is_available()`` and the CPU otherwise,
            so the function no longer needs a module-level ``device`` variable.

    Returns:
        Dict with keys ``"train_loss"`` and ``"val_loss"``, each a list holding
        the per-sample average MSE of every epoch.

    Raises:
        ValueError: If the training or the validation set contains no samples.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _as_dataset(features, targets):
        # One float32 conversion per array; also accepts existing tensors.
        return TensorDataset(
            torch.as_tensor(features, dtype=torch.float32),
            torch.as_tensor(targets, dtype=torch.float32),
        )

    train_dataset = _as_dataset(train_inputs, train_outputs)
    val_dataset = _as_dataset(val_inputs, val_outputs)
    if len(train_dataset) == 0 or len(val_dataset) == 0:
        # Without this the epoch averages below would divide by zero.
        raise ValueError(
            "train_model needs at least one training sample and one validation sample"
        )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    history = {"train_loss": [], "val_loss": []}

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # criterion averages over the batch; weight by batch size so the
            # epoch figure is a true per-sample mean even with a short last batch.
            train_loss += loss.item() * inputs.size(0)

        train_loss /= len(train_dataset)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item() * inputs.size(0)

        val_loss /= len(val_dataset)

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)

        # Report every fifth epoch and always the last one, so the final
        # losses are visible whatever num_epochs is.
        if epoch % 5 == 0 or epoch == num_epochs - 1:
            print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

    return history

In [None]:
def predict(model, inputs):
    """Run ``model`` on ``inputs`` without gradients and return a NumPy array.

    The inputs are moved to the device the model's parameters already live on,
    so the call does not depend on a module-level ``device`` variable and cannot
    place the batch on a different device from the model.

    Args:
        model: ``nn.Module`` to evaluate; it is switched to eval mode and left
            that way.
        inputs: Array-like (or tensor) of features, converted to float32.

    Returns:
        The model output copied to the CPU as a ``numpy.ndarray``.
    """
    model.eval()

    # A module without parameters has no device of its own; fall back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    batch = torch.as_tensor(inputs, dtype=torch.float32).to(target_device)

    with torch.no_grad():
        outputs = model(batch)
    return outputs.cpu().numpy()

def measure_mse(actual, predicted):
    """Return the mean squared error between ``actual`` and ``predicted``.

    Thin wrapper that forwards both arguments, in this order, to
    ``mean_squared_error``.
    """
    return mean_squared_error(actual, predicted)

In [None]:
def pipeline(file_path='/kaggle/input/electricity/ETTm1.csv',
             train_columns=('HUFL', 'HULL', 'MUFL', 'MULL'),
             test_columns=('LUFL', 'LULL'),
             input_size=512,
             output_size=96,
             skip=10,
             num_epochs=50,
             seed=42):
    """Train and evaluate a frequency-domain forecaster end to end.

    Windows are cut from ``train_columns`` (split 80/20 into train and
    validation) and from ``test_columns`` (held out as the test set). The
    network maps an input window to the magnitude/phase encoding of the target
    window. MSE is printed both in that encoded space and, after decoding the
    predictions, in the time domain. Called without arguments it uses the same
    file, columns and sizes that were previously hard-coded.

    Args:
        file_path: CSV file read by ``preprocess``.
        train_columns: Column labels used for training and validation windows.
        test_columns: Column labels used for test windows.
        input_size: Length of each input window.
        output_size: Length of each target window.
        skip: Step between the starts of consecutive windows.
        num_epochs: Number of training epochs.
        seed: Seed for the train/validation split and for PyTorch (weight
            initialisation and batch shuffling). ``None`` leaves both unseeded.

    Returns:
        None. All results are printed.
    """
    if seed is not None:
        # The split was already seeded; seed torch too so reruns are comparable.
        torch.manual_seed(seed)

    # Read and window the CSV. Columns are passed as lists because pandas
    # treats a tuple as a single (MultiIndex) key.
    input_data, output_data = preprocess(file_path, list(train_columns), input_size, output_size, skip)
    test_inputs, test_outputs = preprocess(file_path, list(test_columns), input_size, output_size, skip)

    # NOTE(review): with skip smaller than the window length, neighbouring
    # windows overlap, and this shuffled split can put overlapping windows on
    # both sides, so the validation loss may be optimistic. The test columns are
    # separate series and are not affected.
    train_inputs, val_inputs, train_outputs, val_outputs = train_test_split(
        input_data, output_data, test_size=0.2, random_state=seed
    )

    # label -> (inputs, time-domain targets); insertion order drives print order.
    splits = {
        'Train': (train_inputs, train_outputs),
        'Validation': (val_inputs, val_outputs),
        'Test': (test_inputs, test_outputs),
    }

    for label, (split_inputs, _) in splits.items():
        print(f"{label} Inputs Shape:", split_inputs.shape)
    for label, (_, split_outputs) in splits.items():
        print(f"{label} Outputs Shape:", split_outputs.shape)

    # Magnitude/phase encoding of every target window.
    targets_attr = {
        label: apply_transform(split_outputs)
        for label, (_, split_outputs) in splits.items()
    }

    # The encoding has one magnitude and one phase per sample, hence 2 * output_size.
    model = FullyConnectedNN(input_size, 2 * output_size, 5, 128)

    print("Training")
    train_model(model, train_inputs, targets_attr['Train'], val_inputs, targets_attr['Validation'], num_epochs)

    # MSE in the encoded (magnitude/phase) space.
    print('\nTraining Losses:')
    predictions = {
        label: predict(model, split_inputs)
        for label, (split_inputs, _) in splits.items()
    }
    for label in splits:
        mse = measure_mse(targets_attr[label], predictions[label])
        print(f'{label} MSE: {mse:.4f}')

    # Decode the predictions with the inverse DFT and score them in the time domain.
    print('\nEvaluation Losses:')
    for label, (_, split_outputs) in splits.items():
        mse = measure_mse(split_outputs, apply_inverse(predictions[label]))
        print(f'{label} MSE: {mse:.4f}')

    total_params = sum(p.numel() for p in model.parameters())
    print(f"\nTotal number of parameters: {total_params}")
 
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")
pipeline()