In [None]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Dataset
import math
import matplotlib as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import StandardScaler


In [None]:
# Read the raw table from disk and discard the 'Pressure Condition' column
# in a single expression.
data = pd.read_csv('mydata.csv').drop(columns='Pressure Condition')
# Labels applied to the scaled columns further down the notebook.
# Built group by group; the stray spaces in 'Product Flow ' and ' Impurities'
# are kept exactly as they were.
column_names = (
    [f'Temp_stage_{n}' for n in range(1, 12)]
    + [f'Reflux_{n}' for n in range(1, 4)]
    + ['Column Inlet', 'Inlet Flow', 'Product Flow ']
    + [f'hx_{n}_inlet' for n in range(1, 5)]
    + [f'hx_{n}_outlet' for n in range(1, 5)]
    + [f'level_{n}' for n in range(1, 13)]
    + [f'h_gas_det_{n}' for n in range(1, 7)]
    + [' Impurities']
)

In [None]:
# Build a StandardScaler and fit it on the whole table in one step.
# NOTE(review): the fit sees every row, including the rows that are later
# sliced off as test data (from num_data onward) — confirm this is intended.
scaler = StandardScaler().fit(data)

# Scaled version of `data`; the cell displays its shape.
target_1 = scaler.transform(data)
target_1.shape

In [None]:
# Number of leading rows used to build the training windows; rows from
# num_data onward are sliced off separately later in the notebook.
num_data = 8000
target = pd.DataFrame(target_1, columns=column_names).head(num_data)
target.shape

In [None]:
# Input, output and hidden widths are all tied to the number of feature columns.
input_size = output_size = hidden_size = len(target.columns)
sequence_length = 100
# Number of stacked layers (arbitrary choice).
num_layers = 5
# Number of samples per training batch.
batch_size = 32
# Optimizer step sizes.
lr_g = lr_d = 0.00001
# Number of training cycles.
num_epochs = 50

In [None]:
class TimeSeriesDataset(Dataset):
    """Sliding-window view over a time series for sequence-to-sequence training.

    Sample ``i`` is the pair ``(history, future)``: ``history`` holds rows
    ``[i, i + seq_length)`` and ``future`` holds the ``output_steps`` rows that
    immediately follow.

    Args:
        data: Object exposing a ``.values`` array (e.g. a pandas DataFrame),
            one row per time step. It is copied into a float32 tensor.
        seq_length: Number of rows in each input window.
        output_steps: Number of rows in each target window.
    """

    def __init__(self, data, seq_length, output_steps):
        self.data = torch.tensor(data.values, dtype=torch.float32)
        self.seq_length = seq_length
        self.output_steps = output_steps

    def __len__(self):
        # Clamp at zero: a series shorter than one full (history, future) pair
        # has no samples, and a negative value would make len() raise.
        return max(0, len(self.data) - self.seq_length - self.output_steps + 1)

    def __getitem__(self, index):
        """Return the ``(history, future)`` tensors for window ``index``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                Without this, plain iteration over the dataset never stops.
        """
        count = len(self)
        if index < 0:
            index += count
        if not 0 <= index < count:
            raise IndexError(f"window index out of range for dataset of length {count}")
        split = index + self.seq_length
        return self.data[index:split], self.data[split:split + self.output_steps]

In [None]:
# Windowing and model hyper-parameters.
seq_length, output_steps = 10, 10
batch_size = 32
num_heads, dropout = 4, 0.1

# Wrap the training frame in a sliding-window dataset and read it in order
# (no shuffling).
dataset = TimeSeriesDataset(data=target, seq_length=seq_length, output_steps=output_steps)
dataloader = DataLoader(dataset, shuffle=False, batch_size=batch_size)

In [None]:
# # Convert data to numpy
# data = target.to_numpy()

# # Convert numpy array to PyTorch tensor
# tensor = torch.tensor(data, dtype=torch.float32)

# # Create TensorDataset object
# dataset = TensorDataset(tensor)

# # Create DataLoader for batch processing
# dataloader = DataLoader(dataset, batch_size, shuffle=False)

In [None]:
# NOTE: this cell re-declares the TimeSeriesDataset class that an earlier cell
# of this notebook already defines; this later definition is the one in effect
# for any code that runs after it.
class TimeSeriesDataset(Dataset):
    """Sliding-window view over a time series for sequence-to-sequence training.

    Sample ``i`` is the pair ``(history, future)``: ``history`` holds rows
    ``[i, i + seq_length)`` and ``future`` holds the ``output_steps`` rows that
    immediately follow.

    Args:
        data: Object exposing a ``.values`` array (e.g. a pandas DataFrame),
            one row per time step. It is copied into a float32 tensor.
        seq_length: Number of rows in each input window.
        output_steps: Number of rows in each target window.
    """

    def __init__(self, data, seq_length, output_steps):
        self.data = torch.tensor(data.values, dtype=torch.float32)
        self.seq_length = seq_length
        self.output_steps = output_steps

    def __len__(self):
        # Clamp at zero: a series shorter than one full (history, future) pair
        # has no samples, and a negative value would make len() raise.
        return max(0, len(self.data) - self.seq_length - self.output_steps + 1)

    def __getitem__(self, index):
        """Return the ``(history, future)`` tensors for window ``index``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                Without this, plain iteration over the dataset never stops.
        """
        count = len(self)
        if index < 0:
            index += count
        if not 0 <= index < count:
            raise IndexError(f"window index out of range for dataset of length {count}")
        split = index + self.seq_length
        return self.data[index:split], self.data[split:split + self.output_steps]


In [None]:
class Transformer(nn.Module):
    """Encoder-only transformer for multi-step forecasting of a multivariate series.

    The model is batch-first: ``forward`` takes ``(batch, time, input_size)``
    and returns ``(batch, n_output_steps, output_size)``.

    Args:
        input_size: Number of features per input time step.
        output_size: Number of features per predicted time step.
        num_layers: Number of stacked encoder layers.
        hidden_size: Embedding width; also passed as the feed-forward width of
            every encoder layer.
        num_heads: Number of attention heads.
        dropout: Dropout probability used by the positional encoding and the
            encoder layers.
        seq_length: Nominal input window length (stored, not enforced).
        n_output_steps: Number of trailing time steps returned by ``forward``.
    """

    def __init__(self, input_size, output_size, num_layers, hidden_size, num_heads, dropout, seq_length, n_output_steps):
        super().__init__()

        self.seq_length = seq_length
        self.n_output_steps = n_output_steps

        self.embedding = nn.Linear(input_size, hidden_size)
        # Every caller in this notebook passes (batch, time, features), so both
        # the positional encoding and the encoder must treat dim 1 as time.
        # With the sequence-first default, attention would mix the samples of a
        # batch instead of the time steps of one sample.
        self.pos_encoder = PositionalEncoding(hidden_size, dropout, batch_first=True)
        encoder_layer = nn.TransformerEncoderLayer(
            hidden_size, num_heads, hidden_size, dropout, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

        self.init_weights()

    def init_weights(self):
        """Re-initialise the embedding and output projections uniformly in [-0.1, 0.1]."""
        init_range = 0.1
        self.embedding.weight.data.uniform_(-init_range, init_range)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-init_range, init_range)

    def forward(self, input):
        """Encode ``input`` and return the projection of its last ``n_output_steps`` steps.

        Args:
            input: Tensor of shape ``(batch, time, input_size)``.

        Returns:
            Tensor of shape ``(batch, n_output_steps, output_size)``. When the
            window length equals ``n_output_steps`` this is the full projected
            sequence.

        Raises:
            ValueError: If ``input`` has fewer than ``n_output_steps`` time steps.
        """
        steps = input.size(1)
        if steps < self.n_output_steps:
            raise ValueError(
                f"input has {steps} time steps but n_output_steps={self.n_output_steps}"
            )
        hidden = self.embedding(input)       # (batch, time, hidden)
        hidden = self.pos_encoder(hidden)    # add time-indexed encoding
        hidden = self.encoder(hidden)        # self-attention over time
        projected = self.decoder(hidden)     # (batch, time, output_size)
        # Explicit start index (rather than a negative slice) so that
        # n_output_steps == 0 yields an empty result instead of everything.
        return projected[:, steps - self.n_output_steps:, :]


# Define the positional encoding module
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding followed by dropout.

    Args:
        d_model: Feature width of the tensors being encoded.
        dropout: Dropout probability applied after the encoding is added.
        max_len: Largest sequence length that can be encoded.
        batch_first: If ``True`` inputs are ``(batch, time, d_model)``;
            otherwise (the default) they are ``(time, batch, d_model)``.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = batch_first

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer cosine column than sine columns.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Buffer keeps its (max_len, 1, d_model) shape.
        self.register_buffer('pe', pe.unsqueeze(1))

    def forward(self, x):
        """Add the encoding for each time step of ``x`` and apply dropout.

        Raises:
            ValueError: If ``x`` has more time steps than ``max_len``.
        """
        steps = x.size(1) if self.batch_first else x.size(0)
        if steps > self.pe.size(0):
            raise ValueError(
                f"sequence length {steps} exceeds max_len={self.pe.size(0)}"
            )
        encoding = self.pe[:steps]                   # (time, 1, d_model)
        if self.batch_first:
            encoding = encoding.transpose(0, 1)      # (1, time, d_model)
        return self.dropout(x + encoding)


In [None]:
# Build the forecasting network from the hyper-parameters set in earlier cells.
model = Transformer(
    input_size=input_size,
    output_size=output_size,
    num_layers=num_layers,
    hidden_size=hidden_size,
    num_heads=num_heads,
    dropout=dropout,
    seq_length=seq_length,
    n_output_steps=output_steps,
)

# Mean-squared-error objective, minimised with Adam (change either if necessary).
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)


In [None]:
# Fit the model: a fixed number of in-order passes over `dataloader`.
num_epochs = 100  # replaces the value set in the hyper-parameter cell
model.train()  # switch the model to training mode
for epoch in range(num_epochs):
    for i, (inputs, targets) in enumerate(dataloader):
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # Forward pass and loss for this batch.
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Back-propagate and take one optimizer step.
        loss.backward()
        optimizer.step()

        # Report the current batch loss once every 100 batches.
        if (i + 1) % 100 != 0:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(dataloader)}], Loss: {loss.item():.4f}')


In [None]:
# Dummy Run

def _dummy_run():
    """Shape-check an untrained Transformer on random input.

    Everything lives in function scope on purpose: binding `model`,
    `seq_length`, `num_heads`, ... at notebook level here would overwrite the
    trained model and the windowing settings that the forecasting cells
    further down read.
    """
    # Parameters
    batch_size = 32
    seq_length = 100
    input_size = 44
    output_size = 44
    hidden_size = 64
    num_layers = 2
    num_heads = 8
    dropout = 0.1
    n_output_steps = 2

    # Create the transformer model
    dummy_model = Transformer(input_size, output_size, num_layers, hidden_size,
                              num_heads, dropout, seq_length, n_output_steps)

    # Create dummy input data
    dummy_input = torch.rand(batch_size, seq_length, input_size)

    # Pass the input data through the model
    output = dummy_model(dummy_input)

    print(f"Output shape: {output.shape}")

    # Verify that output shape is (batch_size, n_output_steps, output_size)
    assert output.shape == (batch_size, n_output_steps, output_size)


_dummy_run()

In [None]:
def forecast(model, data, seq_length, output_steps):
    """Run `model` over every sliding window of `data` and collect its forecasts.

    Window ``i`` covers rows ``[i, i + seq_length)`` and is fed to the model as
    a batch of one. The last ``output_steps`` rows of the model output are
    stored as forecast ``i``.

    Args:
        model: Module mapping ``(1, seq_length, features)`` to
            ``(1, steps, features)`` with ``steps >= output_steps``. It is left
            in evaluation mode.
        data: Tensor, or anything convertible to a float32 tensor, with one row
            per time step.
        seq_length: Number of rows in each input window.
        output_steps: Number of forecast rows kept per window.

    Returns:
        Tensor of shape ``(max(0, len(data) - seq_length), output_steps, features)``.

    Raises:
        ValueError: If the model returns fewer than ``output_steps`` rows.
    """
    # Ensure the model is in evaluation mode
    model.eval()

    # Make sure data is a torch Tensor
    if not isinstance(data, torch.Tensor):
        data = torch.as_tensor(data, dtype=torch.float32)

    # Number of forecasts; clamp so that data shorter than one window gives an
    # empty result rather than a negative tensor dimension.
    num_forecasts = max(0, len(data) - seq_length)

    # Container for predictions
    predictions = torch.zeros(num_forecasts, output_steps, data.shape[-1])

    # Slide over the time-series data; no gradients are needed for inference.
    with torch.no_grad():
        for i in range(num_forecasts):
            # Window of history with a leading batch dimension of 1.
            window = data[i:i + seq_length].unsqueeze(0)
            out = model(window)[0]
            produced = out.size(0)
            if produced < output_steps:
                raise ValueError(
                    f"model returned {produced} steps but output_steps={output_steps}"
                )
            # Keep only the trailing output_steps rows so models that emit one
            # row per input step also fit the container.
            predictions[i] = out[produced - output_steps:]

    return predictions


In [None]:
# Rows 6000 up to (not including) num_data of the scaled table, all columns.
# NOTE(review): these rows lie inside the range used to build the training
# frame, while later cells plot the resulting forecasts against rows from
# num_data onward — confirm the intended alignment.
pred_data = pd.DataFrame(target_1, columns=column_names).iloc[6000:num_data, :]
pred_data.shape

In [None]:
# Everything from row num_data to the end of the scaled table, all columns.
test_data = pd.DataFrame(target_1, columns=column_names).iloc[num_data:, :]
test_data.shape

In [None]:
# float32 tensor copy of the rows that the forecasting cell will slide over.
input_data = torch.tensor(pred_data.to_numpy(), dtype=torch.float32)

In [None]:
# NumPy view of the held-out rows; displayed as the cell output.
test_array_np = test_data.to_numpy()
test_array_np

In [None]:
# Slide the current model over `input_data` and collect one forecast per
# window; the cell displays the shape of the result.
predictions = forecast(
    model=model,
    data=input_data,
    seq_length=seq_length,
    output_steps=output_steps,
)
predictions.shape


In [None]:
# The notebook's import cell binds `plt` to the top-level `matplotlib` package,
# which has no callable `figure`; bind it to pyplot before plotting.
import matplotlib.pyplot as plt

# One figure per feature column of the test array (width taken from the data
# rather than hard-coded).
for i in range(test_array_np.shape[1]):
    plt.figure(figsize=(12, 6))

    # Plot the actual test data
    plt.plot(test_array_np[:, i], label='Actual')

    plt.title(f"Feature {i+1}")
    plt.legend()
    plt.show()

In [None]:
import matplotlib.pyplot as plt
predictions_np = predictions.numpy()

# One figure per feature: every forecast step is drawn as its own line
# (columns of the 2-D slice), with the test series overlaid.
for i in range(predictions_np.shape[-1]):
    plt.figure(figsize=(12, 6))
    plt.plot(predictions_np[:, :, i])
    plt.plot(test_array_np[:, i], label='Actual')
    plt.title(f"Feature {i+1}")
    # Without this call the 'Actual' label above is never shown.
    plt.legend()
    plt.show()

In [None]:
# Keep only the final forecast step of every window: one row per window.
non_overlap_predictions = predictions_np[:, -1]

print("Shape of non-overlapping predictions: ", non_overlap_predictions.shape)


In [None]:
import matplotlib.pyplot as plt

# Inputs for the comparison: held-out rows as an array, their column labels,
# and the final forecast step of every window.
test_array_np = test_data.values
feature_names = test_data.columns
non_overlap_predictions = predictions_np[:, -1, :]

# One figure per feature, predictions first and actual values on top.
for i in range(non_overlap_predictions.shape[-1]):
    plt.figure(figsize=(12, 6))

    for series, series_label in ((non_overlap_predictions, 'Predictions'),
                                 (test_array_np, 'Actual')):
        plt.plot(series[:, i], label=series_label)

    plt.title(f"{feature_names[i]}")
    plt.legend()
    plt.show()


In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np

# Convert pandas DataFrame to numpy array
test_array_np = test_data.values

# Get non-overlapping predictions
non_overlap_predictions = predictions_np[:, -1, :]

# t-SNE cannot project new points into an existing embedding, and two separate
# fits produce unrelated coordinate systems. Embed both sets in ONE fit and
# split the result, so the two point clouds share the same axes.
tsne = TSNE(n_components=2, random_state=0)
combined_tsne = tsne.fit_transform(np.vstack([test_array_np, non_overlap_predictions]))
n_test = len(test_array_np)
test_tsne = combined_tsne[:n_test]
predictions_tsne = combined_tsne[n_test:]

# Plot test data
plt.scatter(test_tsne[:, 0], test_tsne[:, 1], label='Test data')

# Plot prediction data
plt.scatter(predictions_tsne[:, 0], predictions_tsne[:, 1], label='Predictions')

plt.legend()
plt.show()


In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Held-out rows as an array, and the final forecast step of every window.
test_array_np = test_data.values
non_overlap_predictions = predictions_np[:, -1, :]

# Fit a 2-component projection on the test rows, then reuse that same
# projection for the predictions so both clouds share one coordinate system.
pca = PCA(n_components=2)
test_pca = pca.fit_transform(test_array_np)
predictions_pca = pca.transform(non_overlap_predictions)

# Scatter both projected sets, test data first.
for points, cloud_label in ((test_pca, 'Test data'), (predictions_pca, 'Predictions')):
    plt.scatter(points[:, 0], points[:, 1], label=cloud_label)

plt.legend()
plt.show()
