In [45]:
import pandas as pd
import numpy as np
# Load the distillation tower measurements and discard the 'Date' column so that
# only the remaining columns feed the statistics below.
df = pd.read_csv('distillation-tower.csv').drop(columns='Date')

# Per-column mean and standard deviation of the loaded data.
means, std_devs = df.mean(), df.std()

In [46]:
num_samples = 400
SEED = 42  # fixed so the synthetic rows are identical after every kernel restart

# Use a dedicated, seeded generator rather than the unseeded global NumPy state;
# otherwise the synthetic rows differ on every run.
rng = np.random.default_rng(SEED)

# Draw standard-normal noise with shape (num_samples, df.shape[1]), then rescale each
# column with the per-column standard deviation and shift it by the per-column mean.
# NOTE(review): each column is sampled independently of the others, so the synthetic
# rows do not reproduce any relationship between columns (the target column included)
# -- confirm this is the intended augmentation.
synthetic_data = rng.standard_normal((num_samples, df.shape[1])) * std_devs.values + means.values
synthetic_df = pd.DataFrame(synthetic_data, columns=df.columns)

# Check the generated synthetic data
print(synthetic_df.head())

        Temp1      FlowC1       Temp2      TempC1       Temp3      TempC2  \
0  128.124615  270.188088  386.482357   93.147476  483.041930  471.815716   
1  124.822382  342.350696  365.314885  113.742532  500.098009  499.326630   
2  130.981005  174.068615  383.231966  118.635576  517.115613  476.094488   
3  146.890003  196.145094  385.233360  109.397983  462.545515  482.847117   
4  143.796712  295.615358  371.947968  131.637406  487.979805  491.032501   

       TempC3       Temp4  PressureC1       Temp5  ...      Temp10    FlowC3  \
0  170.982055  182.782846  237.826934  476.521888  ...  528.817932  9.643187   
1  147.285840  190.453981  263.724524  489.703322  ...  480.160107  9.050446   
2  181.231110  168.834443  247.507934  489.443189  ...  488.379484  4.372386   
3  166.766495  195.146423  230.344701  497.128195  ...  472.799323  6.853414   
4  177.407165  176.345540  228.218523  497.502339  ...  508.916426  6.879302   

      FlowC4     Temp11      Temp12  InvTemp1  InvTemp2 

In [47]:
# Stack the synthetic rows beneath the loaded rows and renumber the index from 0.
combined_df = pd.concat(objs=[df, synthetic_df], axis=0, ignore_index=True)

In [48]:
# Target: the 'VapourPressure' column of the combined frame.
y = combined_df['VapourPressure']

# Features: every remaining column, in its original order.
X = combined_df.drop(columns='VapourPressure')

In [49]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# same rows land in each partition on every run.
(
    X_train_control,
    X_test_control,
    y_train_control,
    y_test_control,
) = train_test_split(X, y, random_state=42, test_size=0.2)

In [50]:
import torch
from torch.utils.data import TensorDataset, DataLoader

def _to_float32(values_holder):
    """Build a float32 tensor from the ``.values`` array of a pandas object."""
    return torch.tensor(values_holder.values, dtype=torch.float32)


# Training partition: tensors -> dataset -> reshuffled mini-batches of 32.
X_train_control_tensor = _to_float32(X_train_control)
y_train_control_tensor = _to_float32(y_train_control)
train_control_dataset = TensorDataset(X_train_control_tensor, y_train_control_tensor)
train_control_dataloader = DataLoader(train_control_dataset, shuffle=True, batch_size=32)

# Test partition: same conversion, but batches keep their original order.
X_test_control_tensor = _to_float32(X_test_control)
y_test_control_tensor = _to_float32(y_test_control)
test_control_dataset = TensorDataset(X_test_control_tensor, y_test_control_tensor)
test_control_dataloader = DataLoader(test_control_dataset, shuffle=False, batch_size=32)

In [53]:
import torch.nn as nn

class VaporPressureNet(nn.Module):
    """Fully connected regressor with two hidden ReLU layers, dropout, and one output.

    Args:
        dropout_rate: Probability handed to ``nn.Dropout``; the same dropout layer is
            applied after each hidden activation.
        input_dim: Number of input features. Defaults to 26, the width that was
            previously hard-coded.
        hidden_dim: Width of both hidden layers. Defaults to 64, the width that was
            previously hard-coded.
    """

    def __init__(self, dropout_rate=0.005, input_dim=26, hidden_dim=64):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)  # Dropout layer with configurable rate
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map a batch of shape ``(batch, input_dim)`` to predictions of shape ``(batch, 1)``."""
        x = self.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout after the first ReLU activation
        x = self.relu(self.fc2(x))
        x = self.dropout(x)  # Apply dropout again after the second ReLU activation
        x = self.fc3(x)
        return x


In [54]:
from sklearn.metrics import r2_score, mean_absolute_error

# Seed PyTorch's global RNG before the model is built so that weight initialisation,
# dropout masks and DataLoader shuffling repeat from run to run.
torch.manual_seed(42)

nn1 = VaporPressureNet()

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(nn1.parameters(), lr=0.001)

epochs = 1000

for epoch in range(epochs):
    nn1.train()  # enable dropout for the optimisation passes
    running_loss = 0.0
    for inputs, targets in train_control_dataloader:
        optimizer.zero_grad()
        outputs = nn1(inputs)
        # Drop only the trailing size-1 output axis. A bare .squeeze() would also
        # collapse the batch axis when a batch holds a single row, leaving a 0-d
        # tensor whose shape no longer matches `targets`.
        loss = criterion(outputs.squeeze(-1), targets)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so that
        # epoch_loss below is a per-sample average even if the last batch is short.
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_control_dataloader.dataset)
    print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}')

# Evaluation: disable dropout and gradient tracking, then collect predictions
# and ground-truth values batch by batch.
nn1.eval()
predictions1 = []
targets1 = []
with torch.no_grad():
    for inputs, true_values in test_control_dataloader:
        outputs1 = nn1(inputs)
        # squeeze(-1) keeps the batch axis, so .tolist() always yields a list
        # (a bare .squeeze() yields a plain float for a one-row batch, which
        # list.extend() rejects).
        predictions1.extend(outputs1.squeeze(-1).tolist())
        targets1.extend(true_values.tolist())

r2_nn1 = r2_score(targets1, predictions1)
mae_nn1 = mean_absolute_error(targets1, predictions1)
print(f'R^2 Score on test set: {r2_nn1:.4f}')
print(f'Mean absolute error on test set: {mae_nn1:.4f}')

Epoch 1/1000, Loss: 819.0406
Epoch 2/1000, Loss: 134.2169
Epoch 3/1000, Loss: 76.8606
Epoch 4/1000, Loss: 71.3949
Epoch 5/1000, Loss: 64.3475
Epoch 6/1000, Loss: 64.6941
Epoch 7/1000, Loss: 59.9821
Epoch 8/1000, Loss: 60.8227
Epoch 9/1000, Loss: 58.8336
Epoch 10/1000, Loss: 59.9355
Epoch 11/1000, Loss: 59.9659
Epoch 12/1000, Loss: 55.6022
Epoch 13/1000, Loss: 57.1350
Epoch 14/1000, Loss: 58.0387
Epoch 15/1000, Loss: 58.4024
Epoch 16/1000, Loss: 61.6438
Epoch 17/1000, Loss: 55.5427
Epoch 18/1000, Loss: 57.3444
Epoch 19/1000, Loss: 55.6364
Epoch 20/1000, Loss: 58.8801
Epoch 21/1000, Loss: 56.4204
Epoch 22/1000, Loss: 54.1865
Epoch 23/1000, Loss: 53.2087
Epoch 24/1000, Loss: 54.7013
Epoch 25/1000, Loss: 56.9015
Epoch 26/1000, Loss: 58.9788
Epoch 27/1000, Loss: 54.0284
Epoch 28/1000, Loss: 56.4090
Epoch 29/1000, Loss: 54.0214
Epoch 30/1000, Loss: 53.7553
Epoch 31/1000, Loss: 56.0117
Epoch 32/1000, Loss: 52.8615
Epoch 33/1000, Loss: 52.9900
Epoch 34/1000, Loss: 55.9577
Epoch 35/1000, Loss: 

In [28]:
# Display the target series `y` (the 'VapourPressure' column of combined_df).
y

0      32.502600
1      34.859800
2      32.166600
3      30.406400
4      30.923800
         ...    
648    31.223724
649    40.427078
650    45.683913
651    38.833789
652    35.835470
Name: VapourPressure, Length: 653, dtype: float64