In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [25]:
class CNNRegressor(nn.Module):
    """1-D convolutional regressor producing one scalar per sample.

    Input:  float tensor of shape ``(batch, seq_len, 4)``. ``forward`` moves the
            last axis into the channel position, so the four features act as
            the convolution channels and ``seq_len`` is the axis being
            convolved. ``seq_len`` may be as small as 1.
    Output: tensor of shape ``(batch, 1)``.
    """

    def __init__(self):
        super(CNNRegressor, self).__init__()
        # padding=1 lets a width-2 kernel run on a length-1 sequence; without it
        # Conv1d raises "Kernel size can't be greater than actual input size".
        self.conv1 = nn.Conv1d(in_channels=4, out_channels=16, kernel_size=2, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=2, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2)
        # Collapses whatever sequence length is left to exactly 1, so fc1 always
        # receives 32 features regardless of seq_len. Holds no parameters.
        self.global_pool = nn.AdaptiveMaxPool1d(1)
        self.fc1 = nn.Linear(32 * 1, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        """Run the network on ``x`` of shape ``(batch, seq_len, 4)``; returns ``(batch, 1)``."""
        # (batch, seq_len, 4) -> (batch, 4, seq_len): Conv1d expects channels on axis 1.
        x = x.permute(0, 2, 1)
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # flatten(1) keeps the batch axis intact; view(-1, 32) could silently
        # fold leftover sequence positions into the batch dimension.
        x = self.global_pool(x).flatten(start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [26]:
# Read the dataset CSV (path is relative to the notebook's working directory).
data = pd.read_csv(filepath_or_buffer="dataset/dataset_membrane_.csv")

In [27]:
# Feature matrix: the four input columns, as an (n_samples, 4) array.
X = data.loc[:, ['SW', 'MWCO', 'CA', 'ZP']].to_numpy()
# Target: 'Permeance', reshaped to an (n_samples, 1) column vector.
y = data['Permeance'].to_numpy().reshape(-1, 1)


In [28]:
# Split first, then standardize. Fitting the scaler on the full dataset would
# let the validation rows influence the mean/variance used for training
# (data leakage), which makes the validation loss optimistic.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaling statistics on the training rows only, then apply the same
# transform to the validation rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# All rows scaled with the training statistics; kept so the name stays
# available to any code that still refers to it.
X_scaled = scaler.transform(X)


In [30]:
# Turn all four splits into float32 PyTorch tensors in one pass.
X_train, X_val, y_train, y_val = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, X_val, y_train, y_val)
)

In [31]:
# Reshape (n_samples, 4) -> (n_samples, 1, 4) for the CNN. The model permutes
# axes 1 and 2, so the inserted axis ends up as the (length-1) convolved axis
# and the four features become the channels.
# Guarded on the rank so re-running this cell does not stack extra axes.
if X_train.dim() == 2:
    X_train = X_train.unsqueeze(1)
if X_val.dim() == 2:
    X_val = X_val.unsqueeze(1)

In [32]:
# Define the model.
# Seed torch's global RNG first so the random weight initialisation (and
# therefore the whole training run) is reproducible across kernel restarts.
torch.manual_seed(42)
model = CNNRegressor()

In [33]:
# Mean-squared-error objective, minimised with Adam over all model parameters.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [34]:
# How many full passes over the training set the loop performs.
epochs: int = 100


In [35]:
# Per-epoch loss records; the training loop appends one value to each per epoch.
train_loss_history, val_loss_history = [], []


In [36]:
# Training loop: one full-batch gradient step per epoch, followed by a
# validation pass in the same iteration so both histories get exactly one
# entry per epoch.
for epoch in range(epochs):
    # --- Training step ---
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    train_loss = loss.item()
    train_loss_history.append(train_loss)

    # --- Validation ---
    # One batched forward pass over X_val. MSELoss averages the squared errors,
    # which equals the mean of the per-sample losses, and the (N, 1) outputs
    # line up with the (N, 1) targets without broadcasting.
    model.eval()
    with torch.no_grad():
        val_loss = criterion(model(X_val), y_val).item()
    val_loss_history.append(val_loss)

    # Print training and validation loss for each epoch
    print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_loss}, Val Loss: {val_loss}")


In [None]:
# Draw both loss curves against the epoch index on a single set of axes.
for history, label in (
    (train_loss_history, 'Training Loss'),
    (val_loss_history, 'Validation Loss'),
):
    plt.plot(history, label=label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Persist only the learned parameters (the state dict), not the whole module.
torch.save(obj=model.state_dict(), f='cnn_regression_model.pth')

In [None]:
# Predictions on the validation set.
# X_val already has the (N, 1, 4) layout the model expects, so a single batched
# forward pass is enough; adding further axes per sample would produce a 4-D
# tensor that the model's permute(0, 2, 1) rejects.
model.eval()
with torch.no_grad():
    # (N, 1) -> (N,) -> list of Python floats, one per validation sample
    predictions = model(X_val).squeeze(1).tolist()


In [None]:
# Put the validation targets next to the model outputs and write them to CSV.
df = pd.DataFrame(
    data={
        'GroundTruth': y_val.squeeze().numpy(),
        'Prediction': predictions,
    }
)
df.to_csv(path_or_buf='predictions.csv', index=False)