In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split


In [3]:
full_df = pd.read_csv('../data/processsed/combined_dataset.csv')

X = full_df[['wavenumber', 'transmittance']]
y = full_df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [11]:

class FTIRDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        label = self.data.iloc[idx, 2]  
        features = torch.tensor(self.data.iloc[idx, :2].values, dtype=torch.float32)
        return features, label
    

In [12]:
files = ['Blood_and_RNA_1mg_and_Lys_1mg.CSV', 'Lys_1_mg_Blood_Raw.CSV', 'Lys_10_ng_Blood_Raw.CSV', 
         'RNA_1_mg_and_Blood_Raw.CSV', 'RNA_10_ng_and_Blood_Raw.CSV']


train_data, test_data = train_test_split(full_df, test_size=0.2, random_state=42)

# Create instances of the FTIRDataset class
train_dataset = FTIRDataset(train_data)
test_dataset = FTIRDataset(test_data)

# Create DataLoader for training and testing
batch_size = 1  # set to 1 since each file is treated as an individual sample
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [13]:
class FTIRClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FTIRClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [15]:
# Specify input size based on the number of features in your data
input_size = len(full_df.columns) - 1  # Subtract 1 for the label column

# Specify hidden layer size and output size based on your requirements
hidden_size = 64
output_size = len(files)  # Number of classes equals the number of files

# Instantiate the model
model = FTIRClassifier(input_size, hidden_size, output_size)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [17]:
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Evaluation during training (optional)
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        accuracy = correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [None]:
# Convert test data to PyTorch tensor
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

# Switch the model to evaluation mode
model.eval()

with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, predicted_labels = torch.max(test_outputs, 1)

# Compare predicted labels with ground truth labels (y_test)
accuracy = (predicted_labels == y_test).sum().item() / len(y_test)
print(f'Test Accuracy: {accuracy:.4f}')
