In [70]:
import pandas as pd
import torch
import torch.nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import ast

In [71]:
class CustomDataset(Dataset):
  """Map-style dataset that serves (features, target) pairs read from a CSV file.

  The CSV must provide a "Data" column holding a Python-literal sequence per
  row (parsed with ``ast.literal_eval``) and a "BandGap" column holding the
  regression target.
  """

  def __init__(self, csv_file):
    """Load ``csv_file``, drop incomplete rows and parse the "Data" column.

    Args:
      csv_file: Path (or file-like object) accepted by ``pandas.read_csv``.
    """
    frame = pd.read_csv(csv_file)
    # DataFrame.dropna returns a new frame; the result has to be kept or the
    # call is a no-op. Only the two columns read by __getitem__ are checked,
    # so gaps in unrelated columns do not discard usable samples.
    frame = frame.dropna(subset=["Data", "BandGap"]).reset_index(drop=True)
    # Each "Data" cell is stored as text; turn it back into a Python object.
    frame["Data"] = frame["Data"].apply(ast.literal_eval)
    self.file = frame

  def __len__(self):
    """Return the number of rows kept after dropping incomplete ones."""
    return len(self.file)

  def __getitem__(self, idx):
    """Return the ``(features, target)`` float32 tensors for position ``idx``.

    Args:
      idx: Positional row index; a tensor index is converted with ``tolist()``.
    """
    if torch.is_tensor(idx):
      idx = idx.tolist()

    row = self.file.iloc[idx]
    features = torch.tensor(row["Data"], dtype=torch.float32)
    target = torch.tensor(row["BandGap"], dtype=torch.float32)

    return features, target

# Location of the materials table, relative to the notebook's working directory.
csv_file = "UpdatedMaterialsDF.csv"

# Read and parse the whole CSV once; later cells split and batch this dataset.
BG_DataSet = CustomDataset(csv_file=csv_file)

In [72]:
# 80/20 train/test partition; the test size is the remainder so the two
# lengths always add up to len(BG_DataSet).
TrainingSplit = int(0.8*len(BG_DataSet))
TestingSplit = len(BG_DataSet) - TrainingSplit

Batch_Size = 32

# Seed the split so that re-running the notebook assigns the same samples to
# the training and testing subsets (otherwise reported losses are not
# comparable between runs, and a reloaded model may be evaluated on samples
# it was trained on).
Split_Seed = 42
Split_Generator = torch.Generator().manual_seed(Split_Seed)

Train_Dataset, Testing_Dataset = torch.utils.data.random_split(
    BG_DataSet, [TrainingSplit, TestingSplit], generator=Split_Generator
)

Train_Loader = DataLoader(Train_Dataset, batch_size=Batch_Size, shuffle=True)

# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition (and therefore the averaged test loss) stable across epochs.
Testing_Loader = DataLoader(Testing_Dataset, batch_size=Batch_Size, shuffle=False)

In [73]:
# Sanity check: walk every batch of both loaders once and stop at the first
# NaN found in either the inputs or the labels.
for loader, data_message, label_message in (
    (Train_Loader, "Training data contains NaNs", "Training labels contain NaNs"),
    (Testing_Loader, "Testing data contains NaNs", "Testing labels contain NaNs"),
):
    for features, target in loader:
        assert not torch.isnan(features).any(), data_message
        assert not torch.isnan(target).any(), label_message

In [79]:
class SimpleFFNN(torch.nn.Module):
    """Fully connected network: two ReLU-activated hidden layers and a linear head."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three linear layers.

        Args:
            input_size: Width of each input sample.
            hidden_size: Width shared by both hidden layers.
            output_size: Width of the network output.
        """
        super().__init__()
        # Attribute names double as state_dict keys, so they must stay stable.
        self.fc1 = torch.nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = torch.nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc3 = torch.nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU after each, then fc3 with no activation."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

# Network dimensions (input_size is the number of features per sample).
input_size, hidden_size, output_size = 7, 32, 1

# Optimisation settings.
learning_rate, n_epochs = 1e-4, 100

In [80]:
# Mean-squared-error objective for the scalar regression target.
loss_fn = torch.nn.MSELoss()

# Build the network, then hand its parameters to Adam.
model = SimpleFFNN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [81]:
# Per-epoch history: training loss, epoch number and testing loss.
# Three separate list objects are created; they must not alias each other.
list_loss_training, list_epoch, list_loss_testing = [], [], []

def train(model, train_loader, loss_fn, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to optimise; switched to training mode.
        train_loader: Sized iterable yielding ``(features, target)`` batches.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        optimizer: Optimizer that owns ``model``'s parameters.

    Returns:
        The mean of the per-batch loss values (sum of batch losses divided by
        ``len(train_loader)``).

    Raises:
        ZeroDivisionError: If ``train_loader`` has length zero.
    """
    model.train()
    running_loss = 0.0
    for features, target in train_loader:
        optimizer.zero_grad()
        output = model(features)
        # Drop only the trailing dimension. A bare squeeze() would also remove
        # the batch dimension of a single-sample batch, handing the loss a 0-d
        # prediction against a 1-d target (shape-mismatch warning + broadcast).
        loss = loss_fn(output.squeeze(-1), target)
        loss.backward()
        # Cap the global gradient norm at 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(train_loader)

def test(model, test_loader, loss_fn):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for features, target in test_loader:
            output = model(features)
            loss = loss_fn(output.squeeze(), target)
            running_loss += loss.item()
    return running_loss / len(test_loader)

# Training loop: each epoch is one optimisation pass followed by one
# evaluation pass; both losses are recorded and reported.
for epoch in range(n_epochs):
    train_loss = train(
        model=model,
        train_loader=Train_Loader,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
    test_loss = test(model=model, test_loader=Testing_Loader, loss_fn=loss_fn)

    # Epochs are stored 1-based so the history matches the printed log.
    list_epoch.append(epoch + 1)
    list_loss_training.append(train_loss)
    list_loss_testing.append(test_loss)

    print(f'Epoch {epoch+1}/{n_epochs}, Training Loss: {train_loss:.4f}, Testing Loss: {test_loss:.4f}')

Epoch 1/100, Training Loss: 1.7159, Testing Loss: 1.6321
Epoch 2/100, Training Loss: 1.5679, Testing Loss: 1.5595
Epoch 3/100, Training Loss: 1.5148, Testing Loss: 1.5236
Epoch 4/100, Training Loss: 1.4908, Testing Loss: 1.4979
Epoch 5/100, Training Loss: 1.4703, Testing Loss: 1.4799
Epoch 6/100, Training Loss: 1.4536, Testing Loss: 1.4621
Epoch 7/100, Training Loss: 1.4385, Testing Loss: 1.4480
Epoch 8/100, Training Loss: 1.4300, Testing Loss: 1.4618
Epoch 9/100, Training Loss: 1.4223, Testing Loss: 1.4433
Epoch 10/100, Training Loss: 1.4161, Testing Loss: 1.4601
Epoch 11/100, Training Loss: 1.4123, Testing Loss: 1.4234
Epoch 12/100, Training Loss: 1.4090, Testing Loss: 1.4194
Epoch 13/100, Training Loss: 1.4048, Testing Loss: 1.4183
Epoch 14/100, Training Loss: 1.4012, Testing Loss: 1.4101
Epoch 15/100, Training Loss: 1.3978, Testing Loss: 1.4099
Epoch 16/100, Training Loss: 1.3958, Testing Loss: 1.4080
Epoch 17/100, Training Loss: 1.3936, Testing Loss: 1.4016
Epoch 18/100, Training 

In [82]:
# Destination of the per-epoch loss history.
LossOutFile = "LossCSV.csv"

# One row per epoch; column order follows the dict insertion order.
loss_columns = {
    "Epoch": list_epoch,
    "Training Loss": list_loss_training,
    "Testing_Loss": list_loss_testing,
}
LossDF = pd.DataFrame(loss_columns)
LossDF.to_csv(LossOutFile)

In [87]:
# Persist only the learned parameters (the state_dict), not the module object.
model_weights = model.state_dict()
torch.save(model_weights, f="SimpleFFNN_BG")