In [589]:
import argparse
import os
import shutil
import sys
import time
import warnings
import tqdm

import csv
import pandas as pd

from random import sample
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import MultiStepLR
from torchmetrics import R2Score
from torch.utils.data.dataloader import default_collate
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.metrics import r2_score

from cgcnn.data import CIFData
from cgcnn.data import collate_pool, get_train_val_test_loader
from cgcnn.model import CrystalGraphConvNet

import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

import copy

In [590]:
#Import dataaset
data_dir = r'C:\Users\ezxac5\Documents\GitHub\CGCNN_Jupyter\Jupyter_code\data\wright_data_3'

In [591]:
dataset = CIFData(data_dir) # Extracts features, ids and labels from dataset
collate_fn = collate_pool # Collect and combine a list of data and return a batch for predicting crystal properties

In [592]:
#Set save directory
results_dir = "KFold_Results"
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

# Define Functions

## Main Functions

In [593]:
def main():
    global best_mae_error

In [594]:
class Normalizer(object):
    """Normalize a Tensor and restore it later. """

    def __init__(self, tensor):
        """tensor is taken as a sample to calculate the mean and std"""
        self.mean = torch.mean(tensor)
        self.std = torch.std(tensor)

    def norm(self, tensor):
        return (tensor - self.mean) / self.std

    def denorm(self, normed_tensor):
        return normed_tensor * self.std + self.mean

    def state_dict(self):
        return {'mean': self.mean,
                'std': self.std}

    def load_state_dict(self, state_dict):
        self.mean = state_dict['mean']
        self.std = state_dict['std']

In [595]:
class EarlyStopping():    
    """
    Stop training if validation loss does not improve further or gets worse. Avoids overfitting.
    """   
   
    def __init__(self, patience=10, min_delta=0, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_model = None
        self.best_loss = None
        self.counter = 0
        self.status = ""
    
    def __call__(self, model, val_loss):
        if self.best_loss == None:
          self.best_loss = val_loss
          self.best_model = copy.deepcopy(model)
        elif self.best_loss - val_loss > self.min_delta:
          self.best_loss = val_loss
          self.counter = 0
          self.best_model.load_state_dict(model.state_dict())
        elif self.best_loss - val_loss < self.min_delta:
          self.counter += 1
          if self.counter >= self.patience:
            self.status = f"Stopped on {self.counter}"
            if self.restore_best_weights:
              model.load_state_dict(self.best_model.state_dict())
            return True
        self.status = f"{self.counter}/{self.patience}"
        return False

In [596]:
def mae(prediction, target):
    """
    Computes the mean absolute error between prediction and target

    Parameters
    ----------

    prediction: torch.Tensor (N, 1)
    target: torch.Tensor (N, 1)
    """
    return torch.mean(torch.abs(target - prediction))

In [597]:
class AverageMeter(object):
    """Computes and stores the average and current value for the entire epoch"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [598]:
def save_checkpoint(state, is_best, filename=results_dir + '\\'+ 'checkpoint.pth.tar'):
    """Save model checkpoint"""
    
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, results_dir + '\\'+ 'model_best.pth.tar')

In [599]:
def adjust_learning_rate(optimizer, epoch, k):
    """Sets the learning rate to the initial LR decayed by 10 every k epochs"""
    assert type(k) is int
    lr = args.lr * (0.1 ** (epoch // k))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [600]:
#Function to divide data to train and val datasets in K-fold CV
def get_train_val_test_loader_2(train_ids, test_ids, 
                              dataset, collate_fn=default_collate,
                              batch_size=64, num_workers=1, 
                              pin_memory=False, **kwargs):
    """
    Utility function for dividing a dataset to train, val, test datasets.

    !!! The dataset needs to be shuffled before using the function !!!

    Parameters
    ----------
    dataset: torch.utils.data.Dataset
      The full dataset to be divided.
    collate_fn: torch.utils.data.DataLoader
    batch_size: int
    train_id: Id of training data generated in kfold split
    test_id: Id of testing data generated in kfold split
    return_test: bool
      Whether to return the test dataset loader. If False, the last test_size
      data will be hidden.
    num_workers: int
    pin_memory: bool

    Returns
    -------
    train_loader: torch.utils.data.DataLoader
      DataLoader that random samples the training data.
    (test_loader): torch.utils.data.DataLoader
      DataLoader that random samples the test data, returns if
        return_test=True.
    """
    total_size = len(dataset)

    train_sampler = SubsetRandomSampler(train_ids)
    test_sampler = SubsetRandomSampler(test_ids)
    train_loader = DataLoader(dataset, batch_size=batch_size,
                              sampler=train_sampler,
                              num_workers=num_workers,
                              collate_fn=collate_fn, pin_memory=pin_memory)
    test_loader = DataLoader(dataset, batch_size=batch_size,
                             sampler=test_sampler,
                             num_workers=num_workers,
                             collate_fn=collate_fn, pin_memory=pin_memory)

    return train_loader, test_loader

In [601]:
def train(train_loader, model, criterion, optimizer, epoch, normalizer):
    """Train function"""
    
    # Store training data and predictions
    train_targets = []
    train_preds = []
    train_cif_ids = []
    
    mse_train = []
    mae_train = []
    r2_train = []
    
    # Switch to train mode
    model.train()
    
    for i, (input, train_target, batch_t_cif_ids) in enumerate(train_loader):    
        input_var = (Variable(input[0]),
                     Variable(input[1]),
                     input[2],
                     input[3])
        
        # Normalize target
        target_normed = normalizer.norm(train_target)
        target_var = Variable(target_normed)
        
        # Compute output
        output = model(*input_var)
        
        # Denormalize and store training data and prediction for batch
        train_pred = normalizer.denorm(output.data.cpu())
        train_preds += train_pred.view(-1).tolist()
        train_targets += train_target.view(-1).tolist()
        train_cif_ids += batch_t_cif_ids   
        
        # Measure error for batch
        loss = criterion(output, target_var)
        mae_error = mae(train_pred, train_target)
        r2score = R2Score()
        r_2 = r2score(train_pred, train_target)
        
        # Update batch loss info
        mse_train.append(loss.item())
        mae_train.append(mae_error.item())
        r2_train.append(r_2.item())
        
        # Compute gradient, perform optimizer step and zero gradients
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    # Calculate average error metrics for epoch
    mse_train_epoch = np.mean(mse_train)
    mae_train_epoch = np.mean(mae_train)
    r2_train_epoch = np.mean(r2_train)
    
    return mse_train_epoch, mae_train_epoch, r2_train_epoch

In [602]:
def validate(val_loader, model, criterion, normalizer, test=False):
    """Validation function"""
    
    test_targets = []
    test_preds = []
    test_cif_ids = []
    
    mse_test = []
    mae_test = []
    r2_test = []
    
    # Switch to evaluation mode
    model.eval()
    
    with torch.no_grad():
        for i, (input, target, batch_cif_ids) in enumerate(val_loader):          
            input_var = (Variable(input[0]),
                         Variable(input[1]),
                         input[2],
                         input[3])
            
            target_normed = normalizer.norm(target)
            target_var = Variable(target_normed)
            
            #Comput output for batch
            output = model(*input_var)
            
            # Denormalize and store testing data and predictions for batch
            test_pred = normalizer.denorm(output.data.cpu())
            test_target = target
            test_preds += test_pred.view(-1).tolist()
            test_targets += test_target.view(-1).tolist()
            test_cif_ids += batch_cif_ids
            
            # Measure error for batch
            loss = criterion(output, target_var)
            mae_error = mae(test_pred, test_target)
            r2score = R2Score()
            r_2 = r2score(test_pred, test_target)
            
            # Update batch loss info
            mse_test.append(loss.item())
            mae_test.append(mae_error.item())
            r2_test.append(r_2.item())
            
    # Calculate average error metrics for epoch
    mse_test_epoch = np.mean(mse_test)
    mae_test_epoch = np.mean(mae_test)
    r2_test_epoch = np.mean(r2_test)
        
    if test:
        star_label = '**'
        with open('test_results.csv', 'w') as f:
            writer = csv.writer(f)
            for cif_id, target, pred in zip(test_cif_ids, test_targets,
                                            test_preds):
                writer.writerow((cif_id, target, pred))
    else:
        star_label = '*'
    
    return mse_test_epoch, mae_test_epoch, r2_test_epoch

## Plot functions

In [603]:
def epoch_acc_plotter(epoch, train_error, test_error, Fold, title):
    '''
    Plotly plot training/validation loss vs epoch
    '''
    fig = go.Figure()
    fig.add_trace(go.Scatter(x = epoch, 
                             y=train_error,
                             name=f'Training Results k-{Fold}'))
    
    fig.add_trace(go.Scatter(x = epoch, 
                             y=test_error,
                             name=f'Testing Results k-{Fold}'))
                  
    fig.update_layout(
    xaxis_title="Epoch",
    yaxis_title=f"{title}")

    fig.show()    

In [604]:
def epoch_acc_plotter_sav(epoch, train_error, test_error, Fold, title):
    '''
    Plot training/validation loss vs epoch - for saving plot as image
    '''
    plt.figure(figsize=(15, 6))
    plt.plot(epoch, train_error, label=f'Training results-{Fold}')
    plt.plot(epoch, test_error, label=f'Testing results-{Fold}')
    plt.grid(visible=True, which='both', color='0.65', linestyle='-')
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)
    plt.xlabel("Epoch", fontsize=20)
    plt.ylabel(f"{title}", fontsize=20)
    plt.legend(loc='best', fontsize=18)
    plot1_file = f'CGCNN_k-{Fold}_epoch_{title}_plt.jpg'
    plt.savefig(results_dir + '\\' + plot1_file, bbox_inches = 'tight')
    plt.close()

In [605]:
def pred_acc_plotter(data, predictionTargets, predictions, Fold, R2, accuracy, title, save=True):
    '''
    Plotly plot for predicted vs real value
    '''
    fig = px.scatter(data, x = predictionTargets, y=predictions, hover_name='CIF ID', title=f'{title} data; \
    R2: {R2}, MAE: {accuracy}')
    
    fig.add_trace(
    go.Scatter(
    x=data[predictionTargets],
    y=data[predictionTargets],
    mode='lines',
    line=go.scatter.Line(color='black'),
    showlegend=False))
                  
    fig.update_layout(
    xaxis_title="Actual TC",
    yaxis_title="Predicted TC",
    font_size=18,
    title_font_size=22)
    
    if save==True:
        fig.write_html(results_dir + '\\' + f"CGCNN_{title}_acc_plt.html")
        plt.figure(figsize=(15, 6))
        plt.scatter(data[predictionTargets],data[predictions], label='Prediction MAE: '+str(float("{0:.5f}".format(accuracy))))
        plt.plot(data[predictionTargets], data[predictionTargets], label='R2: '+str(float("{0:.5f}".format(R2))))
        plt.grid(visible=True, which='both', color='0.65', linestyle='-')
        plt.xticks(fontsize=18)
        plt.yticks(fontsize=18)
        plt.xlabel("Thermal conductivity validation data", fontsize=20)
        plt.ylabel("Prediction (CGCNN) (W/(mK))", fontsize=20)
        plt.legend(loc='best', fontsize=18)
        plot1_file = f'CGCNN_k-{Fold}_{title}_acc_plt.jpg'
        plt.savefig(results_dir + '\\' + plot1_file, bbox_inches = 'tight')
        plt.close()    

    fig.show()

In [606]:
def data_ditrb_plotter(fold, train_data, test_data):
    '''
    Plot box plots and histograms of TC distribution in training and testing data
    '''

    fig = make_subplots(rows = 2, cols = 1)
    fig.add_trace(go.Histogram(x=train_data['Train targets'], name='Train data', marker=dict(color='#1f77b4')),
                 row = 2,
                 col = 1)
    fig.add_trace(go.Histogram(x=test_data['Actual value'], name='Test data', marker=dict(color='#ff7f0e')),
                 row = 2,
                 col = 1)

    # Create box plots
    fig.add_trace(go.Box(x=train_data['Train targets'], name='Train data', marker=dict(color='#1f77b4'), showlegend=False),
                 row = 1,
                 col = 1)
    fig.add_trace(go.Box(x=test_data['Actual value'], name='Test data', marker=dict(color='#ff7f0e'), showlegend=False),
                 row = 1,
                 col = 1)

    # Update layout
    fig.update_xaxes(title_text='TC', row=1, col=1)
    fig.update_xaxes(title_text='TC', row=2, col=1)
    fig.update_yaxes(title_text='Frequency', row=2, col=1)
    fig.update_layout(barmode='overlay', title=f"Fold {fold} TC distribution", font_size=18, title_font_size=22)
    fig.update_traces(opacity=0.75)
    
    fig.write_html(results_dir + '\\' + f"CGCNN_k-{fold}_TC_distribution.html")

    fig.show()

In [607]:
def summary_error_metrics_plotly(folds, error_val_train, error_val_test, title):
    '''
    Plot average MAE and R2 values for each fold
    '''
    
    fig = go.Figure()
    
    fig.add_trace(go.Bar(
          x = np.array([val for val in error_val_train.keys()]),
          y = np.array([val for val in error_val_train.values()]),
          name='Train data',
          marker_color='#1f77b4'))
    
    fig.add_trace(go.Bar(
      x = np.array([val for val in error_val_test.keys()]),
      y = np.array([val for val in error_val_test.values()]),
      name='Test data',
      marker_color='#ff7f0e'))
    
    fig.update_layout(title=f"{title} for each fold", font_size=18, title_font_size=22)
    fig.update_traces(opacity=0.75)
    
    fig.write_html(results_dir + '\\' + f"{title}_summary.html")
    
    fig.show()

# Main

## Hyper-parameters

In [608]:
num_epochs = 50 # max number of epochs
batch_size = 64 # max batch size
lr = 0.001 # learning rate
num_workers = 1
pin_memory = False

best_mae_error = 1e10

## Create normalizer parameters

In [609]:
# Extract and normalize target values
sample_data_list = [dataset[i] for i in range(len(dataset))]
_, sample_target, _ = collate_pool(sample_data_list)
normalizer = Normalizer(sample_target)


Possible issue in cif file at line: Pr8.00


Possible issue in cif file at line: Zr5.33


Possible issue in cif file at line: Hf5.33


Possible issue in cif file at line: Sn5.33


Possible issue in cif file at line: O56


Possible issue in cif file at line: Nd3.20


Possible issue in cif file at line: Sm3.20


Possible issue in cif file at line: Eu3.20


Possible issue in cif file at line: Ce3.20


Possible issue in cif file at line: Zr16.00


Possible issue in cif file at line: Gd5.33


Possible issue in cif file at line: Eu5.33


Possible issue in cif file at line: Zr1.33


Possible issue in cif file at line: Hf1.33


Possible issue in cif file at line: Sn1.33


Possible issue in cif file at line: Ti12.00


Possible issue in cif file at line: Zr4.00


Possible issue in cif file at line: Hf4.00


Possible issue in cif file at line: Sn4.00


Possible issue in cif file at line: Ti4.00


Possible issue in cif file at line: Nd2.29


Possible issue in cif file at line: Sm2.29


Possible i

## Build model

In [610]:
structures, _, _ = dataset[0]
orig_atom_fea_len = structures[0].shape[-1]
nbr_fea_len = structures[1].shape[-1]
model = CrystalGraphConvNet(orig_atom_fea_len, nbr_fea_len,
                            atom_fea_len=64,
                            n_conv=3,
                            h_fea_len=128,
                            n_h=1,
                            classification=False)

## Loss function and optmizer

In [611]:
# Loss criterion
criterion = nn.MSELoss()

# Define optimizer
optimizer = optim.SGD(model.parameters(), lr,
                      momentum=0.9,
                      weight_decay=0)

# Learning rate decay function
scheduler = MultiStepLR(optimizer, milestones=[100],
                        gamma=0.1)

#Decays the learning rate of each parameter group by gamma once the number of epoch reaches one of the milestones. 
#Notice that such decay can happen simultaneously with other changes to the learning rate from outside this scheduler. 
#When last_epoch=-1, sets initial lr as lr.

# Train model and perform K-fold cross validation

In [612]:
num_folds = 3 # Number of folds
kfold = KFold(n_splits=num_folds, shuffle=True)
num_epochs = 3 # Max number of epochs

In [614]:
# Arrays to store error metrics for all folds
results_acc = {}
results_loss = {}
results_R2 = {}
predicted_values = []
actual_values = []
cif_id_ar = []

for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    print(f'Fold: {fold}')
    print(f'Train ids: {train_ids}')
    print(f'Test ids: {test_ids}')
    print('********************************')
    
    # Extract training and validation data
    train_loader, test_loader = get_train_val_test_loader_2(
    train_ids,
    test_ids,
    dataset=dataset,
    collate_fn=collate_fn,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=pin_memory)
    
    # Arrays to store error metrics for epoch
    epoch_train_loss = []
    epoch_train_mae = []
    epoch_train_r2 = []
    epoch_val_loss = []
    epoch_val_r2 = []
    epoch_val_mae = []
    
    best_fold = None
    best_epoch = None
    best_mae = None
    best_r2 = None
    
    epoch = 0
    done = False
    es = EarlyStopping()
    
    while epoch<num_epochs and not done:
        # Train for one epoch
        train_loss, train_mae, train_r2 = train(train_loader, model, criterion, optimizer, epoch, normalizer)
        
        print(f'epoch: {epoch}, train loss: {train_loss}, train mae: {train_mae}')
        
        # Evaluate on validation set
        val_loss, val_mae, val_r2 = validate(test_loader, model, criterion, normalizer)
        
        epoch_train_loss.append(train_loss)
        epoch_train_mae.append(train_mae)
        epoch_train_r2.append(train_r2)
        epoch_val_loss.append(val_loss)
        epoch_val_mae.append(val_mae)
        epoch_val_r2.append(val_r2)
        
        # Change learning rate
        scheduler.step()
        
        save_path = f'./{results_dir}/model-fold-{fold}.pth.tar'
        # Check early step requirement
        if es(model, val_loss): 
            done = True
            best_fold = fold
            best_epoch = epoch
            best_mae = val_mae
            best_r2 = val_r2
            torch.save(model.state_dict(), save_path)
        else:
            if best_mae == None:
                print('None')
                best_mae = val_mae
                best_fold = fold
                best_epoch = epoch
                best_r2 = val_r2
                torch.save(model.state_dict(), save_path)
            elif val_mae < best_mae:
                print('Is best')
                best_fold = fold
                best_epoch = epoch
                best_mae = val_mae
                best_r2 = val_r2
                torch.save(model.state_dict(), save_path)
        print(f'epoch: {epoch}, validation loss: {val_loss}, validation mae: {val_mae}, Estop:[{es.status}]')
        
        epoch += 1
        
    # Plot loss and R2 vs epoch
    epoch_acc_plotter(np.linspace(0, epoch-1, epoch), epoch_train_loss, epoch_val_loss, fold, 'Loss (MSE)')
    epoch_acc_plotter_sav(np.linspace(0, epoch-1, epoch), epoch_train_loss, epoch_val_loss, fold, 'Loss (MSE)')
    epoch_acc_plotter(np.linspace(0, epoch-1, epoch), epoch_train_r2, epoch_val_r2, fold, 'R2')
    epoch_acc_plotter_sav(np.linspace(0, epoch-1, epoch), epoch_train_r2, epoch_val_r2, fold, 'R2')
    
    #Evaluation for this fold
    predictions = []
    targets = []
    test_targets = []
    test_preds = []
    test_cif_ids = []
    train_targets = []
    train_cif_ids = []
    
    mse_test = []
    mae_test = []
    r2_test = []
    
    # Load best model
    print("=> loading model '{}'".format(results_dir + '\\'+ 'model_best.pth.tar'))
    checkpoint = torch.load(save_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint)
    model.eval()
    
    with torch.no_grad():

        #Iterate over the test data and generate predictions
        for i, (input, target, batch_cif_ids) in enumerate(test_loader):

            input_var = (Variable(input[0]),
                         Variable(input[1]),
                         input[2],
                         input[3])
            # normalize target
            target_normed = normalizer.norm(target)
            target_var = Variable(target_normed)

            # compute output
            output = model(*input_var)

            # Denormalize and store testing data and predictions for batch
            test_pred = normalizer.denorm(output.data.cpu())
            test_target = target
            test_preds += test_pred.view(-1).tolist()
            test_targets += test_target.view(-1).tolist()
            test_cif_ids += batch_cif_ids

            # Measure error for batch
            loss = criterion(output, target_var)
            mae_error = mae(test_pred, test_target)
            r2score = R2Score()
            r_2 = r2score(test_pred, test_target)

            # Update batch loss info
            mse_test.append(loss.item())
            mae_test.append(mae_error.item())
            r2_test.append(r_2.item())

        # Calculate average error metrics for epoch
        mse_test_fold = np.mean(mse_test)
        mae_test_fold = np.mean(mae_test)
        r2_test_fold = np.mean(r2_test)
    
    print(f'Loss for fold {fold}: {mse_test_fold} ')
    print(f'MAE for fold {fold}: {mae_test_fold}')
    print(f'R2 for fold {fold}: {r2_test_fold}')
    print('------------------------------------')
    
    # Save results for each fold
    results_acc[fold] = mae_test_fold
    results_R2[fold] = r2_test_fold
    
    for val in test_preds:
        predicted_values.append(val)
    for val in test_targets:
        actual_values.append(val)
    for val in test_cif_ids:
        cif_id_ar.append(val)

    for i, (_, train_target, batch_train_cif_ids) in enumerate(train_loader):
        train_targets += train_target.view(-1).tolist()
        train_cif_ids += batch_train_cif_ids
        
    test_results_df = pd.DataFrame({"CIF ID":test_cif_ids, "Predicted value": test_preds, "Actual value": test_targets})
    train_data_df = pd.DataFrame({"CIF ID":train_cif_ids, "Train targets": train_targets})
    pred_acc_plotter(test_results_df, 'Actual value', 'Predicted value', fold, r2_test_fold, mae_test_fold, f'{fold}_Test', save=True)


    data_ditrb_plotter(fold, train_data_df, test_results_df)

    #Write results to csv file
    file_name = f'test_results_{fold}.csv'
    out_cif_id = pd.DataFrame({'CIF ID':test_cif_ids})
    out_test_targets = pd.DataFrame({'Actual TC':test_targets})
    out_pred_targets = pd.DataFrame({'Predicted values':test_preds})
    frames = [out_cif_id, out_test_targets, out_pred_targets]
    out_file = pd.concat(frames, axis=1)
    out_file.to_csv(results_dir + '\\' + file_name, header=False)  
    
#Model evaluation
#Print fold results
print(f'K-FOLD CROSS VALIDATION RESULTs FOR {num_folds} FOLDS')
print('----------------------')

sum = 0.0
temp_list = []
for key, value in results_acc.items():
    print(f'Fold MAE{key}: {value}')
    temp_list.append(value)
    sum += value

average_acc = float(sum/len(results_acc.items()))
std_dev_acc = np.std(temp_list)
print(f'Average MAE: {average_acc}%')
print(f'Std. Dev MAE: {std_dev_acc}')

sum =0.0
temp_list = []
for key, value in results_R2.items():
    print(f'Fold R2{key}: {value}%')
    temp_list.append(value)
    sum += value
    
average_R2 = float(sum/len(results_R2.items()))
std_dev_R2 = np.std(temp_list)
print(f'Average R2: {average_R2}')
print(f'Std. Dev R2: {std_dev_R2}')

results_df = pd.DataFrame({"CIF ID":cif_id_ar, "Predicted value": predicted_values, "Actual value": actual_values})
pred_acc_plotter(results_df, 'Actual value', 'Predicted value', num_folds, average_R2, average_acc, 'Test for all folds', save=True)

#Write final results to CSV
file_name = 'test_results.csv'

error_val = pd.DataFrame([num_folds, num_epochs, average_acc, std_dev_acc, average_R2, std_dev_R2, best_fold, best_epoch],\
                        ['Number of folds: ', 'Number of epochs: ', 'Average MAE: ', 'Std. Dev. of MAE: ',\
                        'Average R2: ', 'Std. Dev. of R2: ', 'Best fold: ', 'Best epoch: '])

error_val.to_csv(results_dir + '\\' + file_name, mode='a', header=False)

Fold: 0
Train ids: [ 0  1  2  4  5  7  9 11 12 13 14 15 17 19]
Test ids: [ 3  6  8 10 16 18 20]
********************************
epoch: 0, train loss: 0.6064429879188538, train mae: 0.21456222236156464
None
epoch: 0, validation loss: 1.4027634859085083, validation mae: 0.3392624258995056, Estop:[0/10]
epoch: 1, train loss: 0.6282728314399719, train mae: 0.21435324847698212
Is best
epoch: 1, validation loss: 1.3462551832199097, validation mae: 0.33555489778518677, Estop:[0/10]
epoch: 2, train loss: 0.6202892661094666, train mae: 0.21172796189785004
Is best
epoch: 2, validation loss: 1.2553037405014038, validation mae: 0.3286789357662201, Estop:[0/10]


=> loading model 'KFold_Results\model_best.pth.tar'
Loss for fold 0: 1.2553037405014038 
MAE for fold 0: 0.3286789059638977
R2 for fold 0: -0.02545011043548584
------------------------------------


Fold: 1
Train ids: [ 0  2  3  4  5  6  7  8 10 12 13 16 18 20]
Test ids: [ 1  9 11 14 15 17 19]
********************************
epoch: 0, train loss: 0.6556673049926758, train mae: 0.22508594393730164
None
epoch: 0, validation loss: 0.9700141549110413, validation mae: 0.28133106231689453, Estop:[0/10]
epoch: 1, train loss: 0.5443796515464783, train mae: 0.20420575141906738
epoch: 1, validation loss: 1.1086162328720093, validation mae: 0.2906317114830017, Estop:[1/10]
epoch: 2, train loss: 0.4623851776123047, train mae: 0.18077945709228516
epoch: 2, validation loss: 1.3196688890457153, validation mae: 0.2988846004009247, Estop:[2/10]


=> loading model 'KFold_Results\model_best.pth.tar'
Loss for fold 1: 0.9700141549110413 
MAE for fold 1: 0.28133106231689453
R2 for fold 1: -0.015603423118591309
------------------------------------


Fold: 2
Train ids: [ 1  3  6  8  9 10 11 14 15 16 17 18 19 20]
Test ids: [ 0  2  4  5  7 12 13]
********************************
epoch: 0, train loss: 0.6434818506240845, train mae: 0.22233350574970245
None
epoch: 0, validation loss: 0.4919370710849762, validation mae: 0.19639237225055695, Estop:[0/10]
epoch: 1, train loss: 0.6446515917778015, train mae: 0.21479251980781555
Is best
epoch: 1, validation loss: 0.4405387043952942, validation mae: 0.18400849401950836, Estop:[0/10]
epoch: 2, train loss: 0.6403033137321472, train mae: 0.21420451998710632
Is best
epoch: 2, validation loss: 0.4305836260318756, validation mae: 0.1818106472492218, Estop:[0/10]


=> loading model 'KFold_Results\model_best.pth.tar'
Loss for fold 2: 0.4305836260318756 
MAE for fold 2: 0.1818106472492218
R2 for fold 2: 0.033904850482940674
------------------------------------


K-FOLD CROSS VALIDATION RESULTs FOR 3 FOLDS
----------------------
Fold MAE0: 0.3286789059638977
Fold MAE1: 0.28133106231689453
Fold MAE2: 0.1818106472492218
Average MAE: 0.2639402051766713%
Std. Dev MAE: 0.061206768626375425
Fold R20: -0.02545011043548584%
Fold R21: -0.015603423118591309%
Fold R22: 0.033904850482940674%
Average R2: -0.0023828943570454917
Std. Dev R2: 0.02597228820567709
