In [3]:
import import_ipynb
import os
from scipy.stats import pearsonr
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
import model_creator
from training_sample import training_sample
import locations

class training_dataset:
    """Collection of training samples loaded from the files of one folder.

    Every regular file directly inside ``dataset_folder`` is wrapped in a
    ``training_sample``. The per-sample tables (``sample.data``) can then be
    stacked and converted to ``float32`` torch tensors.

    NOTE(review): ``sample.data`` is assumed to be a pandas DataFrame (it is
    indexed with a list of column names and handed to ``pd.concat``) --
    confirm against ``training_sample``.
    """

    # Column names used whenever a caller does not pass explicit ones.
    DEFAULT_FEATURE_COLUMNS = (
        'wavelength', 'psi65', 'del65', 'psi70', 'del70', 'psi75', 'del75',
    )
    DEFAULT_TARGET_COLUMNS = ('T',)

    def __init__(self, dataset_folder):
        """Load one ``training_sample`` per file found in ``dataset_folder``."""
        self.samples = self.gather_samples(dataset_folder)

    def gather_samples(self, dataset_folder):
        """Build a ``training_sample`` for every regular file in the folder.

        Sub-directories are skipped and the search is not recursive.

        Args:
            dataset_folder: Path of the directory holding the sample files.

        Returns:
            list of ``training_sample`` objects, ordered by file name.
        """
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # row order (and therefore the seeded train/test split) reproducible.
        file_names = sorted(
            name for name in os.listdir(dataset_folder)
            if os.path.isfile(os.path.join(dataset_folder, name))
        )
        return [
            training_sample(os.path.join(dataset_folder, name))
            for name in file_names
        ]

    @staticmethod
    def _resolve_columns(columns, default):
        """Return ``columns`` as a fresh list, or ``default`` when it is None."""
        return list(default) if columns is None else list(columns)

    def _stack_columns(self, column_names):
        """Concatenate the requested columns of every sample into one frame.

        Raises:
            ValueError: If the dataset holds no samples.
        """
        if not self.samples:
            raise ValueError("training_dataset contains no samples")
        # A single concat over all frames avoids re-copying the accumulated
        # table once per sample.
        frames = [sample.data[column_names] for sample in self.samples]
        return pd.concat(frames, ignore_index=True)

    @staticmethod
    def _to_tensor(frame):
        """Convert a DataFrame to a ``float32`` torch tensor."""
        return torch.from_numpy(frame.to_numpy(dtype=np.float32))

    def return_as_tensors_split(self, feature_columns=None, target_columns=None):
        """Return an 80/20 train/test split of all samples as tensors.

        The rows of every sample are stacked, then split with
        ``train_test_split(test_size=0.2, random_state=42)``.

        Args:
            feature_columns: Names of the input columns. Defaults to
                ``DEFAULT_FEATURE_COLUMNS``.
            target_columns: Names of the output columns. Defaults to
                ``DEFAULT_TARGET_COLUMNS``.

        Returns:
            ``[x_train, y_train, x_test, y_test]`` as ``float32`` tensors.

        Raises:
            ValueError: If the dataset holds no samples.
        """
        feature_columns = self._resolve_columns(
            feature_columns, self.DEFAULT_FEATURE_COLUMNS)
        target_columns = self._resolve_columns(
            target_columns, self.DEFAULT_TARGET_COLUMNS)

        # Both the feature and the target columns must be collected here;
        # the targets are sliced out of the same stacked frame below.
        df = self._stack_columns(feature_columns + target_columns)

        x_train, x_test, y_train, y_test = train_test_split(
            df[feature_columns], df[target_columns],
            test_size=0.2, random_state=42,
        )

        return [
            self._to_tensor(x_train),
            self._to_tensor(y_train),
            self._to_tensor(x_test),
            self._to_tensor(y_test),
        ]

    def return_as_tensors(self, columns=None, target_columns=None):
        """Return all samples stacked into one feature and one target tensor.

        Args:
            columns: Names of the input columns. Defaults to
                ``DEFAULT_FEATURE_COLUMNS``.
            target_columns: Names of the output columns. Defaults to
                ``DEFAULT_TARGET_COLUMNS``.

        Returns:
            ``[features, targets]`` as ``float32`` tensors with one row per
            data row and one column per requested column name.

        Raises:
            ValueError: If the dataset holds no samples.
        """
        columns = self._resolve_columns(columns, self.DEFAULT_FEATURE_COLUMNS)
        target_columns = self._resolve_columns(
            target_columns, self.DEFAULT_TARGET_COLUMNS)

        df = self._stack_columns(columns + target_columns)
        return [self._to_tensor(df[columns]), self._to_tensor(df[target_columns])]

    def get_total_r2_score(self, model, features=None, targets=None):
        """Score a stored model against every row of the dataset.

        The value returned is the squared Pearson correlation between the
        flattened predictions and the flattened targets. In general that is
        not the same quantity as the coefficient of determination.

        Args:
            model: Forwarded unchanged as the first argument of
                ``model_creator.MLP.create_and_load``.
            features: Names of the input columns; their count is used as the
                network's ``input_size``. Defaults to
                ``DEFAULT_FEATURE_COLUMNS``.
            targets: Names of the output columns; their count is used as the
                network's ``output_size``. Defaults to
                ``DEFAULT_TARGET_COLUMNS``.

        Returns:
            The squared Pearson correlation coefficient.
        """
        feature_columns = self._resolve_columns(
            features, self.DEFAULT_FEATURE_COLUMNS)
        target_columns = self._resolve_columns(
            targets, self.DEFAULT_TARGET_COLUMNS)

        network = model_creator.MLP.create_and_load(
            model,
            input_size=len(feature_columns),
            output_size=len(target_columns),
        )
        network.eval()

        inputs, expected = self.return_as_tensors(feature_columns, target_columns)
        with torch.no_grad():
            predicted = network(inputs).flatten().tolist()

        # NOTE(review): with more than one target column the flatten() calls
        # interleave the columns into a single sequence -- confirm intended.
        correlation = pearsonr(predicted, expected.flatten().tolist())[0]
        return correlation ** 2



# Where the trained model lives: a fixed file name inside the project's
# configured models directory.
models_dir = locations.models_dir
modeldir = os.path.join(models_dir, "modelA_64_32_32_16.pth")

# Where the dataset lives: resolved relative to the parent of the current
# working directory. The trailing "" keeps a path separator at the end.
folder = os.getcwd()
parent_folder = os.path.dirname(folder)
folder_path = os.path.join(
    parent_folder,
    "code_data_models",
    "datasets",
    "new_Si_jaw_delta",
    "",
)

# Load every sample file, then report the model path and its score for the
# 'A' target column over the whole dataset.
dataset = training_dataset(folder_path)
print(modeldir)
print(
    dataset.get_total_r2_score(
        modeldir,
        ['wavelength', 'psi65', 'del65', 'psi70', 'del70', 'psi75', 'del75'],
        ['A'],
    )
)

1.0995_1.2743_0.0172944_0.0000696152.txt
1.0995 1.2743 0.0172944 0.0000696152
1.0995_1.2743_0.0172944_0.0000696152.txt
1.0995 1.2743 0.0172944 0.0000696152
1.0995_1.2743_0.0172944_0.0000696152.txt
1.0995 1.2743 0.0172944 0.0000696152
1.0995_1.2743_0.0172944_0.0000696152.txt
1.0995 1.2743 0.0172944 0.0000696152
1.1592_1.3856_0.01419929_0.0001763588.txt
1.1592 1.3856 0.01419929 0.0001763588
1.1592_1.3856_0.01419929_0.0001763588.txt
1.1592 1.3856 0.01419929 0.0001763588
1.1592_1.3856_0.01419929_0.0001763588.txt
1.1592 1.3856 0.01419929 0.0001763588
1.1592_1.3856_0.01419929_0.0001763588.txt
1.1592 1.3856 0.01419929 0.0001763588
1.2786_1.4843_0.0196751_0.000059904.txt
1.2786 1.4843 0.0196751 0.000059904
1.2786_1.4843_0.0196751_0.000059904.txt
1.2786 1.4843 0.0196751 0.000059904
1.2786_1.4843_0.0196751_0.000059904.txt
1.2786 1.4843 0.0196751 0.000059904
1.2786_1.4843_0.0196751_0.000059904.txt
1.2786 1.4843 0.0196751 0.000059904
1.2786_1.646_0.0144748_0.0000477451.txt
1.2786 1.646 0.0144748 0

ValueError: invalid literal for int() with base 10: 'data'