This class describes the data for a single sample: measurements of PSI and DEL at three angles of incidence (65, 70 and 75 degrees), together with the wavelength of the light used in the experiment. It provides functions for making predictions about a single sample. It is meant to be used only for training samples, because it assumes that the thickness and the A, B and C parameters are encoded in the file name.

In [3]:
import os
import import_ipynb
import numpy as np
import pandas as pd
import torch
from sklearn.datasets import load_sample_image
import model_creator


class training_sample:
    """Single training sample of PSI/DEL measurements with known parameters.

    The sample is read from a tab-separated text file whose base name has the
    form ``T_A_B_C.txt``. The four parameters are decoded from that name and
    stored, as strings, in ``self.T``, ``self.A``, ``self.B`` and ``self.C``.
    The measurements are kept in ``self.data``, a DataFrame with the columns
    ``wavelength, psi65, del65, psi70, del70, psi75, del75`` plus one constant
    column for each of ``T``, ``A``, ``B`` and ``C``.
    """

    # Default column groups. They are tuples so that the shared defaults can
    # never be mutated; they are turned into lists right before indexing.
    _FEATURE_COLUMNS = ('wavelength', 'psi65', 'del65', 'psi70', 'del70', 'psi75', 'del75')
    _TARGET_COLUMNS = ('T', 'A', 'B', 'C')

    def __init__(self, file_path):
        """Decode the parameters from the file name and load the measurements.

        Args:
            file_path: Path to the sample file named ``T_A_B_C.txt``.
        """
        # Parse the file name once instead of once per parameter.
        self.T, self.A, self.B, self.C = self.decode_sample(file_path)
        self.data = self.load_data(file_path, self.T, self.A, self.B, self.C)

    @staticmethod
    def _columns_or_default(columns, default):
        """Return ``columns`` unchanged, or a fresh list of ``default`` if it is None."""
        return list(default) if columns is None else columns

    def decode_sample(self, filename):
        """Extract the sample parameters from a file name.

        Args:
            filename: Path or bare file name of the form ``T_A_B_C.txt``.

        Returns:
            ``[T, A, B, C]`` as strings, with a trailing ``.txt`` removed
            from ``C``.

        Raises:
            IndexError: If the base name has fewer than four ``_``-separated
                parts.
        """
        parts = os.path.basename(filename).split("_")
        return [parts[0], parts[1], parts[2], parts[3].removesuffix(".txt")]

    def load_data(self, filename, T, A, B, C):
        """Read the measurement file and attach the sample parameters.

        Args:
            filename: Path to the tab-separated measurement file.
            T, A, B, C: Values written into the constant columns of the same
                names.

        Returns:
            DataFrame with the seven measurement columns followed by
            ``T``, ``A``, ``B`` and ``C``.
        """
        frame = pd.read_csv(filename, sep='\t', header=None, index_col=False)
        # The first row is discarded (presumably a header line - TODO confirm).
        frame = frame.drop(index=[0])
        # Column 7 is discarded (presumably an empty column produced by a
        # trailing tab on every line - TODO confirm).
        frame = frame.drop(columns=[7])
        frame.columns = list(self._FEATURE_COLUMNS)
        frame['T'] = T
        frame['A'] = A
        frame['B'] = B
        frame['C'] = C
        return frame

    def return_as_2dlist(self, data, feature_columns=None, target_columns=None):
        """Split ``data`` into feature and target column selections.

        Despite the method name, both results are pandas selections of
        ``data`` (DataFrames for list-like column arguments), not lists.

        Args:
            data: DataFrame to split; ``self.data`` is not used here.
            feature_columns: Feature column names; defaults to the seven
                measurement columns.
            target_columns: Target column names; defaults to ``T, A, B, C``.

        Returns:
            Tuple ``(features, targets)``.
        """
        feature_columns = self._columns_or_default(feature_columns, self._FEATURE_COLUMNS)
        target_columns = self._columns_or_default(target_columns, self._TARGET_COLUMNS)
        return data[feature_columns], data[target_columns]

    def return_as_tensors(self, feature_columns=None, target_columns=None):
        """Return the features and targets of this sample as float32 tensors.

        Args:
            feature_columns: Feature column names; defaults to the seven
                measurement columns.
            target_columns: Target column names; defaults to ``T, A, B, C``.

        Returns:
            Tuple ``(features, targets)`` of tensors with one row per row of
            ``self.data``.
        """
        feature_columns = self._columns_or_default(feature_columns, self._FEATURE_COLUMNS)
        target_columns = self._columns_or_default(target_columns, self._TARGET_COLUMNS)
        # Select from self.data; the previous chained assignment
        # (``self.data=[...]``) replaced self.data with a plain list.
        features = self.data[feature_columns]
        targets = self.data[target_columns]
        features = torch.from_numpy(features.to_numpy(dtype=np.float32))
        targets = torch.from_numpy(targets.to_numpy(dtype=np.float32))
        return features, targets

    def return_as_flat_df(self, feature_columns=None, target_columns=None):
        """Flatten the sample into single-row DataFrames.

        All feature rows are concatenated into one row; the targets are taken
        from the first row only.

        Args:
            feature_columns: Feature column names; defaults to the seven
                measurement columns.
            target_columns: Target column names; defaults to ``T, A, B, C``.

        Returns:
            Tuple ``(features, targets)`` of one-row DataFrames with default
            integer column labels.
        """
        feature_columns = self._columns_or_default(feature_columns, self._FEATURE_COLUMNS)
        target_columns = self._columns_or_default(target_columns, self._TARGET_COLUMNS)
        features = self.data[feature_columns]
        targets = self.data[target_columns].iloc[:1]
        flat_features = pd.DataFrame(features.values.reshape(1, -1))
        flat_targets = pd.DataFrame(targets.values.reshape(1, -1))
        return flat_features, flat_targets

    def return_as_flat_tensors(self, feature_columns=None, target_columns=None):
        """Flatten the sample into single-row float32 tensors.

        All feature rows are concatenated into one row; the targets are taken
        from the first row only.

        Args:
            feature_columns: Feature column names; defaults to the seven
                measurement columns.
            target_columns: Target column names; defaults to ``T, A, B, C``.

        Returns:
            Tuple ``(features, targets)`` of tensors, each with a leading
            dimension of 1.
        """
        feature_columns = self._columns_or_default(feature_columns, self._FEATURE_COLUMNS)
        target_columns = self._columns_or_default(target_columns, self._TARGET_COLUMNS)
        features = self.data[feature_columns]
        targets = self.data[target_columns]
        features = torch.from_numpy(features.to_numpy(dtype=np.float32))
        targets = torch.from_numpy(targets.to_numpy(dtype=np.float32))
        features = features.reshape(1, -1)
        targets = targets[:1].reshape(1, -1)
        return features, targets

    def features_as_tensors(self, feature_columns=None):
        """Return the selected feature columns as a float32 tensor.

        Args:
            feature_columns: Feature column names; defaults to the seven
                measurement columns.

        Returns:
            Tensor with one row per row of ``self.data``.
        """
        feature_columns = self._columns_or_default(feature_columns, self._FEATURE_COLUMNS)
        features = self.data[feature_columns]
        return torch.from_numpy(features.to_numpy(dtype=np.float32))

    def _predict_flat(self, model, features, output_size):
        """Load the model, run it on the features and return the flattened outputs as a list."""
        shape = getattr(features, "shape", None)
        if shape is not None and len(shape) == 2:
            # ``features`` already holds 2-D feature data (tensor, array or
            # DataFrame): feed it to the model directly.
            if isinstance(features, torch.Tensor):
                inputs = features.to(torch.float32)
            else:
                # np.array copies, so torch always gets a writable buffer.
                inputs = torch.from_numpy(np.array(features, dtype=np.float32))
        else:
            # ``features`` is a list of column names (or None for the
            # defaults): take the data from this sample.
            inputs = self.features_as_tensors(features)
        input_size = inputs.shape[1]
        # NOTE(review): ``model`` is forwarded untouched to
        # model_creator.MLP.create_and_load; presumably it identifies saved
        # weights - confirm against model_creator.
        network = model_creator.MLP.create_and_load(model, input_size, output_size)
        network.eval()
        with torch.no_grad():
            predictions = network(inputs)
        return predictions.flatten().tolist()

    def predict_mean(self, model, features, output_size):
        """Return the mean of all model outputs for the given features.

        Previously ``features`` was used both as an object with a 2-D
        ``shape`` and as a list of column names, which no single argument
        could satisfy. Both forms are accepted now.

        Args:
            model: Passed as first argument to
                ``model_creator.MLP.create_and_load``.
            features: Either a list of column names of ``self.data``, or 2-D
                feature data (tensor, ndarray or DataFrame) to predict on.
            output_size: Output size passed to ``create_and_load``.

        Returns:
            Mean over the flattened model outputs.
        """
        return np.mean(self._predict_flat(model, features, output_size))

    def return_standarized(self, scaler, columns):
        """Return the selected columns transformed by ``scaler``.

        Args:
            scaler: Object with a ``transform`` method; it is expected to be
                fitted already, since only ``transform`` is called.
            columns: Column names of ``self.data`` to transform.

        Returns:
            Whatever ``scaler.transform`` returns for the selected columns.
        """
        return scaler.transform(self.data[columns])

    def get_sample_info(self):
        """Return the sample parameters as the tuple ``(T, A, B, C)``."""
        return self.T, self.A, self.B, self.C

    def print_sample_info(self):
        """Print the sample parameters on one line."""
        print(f'T: {self.T}, A: {self.A}, B: {self.B}, C: {self.C}')

    def predict_median(self, model, features, output_size):
        """Return the median of all model outputs for the given features.

        Accepts the same two forms of ``features`` as ``predict_mean``.

        Args:
            model: Passed as first argument to
                ``model_creator.MLP.create_and_load``.
            features: Either a list of column names of ``self.data``, or 2-D
                feature data (tensor, ndarray or DataFrame) to predict on.
            output_size: Output size passed to ``create_and_load``.

        Returns:
            Median over the flattened model outputs.
        """
        return np.median(self._predict_flat(model, features, output_size))
  
    # def return_flattened(self):
    #     data = self.data
    #     data.drop("T", axis=1, inplace=True)
    #     data.drop("A", axis=1, inplace=True)
    #     data.drop("B", axis=1, inplace=True)
    #     data.drop("C", axis=1, inplace=True)
    #     data.values.flatten()
    #     return data





