This class describes the data for a single sample: it contains measurements of PSI and DEL for 3 different angles of incidence (65, 70 and 75 degrees), together with the wavelength of the light used in the experiment. It is meant to provide functions for making predictions about a single sample. It is meant to be used only for samples that will be used for training - it assumes that the values of the thickness and of the A, B and C parameters are encoded in the file name.

In [3]:
import os
import import_ipynb
import numpy as np
import pandas as pd
import torch
from sklearn.datasets import load_sample_image
import model_creator


class training_sample:
    """Measurement data and known parameters of a single training sample.

    The sample parameters (T, A, B, C) are decoded from the file name, which
    is expected to look like ``<T>_<A>_<B>_<C>.txt``.  The file itself is a
    tab-separated table with one header row followed by rows of
    ``wavelength, psi65, del65, psi70, del70, psi75, del75``.

    Attributes:
    - T, A, B, C: str, the parameters exactly as they appear in the file name.
    - data: pandas.DataFrame, the measurement rows plus constant T, A, B, C
      columns.
    """

    # Default column sets.  Kept as tuples so the shared defaults cannot be
    # mutated by accident; they are converted to lists before indexing.
    _FEATURE_COLUMNS = ('wavelength', 'psi65', 'del65', 'psi70', 'del70', 'psi75', 'del75')
    _TARGET_COLUMNS = ('T', 'A', 'B', 'C')

    def __init__(self, file_path):
        """
        Decodes the sample parameters from the file name and loads the data.

        Parameters:
        - file_path: str, path to the sample file.
        """
        # Parse the file name once instead of once per parameter.
        self.T, self.A, self.B, self.C = self.decode_sample(file_path)
        self.data = self.load_data(file_path, self.T, self.A, self.B, self.C)

    def decode_sample(self, filename):
        """
        Decodes the file name to extract sample parameters (T, A, B, C).

        Only the base name of the path is used.  It is split on "_" and the
        first four parts are taken; a trailing ".txt" is removed from the
        fourth part.

        Parameters:
        - filename: str, the name (or path) of the file to decode.

        Returns:
        - list: A list containing the extracted parameters [T, A, B, C] as strings.

        Raises:
        - IndexError: if the base name has fewer than four "_"-separated parts.
        """
        parts = os.path.basename(filename).split("_")
        T, A, B, C = parts[0], parts[1], parts[2], parts[3]
        return [T, A, B, C.removesuffix(".txt")]

    def load_data(self, filename, T, A, B, C):
        """
        Loads the data from the file, cleans it, and adds the sample parameters as columns.

        Parameters:
        - filename: str, the name of the file to load.
        - T: str, sample parameter T.
        - A: str, sample parameter A.
        - B: str, sample parameter B.
        - C: str, sample parameter C.

        Returns:
        - pandas.DataFrame: The cleaned and processed data with added sample parameters.
        """
        table = pd.read_csv(filename, sep='\t', header=None, index_col=False)
        # The first row of the file is the header line, not a measurement.
        table = table.drop(index=[0])
        # Column 7 appears when every line ends with a trailing tab; tolerate
        # files without it instead of failing with a KeyError.
        table = table.drop(columns=[7], errors='ignore')
        table.columns = list(self._FEATURE_COLUMNS)
        table['T'] = T
        table['A'] = A
        table['B'] = B
        table['C'] = C
        return table

    def _select(self, columns, default):
        """Returns self.data restricted to `columns`, or to `default` when `columns` is None."""
        return self.data[list(default) if columns is None else columns]

    def return_as_2dlist(self, feature_columns=None, target_columns=None):
        """
        Returns the features and targets as two separate tables.

        Despite the method name, the values are returned as pandas DataFrames
        (column subsets of the loaded data), not as nested lists.

        Parameters:
        - feature_columns: list, names of the columns to use as features
          (default: wavelength and the six psi/del columns).
        - target_columns: list, names of the columns to use as targets
          (default: T, A, B, C).

        Returns:
        - tuple: A tuple containing two pandas DataFrames (features, targets).
        """
        features = self._select(feature_columns, self._FEATURE_COLUMNS)
        targets = self._select(target_columns, self._TARGET_COLUMNS)
        return features, targets

    def return_as_tensors(self, feature_columns=None, target_columns=None):
        """
        Converts the features and targets into PyTorch tensors.

        Parameters:
        - feature_columns: list, names of the columns to use as features
          (default: wavelength and the six psi/del columns).
        - target_columns: list, names of the columns to use as targets
          (default: T, A, B, C).

        Returns:
        - tuple: A tuple containing two float32 PyTorch tensors (features, targets),
          one row per measurement.
        """
        features = self._select(feature_columns, self._FEATURE_COLUMNS)
        targets = self._select(target_columns, self._TARGET_COLUMNS)
        features = torch.from_numpy(features.to_numpy(dtype=np.float32))
        targets = torch.from_numpy(targets.to_numpy(dtype=np.float32))
        return features, targets

    def return_as_flat_df(self, feature_columns=None, target_columns=None):
        """
        Flattens the features and targets into a single row and returns them as DataFrames.

        All feature rows are concatenated row by row into one long row.  The
        targets are taken from the first data row only.

        Parameters:
        - feature_columns: list, names of the columns to use as features
          (default: wavelength and the six psi/del columns).
        - target_columns: list, names of the columns to use as targets
          (default: T, A, B, C).

        Returns:
        - tuple: A tuple containing two single-row pandas DataFrames (features, targets).
        """
        features = self._select(feature_columns, self._FEATURE_COLUMNS)
        targets = self._select(target_columns, self._TARGET_COLUMNS).iloc[:1]
        features = pd.DataFrame(features.values.reshape(1, -1))
        targets = pd.DataFrame(targets.values.reshape(1, -1))
        return features, targets

    def return_as_flat_tensors(self, feature_columns=None, target_columns=None):
        """
        Flattens the features and targets into a single row and returns them as PyTorch tensors.

        All feature rows are concatenated row by row into one long row.  The
        targets are taken from the first data row only.

        Parameters:
        - feature_columns: list, names of the columns to use as features
          (default: wavelength and the six psi/del columns).
        - target_columns: list, names of the columns to use as targets
          (default: T, A, B, C).

        Returns:
        - tuple: A tuple containing two float32 PyTorch tensors (features, targets),
          each with a single row.
        """
        features, targets = self.return_as_tensors(feature_columns, target_columns)
        features = features.reshape(1, -1)
        targets = targets[:1].reshape(1, -1)
        return features, targets

    def features_as_tensors(self, feature_columns=None):
        """
        Converts the features into PyTorch tensors.

        Parameters:
        - feature_columns: list, names of the columns to use as features
          (default: wavelength and the six psi/del columns).

        Returns:
        - torch.Tensor: A float32 PyTorch tensor containing the features.
        """
        features = self._select(feature_columns, self._FEATURE_COLUMNS)
        return torch.from_numpy(features.to_numpy(dtype=np.float32))

    def _predict(self, model_name, features, output_size):
        """
        Runs the model on this sample and returns all predictions as a flat list.

        Parameters:
        - model_name: str, name of model file.
        - features: list, names of the feature columns fed to the model; its
          length is passed as the input size when the model is created.
        - output_size: int, the size of the model's output.

        Returns:
        - list: the model outputs for every data row, flattened.
        """
        # NOTE(review): model_creator is project-local; create_and_load is
        # presumably (model file, input size, output size) -> model. Confirm.
        model = model_creator.MLP.create_and_load(model_name, len(features), output_size)
        model.eval()
        inputs = self.features_as_tensors(features)
        with torch.no_grad():
            predictions = model(inputs)
        return predictions.flatten().tolist()

    def predict_mean(self, model_name, features, output_size):
        """
        Predicts the mean of the model's output for the given features.

        Parameters:
        - model_name: str, name of model file.
        - features: list, names of the feature columns to use for prediction.
        - output_size: int, the size of the model's output.

        Returns:
        - float: The mean of the model's predictions over all rows and outputs.
        """
        return np.mean(self._predict(model_name, features, output_size))

    def return_standarized(self, scaler, columns):
        """
        Standardizes the specified columns of the data using the provided scaler.

        Parameters:
        - scaler: object with a transform() method (e.g. a fitted sklearn scaler).
        - columns: list, names of the columns to standardize.

        Returns:
        - The result of scaler.transform() on the selected columns
          (a numpy.ndarray for sklearn scalers).
        """
        return scaler.transform(self.data[columns])

    def get_sample_info(self):
        """
        Returns the sample parameters (T, A, B, C) as a tuple.

        Parameters:
        - None

        Returns:
        - tuple: A tuple containing the sample parameters (T, A, B, C).
        """
        return self.T, self.A, self.B, self.C

    def print_sample_info(self):
        """
        Prints the sample parameters (T, A, B, C).

        Parameters:
        - None

        Returns:
        - None
        """
        print(f'T: {self.T}, A: {self.A}, B: {self.B}, C: {self.C}')

    def predict_median(self, model_name, features, output_size):
        """
        Predicts the median of the model's output for the given features.

        Parameters:
        - model_name: str, name of model file
        - features: list, names of the feature columns to use for prediction.
        - output_size: int, the size of the model's output.

        Returns:
        - float: The median of the model's predictions over all rows and outputs.
        """
        return np.median(self._predict(model_name, features, output_size))

    @staticmethod
    def plot_polynomial_fit(data, x_column, y_column, degree):
        """
        Approximates the relationship between two columns in the data with a polynomial of a given degree
        and plots the result.

        This is a static method: it works on the DataFrame passed in, not on
        the sample's own data.

        Parameters:
        - data: pandas DataFrame containing the data.
        - x_column: str, name of the column to use as the X-axis.
        - y_column: str, name of the column to use as the Y-axis.
        - degree: int, degree of the polynomial to fit.

        Returns:
        - None
        """
        # Imported locally because the visible part of the file does not
        # import these names at module level.
        from sklearn.linear_model import LinearRegression
        from sklearn.preprocessing import PolynomialFeatures
        # NOTE(review): `plt` (presumably matplotlib.pyplot) is not imported in
        # the visible part of this file - confirm it is in scope before use.

        # Extract X and Y data
        X = data[x_column].values.reshape(-1, 1)
        Y = data[y_column].values

        # Create polynomial features
        poly = PolynomialFeatures(degree=degree)
        X_poly = poly.fit_transform(X)

        # Fit the polynomial regression model
        model = LinearRegression()
        model.fit(X_poly, Y)

        # Generate predictions on a dense grid spanning the observed X range
        X_range = np.linspace(X.min(), X.max(), 500).reshape(-1, 1)
        X_range_poly = poly.transform(X_range)
        Y_pred = model.predict(X_range_poly)

        # Plot the data and the polynomial fit
        plt.figure(figsize=(8, 6))
        plt.scatter(X, Y, color='blue', label='Data', alpha=0.7)
        plt.plot(X_range, Y_pred, color='red', label=f'Polynomial Fit (degree={degree})')
        plt.xlabel(x_column)
        plt.ylabel(y_column)
        plt.title(f'Polynomial Fit of {y_column} vs {x_column}')
        plt.legend()
        plt.grid(True)
        plt.show()

    @staticmethod
    def plot_columns(data, x_column, y_column):
        """
        Plots two columns from the data without any fitting.

        This is a static method: it works on the DataFrame passed in, not on
        the sample's own data.

        Parameters:
        - data: pandas DataFrame containing the data.
        - x_column: str, name of the column to use as the X-axis.
        - y_column: str, name of the column to use as the Y-axis.

        Returns:
        - None
        """
        # NOTE(review): `plt` (presumably matplotlib.pyplot) is not imported in
        # the visible part of this file - confirm it is in scope before use.

        # Extract X and Y data
        X = data[x_column]
        Y = data[y_column]

        # Plot the data
        plt.figure(figsize=(8, 6))
        plt.scatter(X, Y, color='blue', label='Data', alpha=0.7)
        plt.xlabel(x_column)
        plt.ylabel(y_column)
        plt.title(f'{y_column} vs {x_column}')
        plt.legend()
        plt.grid(True)
        plt.show()



