In [1]:
import fcntl
import os
import platform
from tkinter import Tk
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm
import scipy.stats as stats
from sklearn.cluster import KMeans

import sys
sys.path.insert(0, os.path.abspath('.'))
from tools import Sim_Parameters, Train_Parameters, create_dataset, RMSELoss, train_val_test, test_epoch, simulator, ballsINTObins, load_data, NoisyDataset


In [2]:


def solve_for_B(A, C):
    # Calculate the Moore-Penrose pseudoinverse of matrix A
    A_pseudo_inv = np.linalg.pinv(A)
    
    # Calculate matrix B
    B = np.dot(A_pseudo_inv, C)
    
    return B


substance_ind_list = [0, 1, 2, 3]
substance_ind_list.sort()

basis_func_ind_list = [0, 1, 4, 6]
# basis_func_ind_list = [0, 1, 2, 3, 4]
basis_func_ind_list.sort()

temp_K_list = np.asarray([293.15])
temp_K_list.sort()


air_trans, basis_funcs, spectra, substances_emit = load_data(air_trans_file='./data/Test 3 - 4 White Powers/Air transmittance.xlsx',
                                                             basis_func_file='./data/Test 3 - 4 White Powers/Basis functions_4-20um.xlsx',
                                                             spectra_file='./data/Test 3 - 4 White Powers/white_powders_spectra.xlsx', 
                                                             substances_emit_file='./data/Test 3 - 4 White Powers/white_powders.xlsx',)


substance_names = np.array(pd.read_excel('./data/Test 3 - 4 White Powers/white_powders_names.xlsx', header=None))

b_f_comb = [0, 1, 2]
comb = b_f_comb
temp_K = 293.15

sim_params = Sim_Parameters(air_trans=air_trans,
                            air_RI=1,
                            atm_dist_ratio=0.11,
                            basis_funcs=basis_funcs,
                            basis_func_comb=comb,
                            substance_ind_list=substance_ind_list,
                            spectra=spectra,
                            substances_emit=substances_emit,
                            temp_K=temp_K)

dataset = create_dataset(sim_params)

In [3]:
sensor_output_list = []

for idx in substance_ind_list:
    sub_signal = substances_emit[:, idx]
    sub_signal = np.expand_dims(sub_signal, 1)
    
    out = simulator(sim_params, sub_signal)  # Apply simulator() function to the column
    sensor_output_list.append(out)

sensor_output_list = np.asarray(sensor_output_list).transpose()
print(sensor_output_list.shape)


(3, 4)


In [4]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

torch.manual_seed(28)
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# test_noise_perc_list = [0, 0.01, 0.03, 0.05, 0.1, 0.2, 0.4, 1, 2]
test_noise_perc_list = [i * 0.05 for i in range(41)]

In [5]:
results = []

for test_noise_perc in test_noise_perc_list:
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
    if test_noise_perc != 0:
        test_dataset = NoisyDataset(test_dataset, test_noise_perc)


    dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

    loss_list = []
    targ_list = []
    pred_list = []

    

    for batch_idx, (x, y) in enumerate(dataloader):
        print(batch_idx)
        print('x - C - mixture - Norm', x)
        print('y - B - ratio', y)

        print('A - shape', sensor_output_list.shape)
        print('B - shape', y.numpy().transpose().shape)
        print('C - shape', x.numpy().transpose().shape)

        X = np.matmul(sensor_output_list, y.numpy().transpose())
        print('Cal C\n', X.transpose())
        print('Cal C - shape', X.shape)
        print('C original', x.numpy())

        Cal_B_by_Cal_C = solve_for_B(sensor_output_list, X)
        print('Cal B by Cal C\n', Cal_B_by_Cal_C)

        # prediction = solve_for_B(sensor_output_list, x.numpy().transpose())
        prediction = np.linalg.lstsq(sensor_output_list, x.numpy().transpose(), rcond=None)[0]
        print('pred', prediction.transpose())
        print('sum of pred', np.sum(prediction))
        normalized_prediction = prediction / np.sum(prediction)
        prediction = normalized_prediction
        print('pred', prediction.transpose())
        print('sum of pred', np.sum(prediction))

        criterion = nn.L1Loss()

        loss = criterion(torch.from_numpy(prediction.transpose()), y)
        print('loss', loss)
        loss_list.append(loss.numpy())
        targ_list.append(np.squeeze(y.numpy()))
        pred_list.append(np.squeeze(prediction))

    avg_loss = np.mean(loss_list)
    targ_list = np.asarray(targ_list)
    pred_list = np.asarray(pred_list)
    diff_list = pred_list - targ_list

    print('diff_list', diff_list)


    row = {
        'Temperature_K': temp_K,
        'Substance Number': len(substance_ind_list),
        'Substance Comb': tuple(substance_ind_list),
        'Basis Function Number': len(comb),
        'Comb of Basis Functions': comb,
        'Train Noise Max Percentage': 0,
        'Test Noise Max Percentage': test_noise_perc,
        'L1Loss': avg_loss
    }



    perc95_interval = []
    for i, sub_ind in enumerate(substance_ind_list):

        data = diff_list[:, i]
        # params = stats.norm.fit(data)
        # data_mean, data_std = params
        ae, loce, scalee = stats.skewnorm.fit(data)
        
        # Create a skewed normal distribution object
        dist = stats.skewnorm(ae, loce, scalee)


        for confidence_perc in (0.68, 0.95, 0.997):
            # Calculate the quantiles for a specific percentage
            lower_quantile = dist.ppf(0.5-confidence_perc/2)  # Lower quantile covering % of the data
            upper_quantile = dist.ppf(0.5+confidence_perc/2)  # Upper quantile covering % of the data

            if confidence_perc == 0.95:
                perc95_interval.append(upper_quantile - lower_quantile)

    perc95_avg = np.mean(perc95_interval)
    row[f'AVG skewnorm 95% interval range'] = perc95_avg


    # Append the row to the results list
    results.append(row)
# Create a pandas DataFrame from the results list
df = pd.DataFrame(results)



# df.to_csv('.csv', index=False)

output_file_name = 'loss_test_least_sqr.csv'
# Check if the file "loss.pkl" already exists
if os.path.isfile(output_file_name):

    # If the file exists, load the existing DataFrame from the file
    df_existing = pd.read_csv(output_file_name)

    # Open the file in binary mode
    with open(output_file_name, 'w') as f:
        while True:
            try:
                # Get a lock on the file
                fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
                break
            except IOError:
                # Lock is held by another process, wait and try again
                time.sleep(1)

        # Merge df to df_existing, and if there's duplicates, keep the new values from df
        df = pd.concat([df, df_existing]).drop_duplicates(
            subset=['Temperature_K', 'Substance Number', 'Substance Comb', 'Basis Function Number', 
                    'Comb of Basis Functions', 'Train Noise Max Percentage', 'Test Noise Max Percentage'], keep='first')
        df = df.sort_values(by=list(df.columns))
        print(df)

        # Save the DataFrame to a CSV file "loss.csv"
        df.to_csv(output_file_name, index=False)

        # Release the lock on the file
        fcntl.flock(f, fcntl.LOCK_UN)
else:
    # Save the DataFrame to a CSV file "loss.csv"
    df.to_csv(output_file_name, index=False) 



0
x - C - mixture - Norm tensor([[14.2358, 15.1640, 15.3628]])
y - B - ratio tensor([[0.1000, 0.0000, 0.3000, 0.6000]])
A - shape (3, 4)
B - shape (4, 1)
C - shape (3, 1)
Cal C
 [[14.23580247 15.16403458 15.36281698]]
Cal C - shape (3, 1)
C original [[14.235802 15.164034 15.362817]]
Cal B by Cal C
 [[ 0.10995088]
 [-0.00308591]
 [ 0.28046611]
 [ 0.60720512]]
pred [[ 0.1099506  -0.00308601  0.28046605  0.60720531]]
sum of pred 0.9945359565005008
pred [[ 0.11055468 -0.00310296  0.28200695  0.61054133]]
sum of pred 1.0
loss tensor(0.0105, dtype=torch.float64)
1
x - C - mixture - Norm tensor([[12.8861, 13.4344, 13.4347]])
y - B - ratio tensor([[0.0000, 0.6000, 0.1000, 0.3000]])
A - shape (3, 4)
B - shape (4, 1)
C - shape (3, 1)
Cal C
 [[12.88609012 13.43440055 13.43465458]]
Cal C - shape (3, 1)
C original [[12.886089 13.4344   13.434654]]
Cal B by Cal C
 [[0.03017063]
 [0.59064367]
 [0.04077403]
 [0.32184554]]
pred [[0.03017016 0.59064412 0.04077375 0.32184561]]
sum of pred 0.9834336449868