In [1]:
# Relevant libraries
import pandas as pd
import scipy.stats as st
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from scipy.optimize import OptimizeWarning
import numpy as np
from scipy.optimize import curve_fit
import multiprocessing
from joblib import Parallel, delayed
from tqdm import tqdm
import warnings


In [2]:
# Load data from CSV file
file2 = pd.read_csv("../Output/LowPPMMatrix.csv")
# Extract the columns from the data
#file2=file2[file2['Target PPM']<=500]

#file2=file2[file2['Target PPM']!=150]
#file2=file2[file2['Target PPM']!=50]
xDataResistance = file2.loc[:, 'Resistance']
xDataRatio = file2.loc[:, 'Ratio']
xDataRH = file2.loc[:, 'RelativeHumidity']
xDataTemp = file2.loc[:, 'Temperature']
yDataTargetPPM = file2.loc[:, 'Target PPM']

#calculate abso.lute humidity in g/m^3
P_actual_hPa = .8 * 1013.25
e_sat_standard = 6.112 * np.exp((17.67 * xDataTemp) / (xDataTemp + 243.5))
e_sat_actual = e_sat_standard * (P_actual_hPa / 1013.25)
xDataAH = 1000*((xDataRH/100)*e_sat_actual)/(461.5*(xDataTemp+ 273.15))

# Separate the independent and dependent variables
X = file2.drop('Target PPM', axis='columns')
y = file2.loc[:, 'Target PPM']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ignore all warnings in the code
warnings.filterwarnings("ignore")

def funkEQ(X, a, b,c,d,e,f):
    R, H = X
    with np.errstate(over='ignore'):
        stuff=a*np.exp((((-1*R)/(H**b))*c)+(-1*H*d)+e)+f
    return stuff

def optimize_parameters(p0):
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            warnings.filterwarnings('ignore', category=OptimizeWarning)

            popt, pcov = curve_fit(funkEQ, (xDataResistance, xDataRH), yDataTargetPPM, p0=p0, maxfev=1000000)

        return (p0, popt, np.sum((yDataTargetPPM - funkEQ((xDataResistance, xDataRH), *popt))**2))
    except Exception as e:
        print(f"Error with parameters {p0}: {str(e)}")
        return None

# Set the random seed for reproducibility
np.random.seed()

# Number of points to generate
num_points = 100000

initial_params = np.random.randint(-2, 2, size=(num_points, 6))

num_cores = multiprocessing.cpu_count()

results = Parallel(n_jobs=num_cores)(delayed(optimize_parameters)(p) for p in tqdm(initial_params))

# Filter out None, NaN, and Infinity results
successful_results = [result for result in results
                      if result is not None
                      and not np.any(np.isnan(result[1]))
                      and not np.any(np.isinf(result[1]))]

init_params, fitted_params, residuals = zip(*successful_results)

# Rank the parameters by their residual and only keep the top 10
best_params = np.array(fitted_params)[np.argsort(residuals)[:10]]
best_init = np.array(init_params)[np.argsort(residuals)[:10]]

# The best parameters are the first in the sorted list
#print('Fitted function parameters: a=%.3f, b=%.3f, c=%.3f d=%.3f, f=%.3f, g=%.3f' % (best_params[0][0], best_params[0][1], best_params[0][2], best_params[0][3], best_params[0][4], best_params[0][5]))

print('The initial conditions to get this optimization were: '+str(best_init[0]))

# Calculate RMSE for the best parameters
popt=best_params[0]
#best_params[0]=[18,  1,  4, 12,  0,  4, -7]
# file2=file2[file2["SensorID"]==3]
# X = file2.drop('Target PPM', axis='columns')
# y = file2.loc[:, 'Target PPM']
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Extract the Resistance column from the X_test dataframe
R_test = X_test.loc[:, 'Resistance']
H_test = X_test.loc[:, 'RelativeHumidity']
T_test = X_test.loc[:, 'Temperature']
#popt = [38.538220,0.843161,4.637493,380.116483,0.06614,0.298036,-78.340227]
#popt = best_params[0]
# Generate the predictions using the optimized parameters and the input data
y_pred = funkEQ((R_test, H_test), *popt)

# Calculate the mean squared error
testVal = mean_squared_error(y_test, y_pred, squared=False)
print(testVal)

# Iterate through target PPM values and calculate the 95% confidence interval for each value
for i in range(0, 1001, 200):

    # Filter the data for the current target PPM value
    df = file2[file2['Target PPM'] == i]

    # Calculate the model output using the filtered data and optimized parameters
    myData = funkEQ((df.loc[:, 'Resistance'], df.loc[:, 'RelativeHumidity']), *popt)

    # Calculate the 95% confidence interval using the t-distribution
    low95, high95 = st.t.interval(0.95, len(myData)-1, loc=np.mean(myData), scale=st.sem(myData))


    # Print the 95% confidence interval for the current target PPM value
    print("The 95% Confidence Interval for " + str(i) + " is (" + str(low95) + ", " + str(high95) + ")")


100%|██████████| 100000/100000 [02:30<00:00, 662.89it/s]


The initial conditions to get this optimization were: [  0  -1 -20  -9  18 -19]


ValueError: Input contains NaN.