# In [None]:
# Relevant libraries
import pandas as pd
import scipy.optimize as sc
import scipy.stats as st
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from scipy.stats import f
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt

# Read the combined dataset from disk
file2 = pd.read_csv("../Output/allDataEver.csv")
# Alternative filter, currently disabled: keep only rows with 'Target PPM' up to 500
#file2=file2[file2['Target PPM']<=500]

# Discard every row whose 'Target PPM' is exactly 150
file2 = file2.loc[file2['Target PPM'] != 150]
# The response is 'Target PPM'; every other column is kept as a predictor
y = file2['Target PPM']
X = file2.drop(columns='Target PPM')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Funk Equation
# a*R+b
# a*np.exp(-1*R*b+c)+d
# a*R**b+c
# a*R**b+c*H*(a*R**b+c)+d  (Basically Bastviken)
# (a*np.exp(-1*R*b+c)+d)+f*H*(a*np.exp(-1*R*b+c)+d)+g
# a*np.exp(-1*R*b+c)+d*np.exp(-1*H*f+g)+h (Funk Equation)
# a*np.exp((-1*R*b+c)+(-1*H*d+e))+f 4.2
#                                                           W/1000  W/500   UV500   UV1000
# a**((-1*R*b+c)+(-1*H*d+e))+f 6.4                          87.79   42.64   62.74   116.35
# a**((-1*R*b)+(-1*H*c)+d)+e   6.5                          87.79

# a**((((-1*R)/(H**b))*c)+(-1*H*d)+e)+f 7.1                 74.09   35.76   62.37   113.46


# a**((((-1*R)/(H**b))*c)+(-1*H*d)+(-1*T*e)+f)+g 8.1
# a**((((-1*R)/(H**b))*c)+(-1*H*d)+(-1*T*e)+(((-1)/(T*f*H**g))*h)+i)+j 8.2
# a**((((-1*R)/(H**b))*c)+(-1*H*d)+(-1*T*e)+(((-1*T*f)/(H**g))*h)+i)+j 8.3                           5.764

# a**((((-1*R)/(H**b))*c)+(-1*H*d)+(-1*T*e)+(((-1*T*f)/(H**g))*h)+i)+j*np.exp(-1*T*k)+l 9.1  (the form currently implemented in funkEQ below)


#0.97**((((-1*R)/(H**(-0.66)))*c)+(-1*H)+(1.21*T)+(((-1.22*T)/(H**0.23))*1.25)+-178.26)+j            5.865



def funkEQ(X, a, b, c, d, e, f, g, h, i, j, k, l):
    """Evaluate the 12-parameter model (form 9.1 in the notes above).

    Computes ``a**E + j*exp(-T*k) + l`` where the exponent is
    ``E = (-R / H**b)*c - H*d - T*e + (-T*f / H**g)*h + i``.

    Parameters
    ----------
    X : sequence of three items
        Unpacked as ``(R, H, T)``. The callers in this file pass resistance,
        relative humidity and temperature, in that order. Scalars and
        array-likes both work; arithmetic is element-wise.
    a, b, c, d, e, f, g, h, i, j, k, l : float
        Model coefficients.

    Returns
    -------
    The model value(s), with the same shape as the broadcast inputs.

    Notes
    -----
    NumPy overflow warnings are silenced while the expression is evaluated.
    """
    resistance, humidity, temperature = X
    with np.errstate(over='ignore'):
        # Build the exponent term by term, in the same left-to-right order
        # as the one-line formula.
        resistance_term = ((-1 * resistance) / (humidity ** b)) * c
        humidity_term = -1 * humidity * d
        temperature_term = -1 * temperature * e
        cross_term = ((-1 * temperature * f) / (humidity ** g)) * h
        exponent = resistance_term + humidity_term + temperature_term + cross_term + i
        # Extra exponential term in temperature alone.
        decay = j * np.exp(-1 * temperature * k)
        return a ** exponent + decay + l
# Initial guess for the optimizer: one value of 1 per funkEQ coefficient (a..l)
p0 = (1,) * 12

def residual(params, X, y):
    """Return the sum of squared differences between ``y`` and the model.

    ``params`` is unpacked into the coefficients of ``funkEQ`` and ``X`` is
    passed through to it unchanged.
    """
    errors = y - funkEQ(X, *params)
    return np.sum(errors ** 2)

# Fit funkEQ separately for each sensor with scipy's curve_fit, using
# resistance, relative humidity and temperature as inputs and 'Target PPM'
# as the response. Each fit may use up to 1,000,000 function evaluations.
for s in [0, 3, 5, 7, 8, 10, 13, 15]:
    # Filter into a separate name. Reassigning file2 here would leave only the
    # first sensor's rows in it, so every later sensor would get an empty frame.
    sensor_data = file2[file2['SensorID']==s]
    if sensor_data.empty:
        # Nothing to fit for this sensor; curve_fit would fail on empty input.
        print(f"No rows for SensorID {s}; skipping fit")
        continue
    xDataResistance = sensor_data.loc[:, 'Resistance']
    xDataRatio = sensor_data.loc[:, 'Ratio']
    xDataRH = sensor_data.loc[:, 'RelativeHumidity']
    xDataTemp = sensor_data.loc[:, 'Temperature']
    yDataTargetPPM = sensor_data.loc[:, 'Target PPM']
    popt, pcov = sc.curve_fit(funkEQ, (xDataResistance, xDataRH, xDataTemp), yDataTargetPPM, p0, maxfev=1000000)

    # Print the optimized parameters as [a b c d e f g h i j k l]
    print(popt)

#########
# Getting RMSE of Equation
#########

# Pull the three model inputs out of the held-out test split
R_test = X_test.loc[:, 'Resistance']
H_test = X_test.loc[:, 'RelativeHumidity']
T_test = X_test.loc[:, 'Temperature']

# Generate the predictions using the optimized parameters and the input data
# NOTE(review): popt holds the parameters of the last fit performed in the
# sensor loop, while X_test comes from a split made before that loop over
# every sensor's rows. Confirm this is the intended comparison.
y_pred = funkEQ((R_test, H_test, T_test), *popt)

# Root mean squared error. The square root is taken explicitly rather than
# passing squared=False, which scikit-learn deprecated in 1.4 and removed in 1.6.
testVal = np.sqrt(mean_squared_error(y_test, y_pred))
print(testVal)

#########
# All the code needed to display predicted vs actual graphs
#########

# Degrees of freedom: number of fitted observations minus number of parameters.
# yDataTargetPPM, popt and pcov are whatever the last curve_fit call produced.
df = len(yDataTargetPPM) - len(popt)

# Two-sided Student's t critical value for the 95% confidence level.
# The distribution is reached through the `st` alias of scipy.stats; the
# imports above only bring in `f` by name, so a bare `t` would be undefined.
t_val = st.t.ppf(1-0.05/2, df)

# Standard error of each parameter: square root of the covariance diagonal
perr = np.sqrt(np.diag(pcov))

# Compute the standard error of the predictions
def predict_std(X, popt, perr, func=None):
    """Propagate parameter standard errors to the model predictions.

    Uses first-order (delta-method) error propagation:
    ``sqrt(sum_i (d f / d p_i * perr_i) ** 2)`` for each data point. The
    partial derivatives are estimated with central finite differences around
    ``popt``. The previous implementation did not differentiate at all; it
    evaluated the model with each parameter replaced by 1.

    Only the per-parameter standard errors are available here, so
    correlations between parameters are not taken into account.

    Parameters
    ----------
    X : passed unchanged as the first argument of ``func``.
    popt : sequence of float
        Parameter values at which to linearize the model.
    perr : sequence of float
        Standard error of each parameter, same length as ``popt``.
    func : callable, optional
        Model called as ``func(X, *params)`` and returning one value per data
        point. Defaults to ``funkEQ``.

    Returns
    -------
    numpy.ndarray
        One standard error per data point.
    """
    if func is None:
        func = funkEQ
    popt = np.asarray(popt, dtype=float)
    perr = np.asarray(perr, dtype=float)

    # Cube root of machine epsilon is the usual relative step size for central
    # differences; scaling by max(1, |p|) keeps the step usable when p is near 0.
    rel_step = np.finfo(float).eps ** (1.0 / 3.0)

    columns = []
    for idx, value in enumerate(popt):
        step = rel_step * max(1.0, abs(value))
        upper = popt.copy()
        lower = popt.copy()
        upper[idx] = value + step
        lower[idx] = value - step
        # Divide by the step actually represented in floating point.
        delta = upper[idx] - lower[idx]
        f_upper = np.asarray(func(X, *upper), dtype=float)
        f_lower = np.asarray(func(X, *lower), dtype=float)
        columns.append((f_upper - f_lower) / delta)

    # Rows are data points, columns are parameters.
    jac = np.array(columns).T
    return np.sqrt(np.sum((jac * perr) ** 2, axis=1))

# 95% confidence band for the predictions: each prediction plus or minus the
# t critical value times its standard error
predict_std_val = predict_std((R_test, H_test, T_test), popt, perr)
conf_int_lower, conf_int_upper = (
    y_pred - t_val * predict_std_val,
    y_pred + t_val * predict_std_val,
)
