In [20]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import root_mean_squared_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [21]:
#Example data for fractional charge yield.  Random values between 0 and 1
frac_charge_yield = np.random.random_sample(200)

In [22]:
#Example data for radiation softness, random values between 0.01 and 0.1
radiation_softness = np.random.random_sample(200) / 10

In [23]:
#Example data for dose, random values between 1 and 100
dose = np.random.random_sample(200) * 100

In [24]:
#Using formula excluding g, q, e(ox) and e(0)
voltage_shift = dose * radiation_softness * frac_charge_yield

In [25]:
#Create and experimental voltage shift that is is some factor greater than 1 x voltage shift to simulate the observed experimental logarithmic values
experimental_voltage_shift = voltage_shift * (1 + np.random.random_sample(200))

In [26]:
#Create Pandas DataFrame from the data
df = pd.DataFrame(data={
    'frac_charge_yield': np.round(frac_charge_yield, 4),
    'radiation_softness': np.round(radiation_softness, 4),
    'dose': np.round(dose, 4),
    'voltage_shift': np.round(voltage_shift, 4),
    'experimental_voltage_shift': np.round(experimental_voltage_shift, 4)
    
})

In [27]:
svr_model = SVR()   #Train a standard SVR model so we can evaluate best fit parameters
scaler = StandardScaler()  #Initialize Scaling function
X = df.drop(columns=['experimental_voltage_shift'])   #Separate input values from the desired output
y = df['experimental_voltage_shift']   #Isolate the desired output parameter

#Check data for potential error causing entries
print(pd.isnull(y).sum())   
print(pd.isnull(X).sum())

0
frac_charge_yield     0
radiation_softness    0
dose                  0
voltage_shift         0
dtype: int64


In [28]:
#Apply scaling function to standardize the features causing less skewed data


In [29]:
#Create a train/test split with the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [30]:
#Parameter grid allows you to create an object outlining all the combinations of parameterization you would like to try vs the model to find best fit
param_grid = {
    'kernel': ['linear', 'poly', 'rbf'],  # Example kernels
    'C': [0.001, 0.01, 0.1, 1, 10, 100],         # Regularization parameter
    'epsilon': [0.1, 0.2, 0.5, 0.8],      # Epsilon in the epsilon-SVR model
}

In [31]:
#Initialize GridSearch with the model to find best parameterization
grid_search = GridSearchCV(estimator=svr_model, param_grid=param_grid, scoring='neg_mean_squared_error', verbose=2)

In [32]:
#Train grid search with training data sets
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] END ................C=0.001, epsilon=0.1, kernel=linear; total time=   0.0s
[CV] END ................C=0.001, epsilon=0.1, kernel=linear; total time=   0.0s
[CV] END ................C=0.001, epsilon=0.1, kernel=linear; total time=   0.0s
[CV] END ................C=0.001, epsilon=0.1, kernel=linear; total time=   0.0s
[CV] END ................C=0.001, epsilon=0.1, kernel=linear; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.1, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.1, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.1, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.1, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.1, kernel=poly; total time=   0.0s
[CV] END ...................C=0.001, epsilon=0.1, kernel=rbf; total time=   0.0s
[CV] END ...................C=0.001, epsilon=0.

[CV] END ..................C=0.001, epsilon=0.8, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.8, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.8, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.8, kernel=poly; total time=   0.0s
[CV] END ..................C=0.001, epsilon=0.8, kernel=poly; total time=   0.0s
[CV] END ...................C=0.001, epsilon=0.8, kernel=rbf; total time=   0.0s
[CV] END ...................C=0.001, epsilon=0.8, kernel=rbf; total time=   0.0s
[CV] END ...................C=0.001, epsilon=0.8, kernel=rbf; total time=   0.0s
[CV] END ...................C=0.001, epsilon=0.8, kernel=rbf; total time=   0.0s
[CV] END ...................C=0.001, epsilon=0.8, kernel=rbf; total time=   0.0s
[CV] END .................C=0.01, epsilon=0.1, kernel=linear; total time=   0.0s
[CV] END .................C=0.01, epsilon=0.1, kernel=linear; total time=   0.0s
[CV] END .................C=

In [33]:
#Using the results of the gridsearch, assign optimal parameters to variables for usage in final model
print(f'Best Parameters: {grid_search.best_params_}\nBest Score {-grid_search.best_score_}')
kernel = grid_search.best_params_['kernel']
C = grid_search.best_params_['C']
epsilon = grid_search.best_params_['epsilon']

Best Parameters: {'C': 1, 'epsilon': 0.1, 'kernel': 'linear'}
Best Score 0.7427696319370851


In [34]:
#Apply parameters to the final model
best_svr_model = SVR(kernel=kernel, C=C, epsilon=epsilon)

In [35]:
#Train final SVR model
best_svr_model.fit(X_train, y_train)

In [36]:
#Use the test split data to create predictions for output values 
y_pred = best_svr_model.predict(X_test)
print(X_test)

[[ 0.48854652 -0.84210787 -0.48422248 -0.44638432]
 [-0.19059105  0.06895014 -0.55640168 -0.3170074 ]
 [ 0.83389375  1.1683848  -0.60877584  0.40284353]
 [-0.15660018  0.40117806 -0.07816784  0.00293753]
 [-1.2521259   0.95603293 -0.95200955 -0.61702497]
 [ 1.31418473  1.28826085  0.92920556  2.53149883]
 [-1.51079642 -0.14682676 -1.33269005 -0.76305834]
 [ 1.50555332 -0.72565685  0.95324818  0.38090329]
 [ 0.32471053  1.42183703  0.86978702  1.54418843]
 [ 1.48277944  0.40460309  1.17515977  1.90021397]
 [ 1.21595112 -1.56478922 -1.80536506 -0.79627062]
 [-0.42444823 -0.7085317   0.19336246 -0.42560148]
 [-0.43056659 -0.25300269 -1.76025092 -0.77936255]
 [-1.72765816  0.05867505 -1.74493676 -0.79521386]
 [ 0.81791804  0.49365387 -0.20087967  0.42267025]
 [ 1.26183879  1.14440959  1.37841985  2.82900031]
 [-0.86768917  0.5998298   0.95784859  0.01410893]
 [ 0.23191545 -1.45176323  1.3515054  -0.56977296]
 [ 0.26590632 -0.04065083 -0.02169644  0.01878884]
 [-1.41936098  1.52801296 -1.25

In [37]:
#Use MSE error analysis to determine the accuracy of the model
print(root_mean_squared_error(y_test, y_pred))
print(X_test)

0.501299678495909
[[ 0.48854652 -0.84210787 -0.48422248 -0.44638432]
 [-0.19059105  0.06895014 -0.55640168 -0.3170074 ]
 [ 0.83389375  1.1683848  -0.60877584  0.40284353]
 [-0.15660018  0.40117806 -0.07816784  0.00293753]
 [-1.2521259   0.95603293 -0.95200955 -0.61702497]
 [ 1.31418473  1.28826085  0.92920556  2.53149883]
 [-1.51079642 -0.14682676 -1.33269005 -0.76305834]
 [ 1.50555332 -0.72565685  0.95324818  0.38090329]
 [ 0.32471053  1.42183703  0.86978702  1.54418843]
 [ 1.48277944  0.40460309  1.17515977  1.90021397]
 [ 1.21595112 -1.56478922 -1.80536506 -0.79627062]
 [-0.42444823 -0.7085317   0.19336246 -0.42560148]
 [-0.43056659 -0.25300269 -1.76025092 -0.77936255]
 [-1.72765816  0.05867505 -1.74493676 -0.79521386]
 [ 0.81791804  0.49365387 -0.20087967  0.42267025]
 [ 1.26183879  1.14440959  1.37841985  2.82900031]
 [-0.86768917  0.5998298   0.95784859  0.01410893]
 [ 0.23191545 -1.45176323  1.3515054  -0.56977296]
 [ 0.26590632 -0.04065083 -0.02169644  0.01878884]
 [-1.41936098

In [38]:
#Create a function to allow the user to input data values and receive the model predicted output value
def test_params(): 
    input_fractional = float(input("What fractional charge yield would you like to test? "))
    input_softness = float(input("What softness factor would you like to test? "))
    input_dose = float(input("What dose would you like to test? "))
    test_data = [[input_fractional, input_softness, input_dose]]
    return best_svr_model.predict(test_data)


In [39]:
# test_params()