In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

#load the file and inspect the first few rows
filepath = r"C:\Users\Admin\Machine Learning Fundamentals\CSV_files\MachineLearning-Data.xlsx"
data = pd.read_excel(filepath)

#drop the empty column
data = data.drop(columns=['DA concentration'])
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   DA Concentration (uM)  50 non-null     int64  
 1   AA Concentration (uM)  50 non-null     int64  
 2   Peak Current (uA)      50 non-null     float64
 3   Peak Potential (mV)    50 non-null     float64
 4   Peak Area (uC)         50 non-null     float64
dtypes: float64(3), int64(2)
memory usage: 2.1 KB


Unnamed: 0,DA Concentration (uM),AA Concentration (uM),Peak Current (uA),Peak Potential (mV),Peak Area (uC)
0,0,100,8.763,133.447,40.418
1,0,150,9.755,147.679,44.194
2,0,200,14.559,172.276,70.734
3,0,300,21.614,174.853,112.173
4,0,400,31.215,164.517,168.123


In [2]:
#split the data into features and target
X = data.drop(columns=['DA Concentration (uM)'])
y = data['DA Concentration (uM)']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standarize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
# Initialize and train the SVR model
svr_model = SVR(kernel='rbf', C=100, gamma='auto', epsilon=1.0)
svr_model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = svr_model.predict(X_test_scaled)

# Calculate performance metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}") 

#show predictions vs actual values
df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(df)

Mean Squared Error: 3060.3030
R^2 Score: 0.0129
    Actual  Predicted
13       0   1.610615
39     200  42.657740
30      75  54.724343
45      30  27.978835
17      10  21.257461
48      30  39.880211
26      50  29.941658
25      50  38.016780
32     100  31.881007
19      20  24.263862


In [4]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 1],
    'epsilon': [0.1, 0.2, 0.5, 1.0],
    'kernel': ['rbf', 'poly', 'linear', 'sigmoid']
}

grid_search = GridSearchCV(SVR(), param_grid, cv=5, scoring='r2')
grid_search.fit(X_train_scaled, y_train)

print("Best parameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_

Best parameters: {'C': 100, 'epsilon': 0.1, 'gamma': 1, 'kernel': 'rbf'}


In [5]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=y_test,
    y=y_pred,
    mode='markers',
    name='Predictions',
    marker=dict(color='blue', opacity=0.5)
))

# Add the diagonal line for actual value
fig.add_trace(go.Scatter(
    x=[y_test.min(), y_test.max()],
    y=[y_test.min(), y_test.max()],
    mode='lines',
    name='Perfect Prediction',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest'
)

fig.show()

In [7]:
# Initialize and train the SVR model
svr_model2 = SVR(kernel='rbf', C=100, gamma=1, epsilon=0.1)
svr_model2.fit(X_train_scaled, y_train)

# Make predictions
y_pred2 = svr_model2.predict(X_test_scaled)

# Calculate performance metrics
mse = mean_squared_error(y_test, y_pred2)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}") 

#show predictions vs actual values
df2 = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred2})
print(df2)

Mean Squared Error: 2237.5973
R^2 Score: 0.0129
    Actual  Predicted
13       0   9.301197
39     200  63.879979
30      75  78.443163
45      30  34.340468
17      10   8.825217
48      30  33.585970
26      50  33.649278
25      50  46.807167
32     100  41.419605
19      20  17.418726


In [8]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=y_test,
    y=y_pred2,
    mode='markers',
    name='Predictions',
    marker=dict(color='blue', opacity=0.5)
))

# Add the diagonal line for actual value
fig.add_trace(go.Scatter(
    x=[y_test.min(), y_test.max()],
    y=[y_test.min(), y_test.max()],
    mode='lines',
    name='Perfect Prediction',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest'
)

fig.show()