In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

#load the file and inspect the first few rows
filepath = r"C:\Users\Admin\Machine Learning Fundamentals\CSV_files\MachineLearning-Data.xlsx"
data = pd.read_excel(filepath)

#drop the empty column
data = data.drop(columns=['DA concentration'])
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   DA Concentration (uM)  50 non-null     int64  
 1   AA Concentration (uM)  50 non-null     int64  
 2   Peak Current (uA)      50 non-null     float64
 3   Peak Potential (mV)    50 non-null     float64
 4   Peak Area (uC)         50 non-null     float64
dtypes: float64(3), int64(2)
memory usage: 2.1 KB


Unnamed: 0,DA Concentration (uM),AA Concentration (uM),Peak Current (uA),Peak Potential (mV),Peak Area (uC)
0,0,100,8.763,133.447,40.418
1,0,150,9.755,147.679,44.194
2,0,200,14.559,172.276,70.734
3,0,300,21.614,174.853,112.173
4,0,400,31.215,164.517,168.123


In [2]:
#split the data into features and target
X = data.drop(columns=['DA Concentration (uM)','AA Concentration (uM)'])
y = data[['DA Concentration (uM)', 'AA Concentration (uM)']]

X.head()

Unnamed: 0,Peak Current (uA),Peak Potential (mV),Peak Area (uC)
0,8.763,133.447,40.418
1,9.755,147.679,44.194
2,14.559,172.276,70.734
3,21.614,174.853,112.173
4,31.215,164.517,168.123


In [4]:
y.head()

Unnamed: 0,DA Concentration (uM),AA Concentration (uM)
0,0,100
1,0,150
2,0,200
3,0,300
4,0,400


SVR doesn't natively support multi-output regression - it expects y (target variable to be 1D array). To train and predict for two targets we need to train seperate SVR model for each target variable.

In [3]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standarize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [4]:
# Initialize separate scalers for DA and AA concentrations
scaler_y_DA = StandardScaler()
scaler_y_AA = StandardScaler()

# Scale the DA and AA concentrations separately
y_train_DA_scaled = scaler_y_DA.fit_transform(y_train[['DA Concentration (uM)']])
y_train_AA_scaled = scaler_y_AA.fit_transform(y_train[['AA Concentration (uM)']])

In [5]:
# Train the SVR model for DA Concentration
svr_model_DA = SVR(kernel='rbf', C=100, gamma=1, epsilon=0.1)
svr_model_DA.fit(X_train_scaled, y_train_DA_scaled.ravel())  # Flattening to 1D array

In [6]:
svr_model_AA = SVR(kernel='rbf', C=100, gamma=1, epsilon=0.1)
svr_model_AA.fit(X_train_scaled, y_train_AA_scaled.ravel())


In [7]:
# Predictions for the test set
y_pred_DA = svr_model_DA.predict(X_test_scaled)
y_pred_AA = svr_model_AA.predict(X_test_scaled)

# Inverse transform the predictions back to the original scale (if scaling was done)
y_pred_DA_original = scaler_y_DA.inverse_transform(y_pred_DA.reshape(-1, 1))
y_pred_AA_original = scaler_y_DA.inverse_transform(y_pred_AA.reshape(-1, 1))

In [8]:
print(y_pred_DA_original[:5])
print(y_pred_AA_original[:5])

[[12.60997056]
 [90.96112713]
 [83.31658191]
 [31.96435631]
 [ 2.33270698]]
[[168.92827816]
 [ 36.23009202]
 [ -2.24341508]
 [ 16.58579723]
 [ 11.22459641]]


In [None]:
# Evaluate the model for DA Concentration
mse_DA = mean_squared_error(y_test['DA Concentration (uM)'], y_pred_DA_original)
r2_DA = r2_score(y_test['DA Concentration (uM)'], y_pred_DA_original)


In [10]:
print(f"Mean Squared Error for DA Concentration: {mse_DA}")
print(f"R^2 Score for DA Concentration: {r2_DA}")

Mean Squared Error for DA Concentration: 1266.3194563836512
R^2 Score for DA Concentration: 0.591542792876816


In [11]:
#Evaluate the model for AA Concentration
mse_AA = mean_squared_error(y_test['AA Concentration (uM)'], y_pred_AA_original)
r2_AA = r2_score(y_test['AA Concentration (uM)'], y_pred_AA_original)
print(f"Mean Squared Error for AA Concentration: {mse_AA}")
print(f"R^2 Score for AA Concentration: {r2_AA}")

Mean Squared Error for AA Concentration: 260510.67953944422
R^2 Score for AA Concentration: -0.002596559135162879


In [12]:
#lets create a table to compare the actual and predicted values for both DA and AA concentrations together
results = pd.DataFrame({'Actual DA Concentration': y_test['DA Concentration (uM)'], 'Predicted DA Concentration': y_pred_DA_original.ravel(),
                        'Actual AA Concentration': y_test['AA Concentration (uM)'], 'Predicted AA Concentration': y_pred_AA_original.ravel()})
results.head()

Unnamed: 0,Actual DA Concentration,Predicted DA Concentration,Actual AA Concentration,Predicted AA Concentration
13,0,12.609971,1750,168.928278
39,200,90.961127,200,36.230092
30,75,83.316582,0,-2.243415
45,30,31.964356,30,16.585797
17,10,2.332707,0,11.224596


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create subplots - one for DA and one for AA
fig = make_subplots(rows=1, cols=2, 
                    subplot_titles=('DA Concentration Prediction', 'AA Concentration Prediction'))

# Add DA concentration scatter plot
fig.add_trace(
    go.Scatter(
        x=results['Actual DA Concentration'],
        y=results['Predicted DA Concentration'],
        mode='markers',
        name='DA Predictions',
        marker=dict(color='blue', opacity=0.6),
        hovertemplate='Actual: %{x:.2f}<br>Predicted: %{y:.2f}<extra></extra>'
    ),
    row=1, col=1
)

# Add perfect prediction line for DA
fig.add_trace(
    go.Scatter(
        x=[results['Actual DA Concentration'].min(), results['Actual DA Concentration'].max()],
        y=[results['Actual DA Concentration'].min(), results['Actual DA Concentration'].max()],
        mode='lines',
        name='Perfect Prediction (DA)',
        line=dict(color='red', dash='dash')
    ),
    row=1, col=1
)

# Add AA concentration scatter plot
fig.add_trace(
    go.Scatter(
        x=results['Actual AA Concentration'],
        y=results['Predicted AA Concentration'],
        mode='markers',
        name='AA Predictions',
        marker=dict(color='green', opacity=0.6),
        hovertemplate='Actual: %{x:.2f}<br>Predicted: %{y:.2f}<extra></extra>'
    ),
    row=1, col=2
)

# Add perfect prediction line for AA
fig.add_trace(
    go.Scatter(
        x=[results['Actual AA Concentration'].min(), results['Actual AA Concentration'].max()],
        y=[results['Actual AA Concentration'].min(), results['Actual AA Concentration'].max()],
        mode='lines',
        name='Perfect Prediction (AA)',
        line=dict(color='red', dash='dash')
    ),
    row=1, col=2
)

# Update layout
fig.update_layout(
    title='SVR Model Actual vs Predicted Concentrations',
    width=1200,
    height=600,
    showlegend=True,
    hovermode='closest'
)

# Update axes labels
fig.update_xaxes(title_text='Actual DA Concentration (µM)', row=1, col=1)
fig.update_yaxes(title_text='Predicted DA Concentration (µM)', row=1, col=1)
fig.update_xaxes(title_text='Actual AA Concentration (µM)', row=1, col=2)
fig.update_yaxes(title_text='Predicted AA Concentration (µM)', row=1, col=2)

# Show the plot
fig.show()