In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Location of the Excel workbook that holds the measurements.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs on a machine where this exact file exists.
filepath = r"C:\Users\Admin\Machine Learning Fundamentals\CSV_files\MachineLearning-Data.xlsx"

# Read the workbook and discard the 'DA concentration' column in one step
# (the original author describes that column as empty). The similarly named
# 'DA Concentration (uM)' column is a separate column and is kept.
data = pd.read_excel(filepath).drop(columns=['DA concentration'])

# Print the column/dtype summary, then show the first rows as the cell output.
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   DA Concentration (uM)  50 non-null     int64  
 1   AA Concentration (uM)  50 non-null     int64  
 2   Peak Current (uA)      50 non-null     float64
 3   Peak Potential (mV)    50 non-null     float64
 4   Peak Area (uC)         50 non-null     float64
dtypes: float64(3), int64(2)
memory usage: 2.1 KB


Unnamed: 0,DA Concentration (uM),AA Concentration (uM),Peak Current (uA),Peak Potential (mV),Peak Area (uC)
0,0,100,8.763,133.447,40.418
1,0,150,9.755,147.679,44.194
2,0,200,14.559,172.276,70.734
3,0,300,21.614,174.853,112.173
4,0,400,31.215,164.517,168.123


In [5]:
# Target is the AA concentration column; the features are every other column.
# NOTE(review): the features therefore still include 'DA Concentration (uM)'
# -- confirm that it is meant to be used as a predictor.
y = data['AA Concentration (uM)']
X = data.drop(columns=[y.name])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training rows only,
# and that same fitted transform is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Fit an RBF-kernel support vector regressor on the standardized training features.
# NOTE(review): y_train is passed unscaled while the features are standardized,
# so epsilon (and the effect of C) are relative to the raw target scale --
# confirm these hyperparameters were chosen with that in mind.
svr_model = SVR(
    kernel='rbf',
    C=100,
    gamma=1,
    epsilon=0.1,
).fit(X_train_scaled, y_train)

# Predict the target for the held-out, standardized test features.
y_pred = svr_model.predict(X_test_scaled)

# Score the predictions against the true held-out targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report both metrics, one per line, rounded to four decimals.
print(
    f"Mean Squared Error: {mse:.4f}",
    f"R^2 Score: {r2:.4f}",
    sep="\n",
)

# Side-by-side table of true vs. predicted values; rows keep y_test's index.
df = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': y_pred,
    }
)
print(df)

Mean Squared Error: 178942.0408
R^2 Score: 0.3113
    Actual   Predicted
13    1750  422.949088
39     200  280.231074
30       0   33.101653
45      30   83.750414
17       0   29.356408
48     300  305.104214
26     100  143.373550
25       0   11.255999
32     100  222.476755
19       0    6.169600


In [7]:
import plotly.express as px
import plotly.graph_objects as go

# Scatter of predicted vs. actual test values, with a y = x reference line.
fig = go.Figure()

# One semi-transparent blue marker per test sample: x = actual, y = predicted.
fig.add_scatter(
    x=y_test,
    y=y_pred,
    mode='markers',
    name='Predictions',
    marker={'color': 'blue', 'opacity': 0.5},
)

# Endpoints of the reference diagonal: the smallest and largest actual value.
# A point lying on this line is a prediction equal to the actual value.
actual_range = [y_test.min(), y_test.max()]
fig.add_scatter(
    x=actual_range,
    y=actual_range,
    mode='lines',
    name='Perfect Prediction',
    line={'color': 'red', 'dash': 'dash'},
)

# Title, axis labels, fixed canvas size and nearest-point hover behaviour.
fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual AA Values',
    yaxis_title='Predicted AA Values',
    width=800,
    height=600,
    hovermode='closest',
)

fig.show()