In [1]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas as pd

#load the data
filepath = r"C:\Users\Admin\Machine Learning Fundamentals\CSV_files\MLData2.xlsx"
df = pd.read_excel(filepath)

#split the data into features variables and target variable
df = df.drop(columns=['DA concentration'])
X = df.drop(columns=['DA Concentration (uM)'])
y = df['DA Concentration (uM)']

#for our train test split we want to manually choose only part of data that was suggested
test_indices = list(range(43,48))
X_test = X.iloc[test_indices]
y_test = y.iloc[test_indices]

In [2]:
#create a training set out of all remaining data
X_train = X.drop(test_indices)
y_train = y.drop(test_indices)

# Standarize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
#initialize the KNN regressor and train the model
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

#predict the target variable
y_pred = knn_model.predict(X_test_scaled)

#evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

#display the evaluation metrics
print("Mean Squared Error: ", mse)
print("R2 Score: ", r2)

#show predictions vs actual values
data = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(data)

Mean Squared Error:  4000.0
R2 Score:  0.13043478260869568
    Actual  Predicted
43      50       30.0
44       0       40.0
45     100      220.0
46     200      140.0
47      50       50.0


In [4]:
import plotly.express as px
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=y_test,
    y=y_pred,
    mode='markers',
    name='Predictions',
    marker=dict(color='blue', opacity=0.5)
))

# Add the diagonal line for actual value
fig.add_trace(go.Scatter(
    x=[y_test.min(), y_test.max()],
    y=[y_test.min(), y_test.max()],
    mode='lines',
    name='Perfect Prediction',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest'
)

fig.show()

In [None]:
#now with a change in hyperparameter
#initialize the KNN regressor and train the model
knn_model2 = KNeighborsRegressor(n_neighbors=3, weights='distance', p=1)
knn_model2.fit(X_train_scaled, y_train)

#predict the target variable
y_pred2 = knn_model2.predict(X_test_scaled)

#evaluate the model
mse = mean_squared_error(y_test, y_pred2)
r2 = r2_score(y_test, y_pred)

#display the evaluation metrics
print("Mean Squared Error: ", mse)
print("R2 Score: ", r2)

#show predictions vs actual values
data = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred2})
print(data)

Mean Squared Error:  924.7421768715246
R2 Score:  0.13043478260869568
    Actual   Predicted
43      50   18.457103
44       0   31.437786
45     100  148.115944
46     200  181.997890
47      50   51.096407


In [7]:
#plot the actual vs predicted values based on the new model
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=y_test,
    y=y_pred2,
    mode='markers',
    name='Predictions',
    marker=dict(color='blue', opacity=0.5)
))

# Add the diagonal line for actual value
fig.add_trace(go.Scatter(
    x=[y_test.min(), y_test.max()],
    y=[y_test.min(), y_test.max()],
    mode='lines',
    name='Perfect Prediction',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest'
)

fig.show()

In [10]:
new_data = pd.DataFrame({
    'AA Concentration (uM)': [50],
    'Peak Current (uA)': [7.00],
    'Peak Potential (mV)': [56.0],
    'Peak Area (uC)': [14.0]
})

#standarize the new data
new_data_scaled = scaler.transform(new_data)

# let's make a prediction with given feature variables
prediction = knn_model.predict(new_data_scaled)
print(f"Predicted DA Concentration: {prediction[0]:.4f}")

Predicted DA Concentration: 30.0000
