In [1]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the spreadsheet and discard the two columns that are not used as
# features ('DA concentration' and 'Peak Potential (mV)').
filepath = r"C:\Users\Admin\Machine Learning Fundamentals\CSV_files\MachineLearning-Data.xlsx"
df = pd.read_excel(filepath).drop(columns=['DA concentration', 'Peak Potential (mV)'])

# The target is the 'DA Concentration (uM)' column; every remaining column
# is used as a feature.
y = df['DA Concentration (uM)']
X = df.drop(columns=['DA Concentration (uM)'])

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features (the original note says this mirrors the SVR
# preprocessing). The scaler is fitted on the training rows only and then
# applied unchanged to both the training and the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [2]:
# Baseline model: a KNN regressor with k=5 (all other settings left at the
# scikit-learn defaults), trained on the standardized training features.
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

# Predict the target for the held-out (standardized) test features.
y_pred = knn_model.predict(X_test_scaled)

# Evaluate the predictions against the true test targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display the evaluation metrics.
print("Mean Squared Error: ", mse)
print("R2 Score: ", r2)

# Show predictions next to the actual values. The table gets its own name so
# that `df`, which holds the dataset loaded from the spreadsheet, is not
# overwritten by this comparison table.
results_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results_df)

Mean Squared Error:  945.9
R2 Score:  0.6948955729376662
    Actual  Predicted
13       0        0.0
39     200      112.0
30      75       50.0
45      30       30.0
17      10       22.0
48      30       52.0
26      50       42.0
25      50       35.0
32     100       87.0
19      20       22.0


In [3]:
import plotly.express as px
import plotly.graph_objects as go

# Markers: one point per test row, actual value on x and prediction on y.
prediction_points = go.Scatter(
    x=y_test,
    y=y_pred,
    mode='markers',
    name='Predictions',
    marker={'color': 'blue', 'opacity': 0.5},
)

# Dashed reference diagonal (predicted == actual), drawn across the range
# spanned by the actual test values.
actual_range = [y_test.min(), y_test.max()]
perfect_line = go.Scatter(
    x=actual_range,
    y=actual_range,
    mode='lines',
    name='Perfect Prediction',
    line={'color': 'red', 'dash': 'dash'},
)

# Assemble the figure: predictions first, reference line second.
fig = go.Figure()
fig.add_trace(prediction_points)
fig.add_trace(perfect_line)

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest',
)

fig.show()

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Standardize the features: fit the scaler on the training split only, then
# apply the same transformation to the test split.
scaler = StandardScaler()
X_train_scaled_manual = scaler.fit_transform(X_train)
X_test_scaled_manual = scaler.transform(X_test)

# NOTE(review): the scaler is fitted on the whole training split before the
# 5-fold cross-validation below, so each validation fold has contributed to
# the scaling statistics. Wrapping scaler + regressor in a Pipeline would keep
# the scaling inside each fold -- consider this if the CV score matters.

# Parameter grid for hyperparameter tuning.
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],     # number of neighbours averaged
    'weights': ['uniform', 'distance'],  # equal vs. inverse-distance weighting
    'p': [1, 2]                          # Minkowski power: 1 = Manhattan, 2 = Euclidean
}

# Use a separate name for the search template. GridSearchCV fits clones of the
# estimator it is given, so binding a fresh regressor to `knn_model` here would
# replace the already-trained baseline model with an unfitted one.
base_knn = KNeighborsRegressor()
grid_search = GridSearchCV(base_knn, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train_scaled_manual, y_train)

# Output the best parameters and best score (mean cross-validated negative MSE).
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Score (Negative MSE): {grid_search.best_score_}")

# Retrieve the winning model. With the default refit=True, GridSearchCV has
# already retrained it on the full training data, so no extra fit is needed.
best_knn_model = grid_search.best_estimator_

# Predict on the test set.
y_pred_manual_knn_best = best_knn_model.predict(X_test_scaled_manual)

# Evaluate the model's performance on the held-out test set.
mse_manual_knn_best = mean_squared_error(y_test, y_pred_manual_knn_best)
r2_manual_knn_best = r2_score(y_test, y_pred_manual_knn_best)

# Output performance metrics.
print(f"Mean Squared Error (Best KNN Manual Test Split): {mse_manual_knn_best}")
print(f"R^2 Score (Best KNN Manual Test Split): {r2_manual_knn_best}")

Best Parameters: {'n_neighbors': 3, 'p': 1, 'weights': 'distance'}
Best Score (Negative MSE): -1641.674582914828
Mean Squared Error (Best KNN Manual Test Split): 2662.25283599312
R^2 Score (Best KNN Manual Test Split): 0.14127801435590037


In [5]:
# Train a KNN regressor with the hyperparameters reported as best by the grid
# search (n_neighbors=3, distance weighting, p=1). The values are hard-coded,
# so they must be updated by hand if the search result changes.
knn_model2 = KNeighborsRegressor(n_neighbors=3, weights='distance', p=1)
knn_model2.fit(X_train_scaled, y_train)

# Predict the target for the held-out (standardized) test features.
y_pred2 = knn_model2.predict(X_test_scaled)

# Evaluate the model. The metrics carry the same "2" suffix as the other names
# in this cell so the baseline model's `mse` / `r2` are not overwritten and
# the two models can still be compared afterwards.
mse2 = mean_squared_error(y_test, y_pred2)
r2_2 = r2_score(y_test, y_pred2)

# Display the evaluation metrics.
print("Mean Squared Error: ", mse2)
print("R2 Score: ", r2_2)

# Show predictions next to the actual values.
df2 = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred2})
print(df2)

Mean Squared Error:  2662.25283599312
R2 Score:  0.14127801435590037
    Actual  Predicted
13       0   0.000000
39     200  53.192235
30      75  69.949046
45      30  40.790627
17      10  13.231794
48      30  79.166242
26      50  31.294565
25      50  41.031882
32     100  54.780741
19      20  14.980734


In [6]:
import plotly.express as px
import plotly.graph_objects as go

# Endpoints of the dashed "perfect prediction" diagonal, taken from the range
# of the actual test values.
diag_start = y_test.min()
diag_end = y_test.max()

# Figure-level appearance settings, applied after the traces are added.
layout_options = {
    'title': 'Actual vs Predicted Values',
    'xaxis_title': 'Actual Values',
    'yaxis_title': 'Predicted Values',
    'width': 800,
    'height': 600,
    'hovermode': 'closest',
}

fig = go.Figure()

# Trace 1: predictions of the second model (y_pred2) against the actual values.
fig.add_trace(
    go.Scatter(
        x=y_test,
        y=y_pred2,
        mode='markers',
        name='Predictions',
        marker={'color': 'blue', 'opacity': 0.5},
    )
)

# Trace 2: reference line on which predicted equals actual.
fig.add_trace(
    go.Scatter(
        x=[diag_start, diag_end],
        y=[diag_start, diag_end],
        mode='lines',
        name='Perfect Prediction',
        line={'color': 'red', 'dash': 'dash'},
    )
)

fig.update_layout(**layout_options)

fig.show()