In [1]:
import os

import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

# Location of the Excel workbook. The default is the original author's local
# path; set the KNN_DATA_PATH environment variable to run the notebook on a
# machine where the file lives somewhere else.
filepath = os.environ.get(
    "KNN_DATA_PATH",
    r"C:\Users\Admin\Machine Learning Fundamentals\CSV_files\MachineLearning-Data.xlsx",
)

# Load the data into a dataframe.
df = pd.read_excel(filepath)

# Drop the 'DA concentration' column, which is not used in this analysis.
df = df.drop(columns=['DA concentration'])

# Assign the feature matrix (every remaining column except the target) and
# the target variable.
TARGET_COLUMN = 'AA Concentration (uM)'
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]

# Split the data into training and testing sets (80% / 20%, fixed seed so the
# split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features, same as for SVR. The scaler is fitted on the
# training split only and then applied to the test split, so no test-set
# statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [2]:
# Baseline model: k-nearest-neighbours regressor with k=5, trained on the
# standardized training features.
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

# Predict the target for the held-out test set.
y_pred = knn_model.predict(X_test_scaled)

# Evaluate the predictions against the true test targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display the evaluation metrics.
print("Mean Squared Error: ", mse)
print("R2 Score: ", r2)

# Show predictions next to the actual values. The table gets its own name
# rather than `df`, so the loaded dataset is not overwritten by this cell.
knn_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(knn_results)

Mean Squared Error:  19390.0
R2 Score:  0.9253760064040395
    Actual  Predicted
13    1750     1380.0
39     200      140.0
30       0        0.0
45      30      130.0
17       0        0.0
48     300      330.0
26     100      180.0
25       0        0.0
32     100      290.0
19       0        0.0


In [3]:
import plotly.express as px
import plotly.graph_objects as go

# One marker per test sample: actual value on x, first model's prediction on y.
prediction_points = go.Scatter(
    x=y_test,
    y=y_pred,
    mode='markers',
    name='Predictions',
    marker={'color': 'blue', 'opacity': 0.5},
)

# Dashed y = x reference line spanning the range of the actual values; a
# point lying on it was predicted exactly.
actual_range = [y_test.min(), y_test.max()]
perfect_fit_line = go.Scatter(
    x=actual_range,
    y=actual_range,
    mode='lines',
    name='Perfect Prediction',
    line={'color': 'red', 'dash': 'dash'},
)

# Assemble the figure from both traces, then apply titles and sizing.
fig = go.Figure(data=[prediction_points, perfect_fit_line])
fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest',
)

fig.show()

In [4]:
# Second KNN configuration: 3 neighbours, distance-weighted votes, and p=1
# (Manhattan distance under the default Minkowski metric). Fit on the same
# standardized training data as the first model.
knn_model2 = KNeighborsRegressor(n_neighbors=3, weights='distance', p=1).fit(
    X_train_scaled, y_train
)

# Predictions for the held-out test set.
y_pred2 = knn_model2.predict(X_test_scaled)

# Score the predictions against the true test targets.
r2 = r2_score(y_test, y_pred2)
mse = mean_squared_error(y_test, y_pred2)

# Report the metrics.
print("Mean Squared Error: ", mse)
print("R2 Score: ", r2)

# Side-by-side table of actual and predicted values, indexed like y_test.
df2 = y_test.to_frame(name='Actual').assign(Predicted=y_pred2)
print(df2)

Mean Squared Error:  4023.8321699601634
R2 Score:  0.9845139543020976
    Actual    Predicted
13    1750  1663.994983
39     200   279.276705
30       0     0.000000
45      30   108.084111
17       0     0.000000
48     300   301.735243
26     100   163.853769
25       0     0.000000
32     100   227.981324
19       0     0.000000


In [5]:
import plotly.express as px
import plotly.graph_objects as go

# One marker per test sample: actual value on x, second model's prediction on y.
prediction_points = go.Scatter(
    x=y_test,
    y=y_pred2,
    mode='markers',
    name='Predictions',
    marker={'color': 'blue', 'opacity': 0.5},
)

# Dashed y = x reference line spanning the range of the actual values; a
# point lying on it was predicted exactly.
actual_range = [y_test.min(), y_test.max()]
perfect_fit_line = go.Scatter(
    x=actual_range,
    y=actual_range,
    mode='lines',
    name='Perfect Prediction',
    line={'color': 'red', 'dash': 'dash'},
)

# Assemble the figure from both traces, then apply titles and sizing.
fig = go.Figure(data=[prediction_points, perfect_fit_line])
fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest',
)

fig.show()