In [21]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

In [22]:
# Load the raw training data; the path is relative to the notebook's working directory.
file_path = 'training_data.csv'
data = pd.read_csv(file_path)

# Binary target: 1 where the label is exactly 'high_bike_demand', 0 for every other value.
# A vectorized comparison replaces the per-row Python lambda and yields the same int64 column.
data['increase_stock_binary'] = (data['increase_stock'] == 'high_bike_demand').astype('int64')

# Copy of the frame without the string label (binary target kept); used for the correlation analysis.
corrdata = data.drop(columns=['increase_stock'])

# Features are every column except the two label columns; the target is the binary label.
X = data.drop(columns=['increase_stock', 'increase_stock_binary'])
y = data['increase_stock_binary']

# Hold out 20% of the rows as a test set.
# NOTE(review): a later cell repeats this split with random_state=1 and overwrites these
# four names -- confirm which seed is the intended one.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [23]:
# Preview the first five rows of the correlation frame (5 is head()'s default).
corrdata.head(n=5)

Unnamed: 0,hour_of_day,day_of_week,month,holiday,weekday,summertime,temp,dew,humidity,precip,snow,snowdepth,windspeed,cloudcover,visibility,increase_stock_binary
0,5,5,1,0,0,0,-7.2,-15.0,53.68,0.0,0,0.0,16.3,31.6,16.0,0
1,21,4,1,0,1,0,-1.3,-12.8,40.97,0.0,0,0.0,23.9,85.7,16.0,0
2,21,3,8,0,1,1,26.9,21.8,73.39,0.0,0,0.0,0.0,81.1,16.0,0
3,1,6,1,0,0,0,3.1,-4.0,59.74,0.0,0,0.0,19.2,0.0,16.0,0
4,17,0,3,0,1,0,11.7,-11.4,18.71,0.0,0,0.0,10.5,44.6,16.0,0


In [24]:
# Display the feature matrix; the rendered table is abbreviated to leading and trailing rows.
X

Unnamed: 0,hour_of_day,day_of_week,month,holiday,weekday,summertime,temp,dew,humidity,precip,snow,snowdepth,windspeed,cloudcover,visibility
0,5,5,1,0,0,0,-7.2,-15.0,53.68,0.000,0,0.0,16.3,31.6,16.0
1,21,4,1,0,1,0,-1.3,-12.8,40.97,0.000,0,0.0,23.9,85.7,16.0
2,21,3,8,0,1,1,26.9,21.8,73.39,0.000,0,0.0,0.0,81.1,16.0
3,1,6,1,0,0,0,3.1,-4.0,59.74,0.000,0,0.0,19.2,0.0,16.0
4,17,0,3,0,1,0,11.7,-11.4,18.71,0.000,0,0.0,10.5,44.6,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1595,3,5,6,0,0,1,21.5,19.4,87.68,0.000,0,0.0,10.6,24.4,16.0
1596,14,0,6,0,1,1,23.2,20.1,82.43,2.217,0,0.0,9.8,92.1,10.4
1597,13,0,3,0,1,1,13.9,-2.2,32.93,0.000,0,2.0,18.2,79.3,16.0
1598,14,5,3,0,0,1,11.7,-9.3,22.09,0.000,0,0.0,5.8,24.4,16.0


In [25]:
# Display the binary target Series (name, length and dtype appear in the rendered footer).
y

0       0
1       0
2       0
3       0
4       0
       ..
1595    0
1596    0
1597    0
1598    1
1599    0
Name: increase_stock_binary, Length: 1600, dtype: int64

In [26]:
# Pairwise correlations between all columns of corrdata (features plus the binary target).
# 'pearson' is pandas' default method, spelled out here for the reader.
correlation_matrix = corrdata.corr(method='pearson')

In [27]:
# Display the pairwise correlation table built from corrdata.
# NOTE(review): the `snow` column renders empty (NaN) in the output -- presumably that
# column is constant in this dataset, so its correlation is undefined; confirm.
correlation_matrix

Unnamed: 0,hour_of_day,day_of_week,month,holiday,weekday,summertime,temp,dew,humidity,precip,snow,snowdepth,windspeed,cloudcover,visibility,increase_stock_binary
hour_of_day,1.0,-0.005963,-0.016503,0.012732,-0.028899,0.005376,0.157994,-0.023067,-0.325988,-0.008747,,-0.006031,0.161313,0.002672,0.012115,0.240544
day_of_week,-0.005963,1.0,-0.032363,-0.128759,-0.78615,0.010263,0.003727,0.014271,0.016976,-0.011158,,-0.04593,0.033726,0.043354,-0.01881,0.083688
month,-0.016503,-0.032363,1.0,-0.016021,0.049254,0.046929,0.207436,0.272927,0.173159,0.004232,,-0.127751,-0.207056,-0.083544,0.028287,0.037212
holiday,0.012732,-0.128759,-0.016021,1.0,0.118294,-0.132132,-0.055069,-0.060239,-0.01726,-0.005564,,-0.013385,-0.019129,0.014093,-0.028171,-0.004909
weekday,-0.028899,-0.78615,0.049254,0.118294,1.0,-0.023872,0.007433,0.009804,0.016758,0.029214,,0.024333,-0.024126,0.008788,-0.011815,-0.116446
summertime,0.005376,0.010263,0.046929,-0.132132,-0.023872,1.0,0.707637,0.645944,0.064755,0.01293,,-0.127991,-0.028827,-0.022964,0.068963,0.216052
temp,0.157994,0.003727,0.207436,-0.055069,0.007433,0.707637,1.0,0.871536,0.008306,-0.003313,,-0.153692,-0.037656,-0.012421,0.119909,0.336981
dew,-0.023067,0.014271,0.272927,-0.060239,0.009804,0.645944,0.871536,1.0,0.48959,0.071566,,-0.11966,-0.190793,0.146844,-0.050474,0.132663
humidity,-0.325988,0.016976,0.173159,-0.01726,0.016758,0.064755,0.008306,0.48959,1.0,0.174768,,0.033598,-0.340557,0.331976,-0.376796,-0.308726
precip,-0.008747,-0.011158,0.004232,-0.005564,0.029214,0.01293,-0.003313,0.071566,0.174768,1.0,,0.011242,0.060778,0.12666,-0.455058,-0.059304


In [28]:
# Reduced feature frame holding a hand-picked subset of columns.
# NOTE(review): presumably chosen from the correlation table -- confirm the selection criterion.
X_corr = data.loc[:, [
    'temp',
    'humidity',
    'hour_of_day',
    'summertime',
    'dew',
    'visibility',
    'windspeed',
]]

In [29]:
# Target for the reduced feature set; this is an alias of `y` (same Series object, not a copy).
y_corr = y

In [38]:
# 80/20 train/test split with seed 1; this rebinds the names produced by the
# random_state=0 split made when the data was loaded.
# NOTE(review): the split uses the full feature matrix X, not the X_corr subset
# defined earlier -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [39]:
# Standardize the features: scaling statistics are learned from the training split only
# and then applied unchanged to the test split.
# Note: X_train / X_test are rebound to the scaled output here, so the unscaled splits
# are no longer available under these names after this cell runs.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build and fit a 2-nearest-neighbour classifier in one step (fit() returns the estimator).
# NOTE(review): with an even k and two classes the neighbour vote can tie -- confirm
# that n_neighbors=2 is the intended setting.
knn = KNeighborsClassifier(n_neighbors=2).fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = knn.predict(X_test)

# Evaluation metrics on the test split.
accuracy_score_data = accuracy_score(y_test, y_pred)
# The confusion matrix is computed and kept for inspection but is not printed below.
confusion_matrix_result = confusion_matrix(y_test, y_pred)
classification_report_result = classification_report(y_test, y_pred)

# Report the results.
print("Classification Report:\n", classification_report_result)
print("accuracy_score:\n", accuracy_score_data)

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.93       270
           1       0.68      0.30      0.42        50

    accuracy                           0.87       320
   macro avg       0.78      0.64      0.67       320
weighted avg       0.85      0.87      0.85       320

accuracy_score:
 0.86875
