-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
49 lines (40 loc) · 1.75 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from sklearn.model_selection import GridSearchCV
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
"""
Here I set the global variables, which
will be used to simple_neural_network the computing time
for both training and testing
"""
def loadDataset(fileName, samples):
    """Load the first `samples` rows of a labeled CSV dataset.

    The file must be comma-separated with the label in the first column
    and pixel/feature values in the remaining columns (MNIST CSV layout).

    Parameters:
        fileName: path to the CSV file.
        samples: number of rows to read from the top of the file.

    Returns:
        (x, y): x is a float ndarray of shape (samples, n_features) with
        values scaled into [0, 1]; y is an ndarray of integer labels.
    """
    train_data = pd.read_csv(fileName)  # Data has to be stored in a CSV file, separated by commas
    y = np.array(train_data.iloc[0:samples, 0])  # Labels column
    # Division by 255 normalizes 8-bit pixel intensities into [0, 1]
    x = np.array(train_data.iloc[0:samples, 1:]) / 255
    return x, y
# Define the grid of hyperparameter values to search over.
# leaf_size only affects tree construction speed, not predictions,
# so the search is effectively choosing between build-time trade-offs.
param_grid = {
    'n_neighbors': [3],
    'weights': ['distance'],
    'leaf_size': [1, 2, 3],
}
# Accuracy is the metric used to rank hyperparameter combinations
scoring = 'accuracy'
# Create a GridSearchCV object (cv=2 keeps the search cheap; verbose=2 logs progress)
clf = KNeighborsClassifier()
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, scoring=scoring, cv=2, verbose=2)
# Load the MNIST training and testing splits (features normalized to [0, 1])
train_x, train_y = loadDataset("../../../../datasets/mnist/mnist_train.csv", 50000)
test_x, test_y = loadDataset("../../../../datasets/mnist/mnist_test.csv", 10000)
# Fit the grid search object to the training data
grid_search.fit(train_x, train_y)
# Print the best hyperparameters and the best cross-validation score
print("Best hyperparameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)
# GridSearchCV defaults to refit=True, which already re-fits the best
# estimator on the full training set, so best_estimator_ is ready to use —
# re-calling fit() on it (as the original did) is redundant work.
best_clf = grid_search.best_estimator_
# Evaluate the selected model on the held-out testing data
accuracy = best_clf.score(test_x, test_y)
print("Testing accuracy:", accuracy)