In [33]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import tensorflow as tf
import numpy as np

In [34]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

X = np.concatenate([x_train, x_test])
y = np.concatenate([y_train, y_test])

X = X.reshape(X.shape[0], -1) # transformando de (28,28) para (784)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pca = PCA(n_components = 0.95) # redução de dimensionalidade
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [35]:
params = {
    'n_neighbors': list(range(3, 11, 2)),
    'weights': ['uniform', 'distance'],
    'p': [1, 2] # Distância de manhattan e euclidiana
}

In [36]:
param_grid = {
    'C': [0.01, 0.1, 1],                 # Regularização leve
    'solver': ['lbfgs'],           # Solver eficiente para esse caso
    'max_iter': [500, 1000]         # Menos iterações
}

In [37]:
knn = KNeighborsClassifier()
KNN1 = GridSearchCV(knn, params, cv=3, n_jobs=-1)
KNN1.fit(X_train, y_train)

0,1,2
,estimator,KNeighborsClassifier()
,param_grid,"{'n_neighbors': [3, 5, ...], 'p': [1, 2], 'weights': ['uniform', 'distance']}"
,scoring,
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,3
,weights,'distance'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [38]:
KNN1.best_params_

{'n_neighbors': 3, 'p': 2, 'weights': 'distance'}

In [39]:
predictions = KNN1.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.97      0.98      0.97      1343
           1       0.97      0.99      0.98      1600
           2       0.96      0.94      0.95      1380
           3       0.95      0.95      0.95      1433
           4       0.95      0.94      0.95      1295
           5       0.95      0.94      0.94      1273
           6       0.97      0.98      0.97      1396
           7       0.94      0.94      0.94      1503
           8       0.96      0.92      0.94      1357
           9       0.90      0.93      0.92      1420

    accuracy                           0.95     14000
   macro avg       0.95      0.95      0.95     14000
weighted avg       0.95      0.95      0.95     14000



In [40]:
clf = LogisticRegression()
grid = GridSearchCV(clf, param_grid, cv=3)
grid.fit(X_train, y_train)

0,1,2
,estimator,LogisticRegression()
,param_grid,"{'C': [0.01, 0.1, ...], 'max_iter': [500, 1000], 'solver': ['lbfgs']}"
,scoring,
,n_jobs,
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,0.01
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,500


In [41]:
grid.best_params_

{'C': 0.01, 'max_iter': 500, 'solver': 'lbfgs'}

In [42]:
pred_linear = grid.predict(X_test)
print(classification_report(y_test, pred_linear))

              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1343
           1       0.94      0.97      0.96      1600
           2       0.92      0.91      0.91      1380
           3       0.90      0.89      0.90      1433
           4       0.92      0.93      0.92      1295
           5       0.88      0.89      0.88      1273
           6       0.95      0.96      0.95      1396
           7       0.93      0.94      0.94      1503
           8       0.90      0.87      0.88      1357
           9       0.90      0.90      0.90      1420

    accuracy                           0.92     14000
   macro avg       0.92      0.92      0.92     14000
weighted avg       0.92      0.92      0.92     14000



In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [None]:
KNN2 = GridSearchCV(knn, params, cv=3, n_jobs=-1)
KNN2.fit(X_train, y_train)

0,1,2
,estimator,KNeighborsClassifier()
,param_grid,"{'n_neighbors': [3, 5, ...], 'p': [1, 2], 'weights': ['uniform', 'distance']}"
,scoring,
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_neighbors,3
,weights,'distance'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [None]:
KNN2.best_params_

{'n_neighbors': 3, 'p': 2, 'weights': 'distance'}

In [None]:
predictions = KNN2.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.96      0.98      0.97      2058
           1       0.96      0.99      0.98      2364
           2       0.96      0.94      0.95      2133
           3       0.94      0.94      0.94      2176
           4       0.95      0.94      0.95      1936
           5       0.94      0.93      0.94      1915
           6       0.96      0.98      0.97      2088
           7       0.94      0.94      0.94      2248
           8       0.96      0.92      0.94      1992
           9       0.91      0.93      0.92      2090

    accuracy                           0.95     21000
   macro avg       0.95      0.95      0.95     21000
weighted avg       0.95      0.95      0.95     21000



In [None]:
clf = LogisticRegression()
grid2 = GridSearchCV(clf, param_grid, cv=3)
grid2.fit(X_train, y_train)

In [None]:
grid2.best_params_

{'C': 0.01, 'max_iter': 500, 'solver': 'lbfgs'}

In [None]:
pred_linear = grid2.predict(X_test)
print(classification_report(y_test, pred_linear))

              precision    recall  f1-score   support

           0       0.97      0.96      0.97      1714
           1       0.95      0.97      0.96      1977
           2       0.92      0.90      0.91      1761
           3       0.90      0.89      0.90      1806
           4       0.92      0.93      0.92      1587
           5       0.89      0.88      0.89      1607
           6       0.95      0.96      0.95      1761
           7       0.93      0.94      0.93      1878
           8       0.90      0.87      0.88      1657
           9       0.90      0.90      0.90      1752

    accuracy                           0.92     17500
   macro avg       0.92      0.92      0.92     17500
weighted avg       0.92      0.92      0.92     17500



In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [None]:
KNN3 = GridSearchCV(knn, params, cv=3, n_jobs=-1)
KNN3.fit(X_train, y_train)

In [None]:
KNN3.best_params_

{'n_neighbors': 3, 'p': 1, 'weights': 'distance'}

In [None]:
predictions = KNN3.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.92      0.99      0.95      1714
           1       0.79      1.00      0.88      1977
           2       0.98      0.88      0.93      1761
           3       0.93      0.91      0.92      1806
           4       0.96      0.90      0.93      1587
           5       0.94      0.88      0.91      1607
           6       0.96      0.97      0.97      1761
           7       0.92      0.95      0.93      1878
           8       0.98      0.79      0.88      1657
           9       0.92      0.93      0.93      1752

    accuracy                           0.92     17500
   macro avg       0.93      0.92      0.92     17500
weighted avg       0.93      0.92      0.92     17500



In [None]:
clf = LogisticRegression()
grid3 = GridSearchCV(clf, param_grid, cv=3)
grid3.fit(X_train, y_train)

In [None]:
grid3.best_params_

{'C': 0.01, 'max_iter': 500, 'solver': 'lbfgs'}

In [None]:
pred_linear = grid3.predict(X_test)
print(classification_report(y_test, pred_linear))

              precision    recall  f1-score   support

           0       0.97      0.96      0.97      1714
           1       0.95      0.97      0.96      1977
           2       0.92      0.90      0.91      1761
           3       0.90      0.89      0.90      1806
           4       0.92      0.93      0.92      1587
           5       0.89      0.88      0.89      1607
           6       0.95      0.96      0.95      1761
           7       0.93      0.94      0.93      1878
           8       0.90      0.87      0.88      1657
           9       0.90      0.90      0.90      1752

    accuracy                           0.92     17500
   macro avg       0.92      0.92      0.92     17500
weighted avg       0.92      0.92      0.92     17500



Classificador Linear com Regressão Logística