In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# 1. Load the dataset
# The CSV is read into `digits_images`; column 0 is taken as the label and
# every remaining column as a pixel feature.
file_path = '../data/digit-recognizer/train.csv'
digits_images = pd.read_csv(file_path)

label_column = digits_images.iloc[:, 0]
pixel_columns = digits_images.iloc[:, 1:]
labels = label_column.values  # label column (digits)
pixels = pixel_columns.values  # pixel columns

# 2. Pre-processing: scale the pixel values by dividing by 255.0
# (presumably 8-bit intensities, so the result lands in [0, 1] — confirm).
pixels_normalized = np.divide(pixels, 255.0)

# 3. Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    pixels_normalized,
    labels,
    test_size=0.2,
    random_state=42,
)

# 4. Train the model (logistic regression with the lbfgs solver, capped at
# 1000 iterations)
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)

# 5. Evaluate on the held-out test set
y_pred = model.predict(X_test)

logreg_report = classification_report(y_test, y_pred)
print("Classification Report:\n", logreg_report)
logreg_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", logreg_confusion)

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.96      0.96       816
           1       0.96      0.98      0.97       909
           2       0.91      0.89      0.90       846
           3       0.90      0.87      0.89       937
           4       0.92      0.93      0.93       839
           5       0.85      0.88      0.87       702
           6       0.93      0.96      0.94       785
           7       0.93      0.92      0.93       893
           8       0.90      0.89      0.89       835
           9       0.91      0.90      0.91       838

    accuracy                           0.92      8400
   macro avg       0.92      0.92      0.92      8400
weighted avg       0.92      0.92      0.92      8400

Confusion Matrix:
 [[787   0   1   1   3  12  10   1   1   0]
 [  0 890   4   0   1   3   1   3   6   1]
 [  4  15 756  12  12   7  11   9  20   0]
 [  1   3  27 818   0  39   2   9  24  14]
 [  4   2   4   1 782   2 

In [5]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
# Wrap a default-configured SVC in a one-vs-rest meta-classifier, then fit it
# on the training split.
svm_model = OneVsRestClassifier(estimator=SVC())
svm_model.fit(X_train, y_train)

# Predict on the held-out split. Note: this rebinds `y_pred`, which the
# logistic-regression cell also assigns.
y_pred = svm_model.predict(X_test)

svm_report = classification_report(y_test, y_pred)
print("Classification Report:\n", svm_report)
svm_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", svm_confusion)

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99       816
           1       0.99      0.99      0.99       909
           2       0.98      0.97      0.97       846
           3       0.98      0.96      0.97       937
           4       0.97      0.97      0.97       839
           5       0.97      0.97      0.97       702
           6       0.98      0.99      0.98       785
           7       0.98      0.97      0.97       893
           8       0.96      0.98      0.97       835
           9       0.96      0.96      0.96       838

    accuracy                           0.97      8400
   macro avg       0.97      0.97      0.97      8400
weighted avg       0.97      0.97      0.97      8400

Confusion Matrix:
 [[808   0   0   0   2   1   4   0   1   0]
 [  0 901   2   0   1   1   1   1   1   1]
 [  3   4 819   2   7   0   1   3   6   1]
 [  0   0   3 903   0  10   0   6   7   8]
 [  1   0   1   0 815   1 

In [12]:
from sklearn.decomposition import PCA
import numpy as np

# X_train is assumed to hold flattened images (n_samples x 784 — TODO confirm).
# Build a PCA transformer that keeps 10 principal components; it is only
# constructed here, not fitted.
# NOTE(review): this comment previously said 50 dimensions while the code
# requests 10 — confirm the intended value.
pca = PCA(n_components=10)

# Print the first training sample to inspect the raw feature vector.
first_training_sample = X_train[0]
print(first_training_sample)

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.24705882 0.99215686 0.4745098  0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.24705882 0.99607843 0.79215686 0.         0.         