# Classificação

In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import classification_report, confusion_matrix  
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier  

In [2]:
# Load gun laws dataset
gun_laws = pd.read_csv('../databases/gun_laws.zip', compression='zip')

states = pd.read_csv('../databases/states.zip', compression='zip')

In [3]:
# Classify each state by number of incidents
state_class = np.concatenate((np.repeat('low', 17), np.repeat('medium', 17), np.repeat('high', 17)))

states = states.sort_values(by='incidents_per_capita')
states['class'] = state_class

# Create labeled dataset
gun_laws_class = pd.merge(states[['state', 'class']], gun_laws, on='state')

# Split train and test sets
y = gun_laws_class['class']
X = gun_laws_class.drop(['state', 'class'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# Train
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[1 0 3]
 [2 0 3]
 [2 0 0]]
              precision    recall  f1-score   support

        high       0.20      0.25      0.22         4
         low       0.00      0.00      0.00         5
      medium       0.00      0.00      0.00         2

   micro avg       0.09      0.09      0.09        11
   macro avg       0.07      0.08      0.07        11
weighted avg       0.07      0.09      0.08        11



  'precision', 'predicted', average, warn_for)
