In [120]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

import pickle

In [121]:
# DATA
###############################################################################

# Load the raw Iris table (path is relative to the notebook's working directory)
# and discard the `Id` column (presumably a row identifier) without mutating
# anything in place, so the cell gives the same result every time it is re-run.
data = pd.read_csv(os.path.join('..', 'datasets', 'raw', 'Iris.csv')).drop(columns='Id')

# Features are every remaining column except the target; `Species` is the label.
X = data.drop(columns='Species')
y = data['Species']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified, so class proportions in the small test set
# may differ from the full data -- consider `stratify=y` if balanced evaluation matters.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

## Random Forest Model

In [122]:
# Fit a random forest on the training split and predict the held-out rows.
# The estimator is randomised, so the seed is pinned to make the fitted model
# (and anything saved from it) identical across Restart & Run All.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)

In [123]:
# Score the random-forest predictions held in `y_pred` against the held-out labels.
# Precision, recall and F1 are support-weighted averages over the classes.
# zero_division=0 counts an undefined per-class value (e.g. a class the model never
# predicted) as 0 instead of a perfect 1, so a model that ignores a class cannot
# look better than it is; it also keeps the call free of warnings.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
scores = {'Accuracy':acc, 'Precision':prec, 'Recall':rec, 'F1':f1}
scores

{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}

In [124]:
# Persist the fitted random forest with pickle.
# The target directory is created when missing so the cell also works on a fresh
# checkout, and the path is assembled with os.path.join like the dataset path.
model_path = os.path.join('..', 'trained_models', 'rf_model.pkl')
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as model_file:
    pickle.dump(rf_model, model_file)

## Logistic Regression Model

In [125]:
# Logistic regression with the library's default settings: `fit` returns the
# estimator itself, so construction and training are chained in one expression.
lr_model = LogisticRegression().fit(X_train, y_train)

# Predicted species for the held-out rows (overwrites the previous `y_pred`).
y_pred = lr_model.predict(X_test)

In [126]:
# Score the logistic-regression predictions held in `y_pred` against the held-out labels.
# Precision, recall and F1 are support-weighted averages over the classes.
# zero_division=0 counts an undefined per-class value (e.g. a class the model never
# predicted) as 0 instead of a perfect 1, so a model that ignores a class cannot
# look better than it is; it also keeps the call free of warnings.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
scores = {'Accuracy':acc, 'Precision':prec, 'Recall':rec, 'F1':f1}
scores

{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}

In [127]:
# Persist the fitted logistic-regression model with pickle.
# The target directory is created when missing so the cell also works on a fresh
# checkout, and the path is assembled with os.path.join like the dataset path.
model_path = os.path.join('..', 'trained_models', 'lr_model.pkl')
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as model_file:
    pickle.dump(lr_model, model_file)

## KNN Model

In [128]:
# k-nearest-neighbours classifier with the library's default settings: `fit`
# returns the estimator itself, so construction and training are chained.
knn_model = KNeighborsClassifier().fit(X_train, y_train)

# Predicted species for the held-out rows (overwrites the previous `y_pred`).
y_pred = knn_model.predict(X_test)

In [129]:
# Score the k-nearest-neighbours predictions held in `y_pred` against the held-out labels.
# Precision, recall and F1 are support-weighted averages over the classes.
# zero_division=0 counts an undefined per-class value (e.g. a class the model never
# predicted) as 0 instead of a perfect 1, so a model that ignores a class cannot
# look better than it is; it also keeps the call free of warnings.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
scores = {'Accuracy':acc, 'Precision':prec, 'Recall':rec, 'F1':f1}
scores

{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}

In [130]:
# Persist the fitted k-nearest-neighbours model with pickle.
# The target directory is created when missing so the cell also works on a fresh
# checkout, and the path is assembled with os.path.join like the dataset path.
model_path = os.path.join('..', 'trained_models', 'knn_model.pkl')
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as model_file:
    pickle.dump(knn_model, model_file)

## SVM Model

In [131]:
# Support-vector classifier with the library's default settings: `fit` returns
# the estimator itself, so construction and training are chained.
svm_model = SVC().fit(X_train, y_train)

# Predicted species for the held-out rows (overwrites the previous `y_pred`).
y_pred = svm_model.predict(X_test)

In [132]:
# Score the support-vector predictions held in `y_pred` against the held-out labels.
# Precision, recall and F1 are support-weighted averages over the classes.
# zero_division=0 counts an undefined per-class value (e.g. a class the model never
# predicted) as 0 instead of a perfect 1, so a model that ignores a class cannot
# look better than it is; it also keeps the call free of warnings.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
scores = {'Accuracy':acc, 'Precision':prec, 'Recall':rec, 'F1':f1}
scores

{'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'F1': 1.0}

In [133]:
# Persist the fitted support-vector model with pickle.
# The target directory is created when missing so the cell also works on a fresh
# checkout, and the path is assembled with os.path.join like the dataset path.
model_path = os.path.join('..', 'trained_models', 'svm_model.pkl')
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as model_file:
    pickle.dump(svm_model, model_file)