**Atividade**

Construa um classificador por regressão logística para separar as flores do tipo 'Iris Virginica' das demais usando as características 'petal length (cm)' e 'petal width (cm)'. Como resultado final, apresente:

- Acurácia do classificador no conjunto de testes.
- Curva ROC e respectiva área sob a curva (AUC).
- Um diagrama ilustrando a probabilidade da classe positiva. 
    - Dica: veja https://matplotlib.org/gallery/images_contours_and_fields/contour_demo.html

Use seu arsenal de ferramentas de validação para encontrar o melhor modelo.

In [None]:
from sklearn import datasets
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, precision_recall_curve
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_predict
import matplotlib.pyplot as plt

# Seed passed to the randomised steps of this notebook (e.g. the train/test
# split) so that results can be reproduced.
RANDOM_SEED = 42

# Load the Iris dataset and keep only the two petal measurements as features.
iris = datasets.load_iris()
all_features = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df = all_features.drop(columns=["sepal length (cm)", "sepal width (cm)"])

# Boolean target: True where the label equals 2 (the class the exercise treats
# as 'Iris Virginica'), False for every other flower.
df["target"] = iris.target == 2
df

In [None]:
# Hold out 20% of the rows for the final evaluation. The split is stratified
# on the target so that the train and test sets keep the same proportion of
# positive examples as the full dataset; with so few rows, a purely random
# split can leave either side with a skewed class ratio.
train_set, test_set = train_test_split(
    df,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=df["target"],
)
X_train = train_set.drop(["target"], axis=1)
y_train = train_set["target"]
X_test = test_set.drop(["target"], axis=1)
y_test = test_set["target"]

std_scaler = StandardScaler()

# The scaler is a pipeline step so that, during cross-validation, it is
# re-fitted on the training part of each fold only and never sees the
# corresponding validation rows.
lr_classifier = Pipeline([
    ('std_scaler', std_scaler),
    ('logit_reg', LogisticRegression()),
])

# 4-fold cross-validated accuracy on the training set (one score per fold).
res = cross_val_score(
    lr_classifier,
    X_train,
    y_train,
    cv=4,
    scoring="accuracy",
    n_jobs=-1,
)
res

In [None]:
# Out-of-fold class predictions for every training row (4-fold CV): each row
# is predicted by a model that did not see it during fitting.
y_train_pred = cross_val_predict(
    lr_classifier,
    X_train,
    y_train,
    cv=4,
    n_jobs=-1,
)

# Confusion matrix of true labels against the cross-validated predictions
# (rows: true class, columns: predicted class).
mat = confusion_matrix(y_true=y_train, y_pred=y_train_pred)
mat

In [None]:
# Out-of-fold decision scores (not hard class labels) for the training rows;
# these continuous scores are what the ROC analysis thresholds over.
y_scores = cross_val_predict(
    lr_classifier, X_train, y_train, cv=4, method="decision_function", n_jobs=-1
)

# False/true positive rates at each candidate threshold, plus the area under
# the resulting curve.
fpr, tpr, thresholds = roc_curve(y_train, y_scores)
roc_auc = roc_auc_score(y_train, y_scores)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, linewidth=2)
# Dashed diagonal: reference line where TPR equals FPR.
plt.plot([0, 1], [0, 1], 'k--')
plt.axis([0, 1, 0, 1])
plt.xlabel('False Positive Rate', fontsize=16)
plt.ylabel('True Positive Rate', fontsize=16)
plt.show()

print(f"Qualidade ROC-AUC do Classificador: {roc_auc}")

In [None]:
# Precision and recall of the cross-validated scores at every threshold.
precisions, recalls, thresholds = precision_recall_curve(y_train, y_scores)

# `precisions` and `recalls` each carry one more entry than `thresholds`, so
# the last entry is dropped to align them with the threshold axis.
for curve, line_style, curve_label in (
    (precisions[:-1], "b--", "Precision"),
    (recalls[:-1], "g-", "Recall"),
):
    plt.plot(thresholds, curve, line_style, label=curve_label, linewidth=2)

plt.xlabel("Threshold", fontsize=16)
plt.legend(loc="lower left", fontsize=16)
plt.show()