In [0]:
%pip install openpyxl

In [0]:
# Restart the Python process after the %pip install in the previous cell so the
# freshly installed package is picked up by the interpreter. This wipes all
# notebook state, which is why it runs before any import or data loading.
dbutils.library.restartPython()

In [0]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn import tree
from sklearn import naive_bayes

In [0]:
# Load the beer/grade spreadsheet from the workspace volume into `df` and
# preview the first rows. Later cells read the `cerveja` and `nota` columns.
# NOTE(review): reading .xlsx presumably relies on the openpyxl install above.
caminho_planilha = "/Volumes/workspace/machine_learning/planilhas/dados_cerveja_nota.xlsx"
df = pd.read_excel(caminho_planilha)
df.head()

In [0]:
# Derive the binary target column: 1 when `nota` is strictly greater than 5,
# 0 otherwise. The frame is then displayed as the cell output.
df['aprovado'] = df['nota'].gt(5).astype(int)
df

In [0]:
# Raw observations: one blue marker per row, beers on x and the 0/1 approval
# flag on y. Markers only (no connecting line), same as the 'o' format string.
plt.plot(df['cerveja'], df['aprovado'], marker='o', linestyle='None', color='blue')
plt.grid()
plt.title("Cerveja VS Aprovação")
plt.xlabel("Cerveja")
plt.ylabel("Aprovado")

In [0]:
# Logistic regression of `aprovado` on `cerveja`, with an intercept and no
# penalty term (penalty=None).
reg = linear_model.LogisticRegression(penalty=None, fit_intercept=True)
reg.fit(df[['cerveja']], df['aprovado'])

# Score each distinct `cerveja` value once. reg_predict / reg_proba keep the
# first-appearance order of drop_duplicates() because the comparison cell
# plots them against df['cerveja'].drop_duplicates().
X_unique = df[['cerveja']].drop_duplicates()
reg_predict = reg.predict(X_unique)
reg_proba = reg.predict_proba(X_unique)[:, 1]  # column 1 = P(aprovado == 1)

# A line plot joins points in the order given, so draw the curves by
# ascending x; otherwise unsorted data produces a zigzag instead of a curve.
x_unique = X_unique['cerveja'].to_numpy()
order = x_unique.argsort(kind='stable')
x_sorted = x_unique[order]

plt.grid(True)
plt.title("Modelo de Regressão Logistica")
plt.xlabel("Cerveja")
plt.ylabel("Aprovado")
# Labels are attached to each artist so the legend cannot drift out of sync
# with the plotting order.
plt.plot(df['cerveja'], df['aprovado'], 'o', color='purple', label="Notas")
plt.plot(x_sorted, reg_predict[order], color="green", label="Regressao Predict")
plt.plot(x_sorted, reg_proba[order], color="magenta", label="Regressao Proba")
# Dashed reference line at probability 0.5.
plt.hlines(0.5, xmin=1, xmax=9, linestyle="--", color="black", label="Linha de media")

plt.legend()
plt.show()

In [0]:
# Decision tree with no depth limit fitted on `cerveja` -> `aprovado`;
# random_state is fixed so re-runs give the same tree.
arvore_full = tree.DecisionTreeClassifier(random_state=42)
arvore_full.fit(df[["cerveja"]], df["aprovado"])

# Score each distinct `cerveja` value once. The result arrays keep the
# first-appearance order of drop_duplicates().
X_unique = df[["cerveja"]].drop_duplicates()
arvore_full_predict = arvore_full.predict(X_unique)
arvore_full_proba = arvore_full.predict_proba(X_unique)[:, 1]  # column 1 = P(aprovado == 1)

# A line plot joins points in the order given, so draw the curves by
# ascending x; otherwise unsorted data produces a zigzag.
x_unique = X_unique["cerveja"].to_numpy()
order = x_unique.argsort(kind="stable")
x_sorted = x_unique[order]

# Grid added for consistency with the regression and Naive Bayes figures.
plt.grid(True)
plt.title("Modelo de Arvore de Decisão")
plt.xlabel("Cerveja")
plt.ylabel("Aprovado")
# Labels are attached to each artist so the legend cannot drift out of sync
# with the plotting order.
plt.plot(df['cerveja'], df['aprovado'], 'o', color='purple', label="Notas")
plt.plot(x_sorted, arvore_full_predict[order], color="green", label="Arvore Predict")
plt.plot(x_sorted, arvore_full_proba[order], color="magenta", label="Arvore Proba")
# Dashed reference line at probability 0.5.
plt.hlines(0.5, xmin=1, xmax=9, linestyle="--", color="black", label="Linha de media")

plt.legend()
plt.show()

In [0]:
# Decision tree limited to max_depth=2 fitted on `cerveja` -> `aprovado`;
# random_state is fixed so re-runs give the same tree.
arvore_d2 = tree.DecisionTreeClassifier(random_state=42, max_depth=2)
arvore_d2.fit(df[["cerveja"]], df["aprovado"])

# Score each distinct `cerveja` value once. arvore_d2_predict / arvore_d2_proba
# keep the first-appearance order of drop_duplicates() because the comparison
# cell plots them against df["cerveja"].drop_duplicates().
X_unique = df[["cerveja"]].drop_duplicates()
arvore_d2_predict = arvore_d2.predict(X_unique)
arvore_d2_proba = arvore_d2.predict_proba(X_unique)[:, 1]  # column 1 = P(aprovado == 1)

# A line plot joins points in the order given, so draw the curves by
# ascending x; otherwise unsorted data produces a zigzag.
x_unique = X_unique["cerveja"].to_numpy()
order = x_unique.argsort(kind="stable")
x_sorted = x_unique[order]

# Grid added for consistency with the regression and Naive Bayes figures.
plt.grid(True)
plt.title("Modelo de Arvore D2 de Decisão")
plt.xlabel("Cerveja")
plt.ylabel("Aprovado")
# Labels are attached to each artist so the legend cannot drift out of sync
# with the plotting order.
plt.plot(df['cerveja'], df['aprovado'], 'o', color='purple', label="Notas")
plt.plot(x_sorted, arvore_d2_predict[order], color="green", label="Arvore D2 Predict")
plt.plot(x_sorted, arvore_d2_proba[order], color="magenta", label="Arvore D2 Proba")
# Dashed reference line at probability 0.5.
plt.hlines(0.5, xmin=1, xmax=9, linestyle="--", color="black", label="Linha de media")

plt.legend()
plt.show()

In [0]:
# Gaussian Naive Bayes classifier fitted on `cerveja` -> `aprovado`.
nb = naive_bayes.GaussianNB()
nb.fit(df[["cerveja"]], df["aprovado"])

# Score each distinct `cerveja` value once. nb_predict / nb_proba keep the
# first-appearance order of drop_duplicates() because the comparison cell
# plots them against df["cerveja"].drop_duplicates().
X_unique = df[["cerveja"]].drop_duplicates()
nb_predict = nb.predict(X_unique)
nb_proba = nb.predict_proba(X_unique)[:, 1]  # column 1 = P(aprovado == 1)

# A line plot joins points in the order given, so draw the curves by
# ascending x; otherwise unsorted data produces a zigzag.
x_unique = X_unique["cerveja"].to_numpy()
order = x_unique.argsort(kind="stable")
x_sorted = x_unique[order]

plt.grid(True)
plt.title("Modelo de Naive Bayes")
plt.xlabel("Cerveja")
plt.ylabel("Aprovado")
# Labels are attached to each artist so the legend cannot drift out of sync
# with the plotting order.
plt.plot(df['cerveja'], df['aprovado'], 'o', color='purple', label="Notas")
plt.plot(x_sorted, nb_predict[order], color="green", label="Naive Bayes Predict")
plt.plot(x_sorted, nb_proba[order], color="magenta", label="Naive Bayes Proba")
# Dashed reference line at probability 0.5.
plt.hlines(0.5, xmin=1, xmax=9, linestyle="--", color="black", label="Linha media")

plt.legend()
plt.show()

In [0]:

# Overlay the hard predictions and class-1 probabilities of the logistic
# regression, the depth-2 tree and Naive Bayes on top of the observations.
# The prediction arrays come from the earlier model cells and are aligned with
# df['cerveja'].drop_duplicates() (first-appearance order).
x_unique = df['cerveja'].drop_duplicates().to_numpy()
# A line plot joins points in the order given, so reorder x and every curve by
# ascending x; otherwise unsorted data produces a zigzag.
order = x_unique.argsort(kind='stable')
x_sorted = x_unique[order]

plt.grid(True)
plt.title("Comparação entre modelos")
plt.xlabel("Cerveja")
plt.ylabel("Aprovado")
plt.plot(df['cerveja'], df['aprovado'], 'o', color='purple', label="Notas")

# (values, colour, legend label) for each model curve. Keeping the label next
# to its data stops the legend drifting out of sync with the plotting order.
curves = [
    (reg_predict, 'tomato', "Regressao Predict"),
    (reg_proba, 'red', "Regressao Proba"),
    (arvore_d2_predict, 'blue', "Arvore D2 Predict"),
    (arvore_d2_proba, 'black', "Arvore D2 Proba"),
    (nb_predict, 'green', "Naive Bayes Predict"),
    (nb_proba, 'magenta', "Naive Bayes Proba"),
]
for values, color, label in curves:
    plt.plot(x_sorted, values[order], color=color, label=label)

# Dashed reference line at probability 0.5, labelled like the per-model
# figures (it was previously labelled "Observação").
plt.hlines(0.5, xmin=1, xmax=9, linestyle="--", color="black", label="Linha de media")

plt.legend()
plt.show()