In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import AdaBoostClassifier

from sklearn.dummy import DummyClassifier
# Seed NumPy's legacy global random generator once, before any other cell runs.
# NOTE(review): presumably this is what keeps the train/test split and the
# estimators below repeatable, since none of them receives an explicit
# random_state -- confirm that no cell in between draws random numbers.
np.random.seed(5)

In [2]:
# Load buscas.csv into a DataFrame; the path is relative to the notebook's
# working directory.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column, which suggests
# the file carries a saved index -- consider index_col=0 if that is the case.
df = pd.read_csv('buscas.csv')

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,home,busca,logado,comprou
0,0,algoritmos,1,1
1,0,java,0,1
2,1,algoritmos,0,1
3,1,ruby,1,0
4,1,ruby,0,1


In [4]:
# One-hot encode the categorical `busca` column: one indicator column per
# distinct value. dtype=int pins the indicators to 0/1 integers; without it the
# result dtype depends on the installed pandas (uint8 before 2.0, bool from 2.0
# on), which would change how the frame is displayed.
dummies = pd.get_dummies(df['busca'], dtype=int)

# Attach the indicator columns to df under the category names. Re-running this
# cell simply overwrites the same columns with the same values.
df[dummies.columns] = dummies

In [5]:
# Display the frame again to confirm the one-hot columns were appended
# (pandas abbreviates the middle rows of a long frame when rendering).
df

Unnamed: 0,home,busca,logado,comprou,algoritmos,java,ruby
0,0,algoritmos,1,1,1,0,0
1,0,java,0,1,0,1,0
2,1,algoritmos,0,1,1,0,0
3,1,ruby,1,0,0,0,1
4,1,ruby,0,1,0,0,1
...,...,...,...,...,...,...,...
995,0,ruby,0,0,0,0,1
996,0,ruby,0,1,0,0,1
997,0,java,1,1,0,1,0
998,1,algoritmos,0,1,1,0,0


In [6]:
# Feature matrix: `home`, `logado` and the one-hot columns derived from `busca`.
# Target vector: `comprou`.
X = df[['home', 'logado', 'algoritmos', 'java', 'ruby']]
y = df['comprou']

# Hold out 10% of the rows for testing, stratified on y so both partitions keep
# the same class proportions. random_state is passed explicitly (same value as
# the np.random.seed(5) call at the top) so the split no longer depends on how
# many random numbers were drawn from the global generator beforehand:
# re-running this cell always yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.1, stratify=y, random_state=5
)

In [7]:
def fit_and_predict(models, X_train, X_test, y_train, y_test):
    """Fit every candidate model and return the one with the best test accuracy.

    Each model is fitted in place on ``(X_train, y_train)`` and then scored
    with ``accuracy_score`` on its predictions for ``X_test``.

    Parameters
    ----------
    models : iterable
        Estimator objects exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train
        Training features and labels handed to ``fit``.
    X_test, y_test
        Held-out features and labels used for scoring.

    Returns
    -------
    list
        ``[best_model, best_accuracy]``. On ties the earliest model in
        ``models`` is kept. When ``models`` is empty the result is
        ``[None, 0]`` so that two-name unpacking by the caller still works.

    Notes
    -----
    The winner is selected on the same test data its accuracy is reported on,
    so the returned accuracy is an optimistic estimate.
    """
    best_model, best_accuracy = None, 0

    for model in models:
        model.fit(X_train, y_train)
        # accuracy_score is documented as (y_true, y_pred); keep that order so
        # it matches the other call in this notebook.
        accuracy = accuracy_score(y_test, model.predict(X_test))
        # `best_model is None` lets the first model be recorded even when it
        # scores exactly 0, so a model/accuracy pair is always returned for a
        # non-empty `models`.
        if best_model is None or accuracy > best_accuracy:
            best_model, best_accuracy = model, accuracy

    return [best_model, best_accuracy]
        

In [8]:
# Candidate classifiers to compare, both constructed with default settings.
# NOTE(review): AdaBoostClassifier receives no random_state here -- presumably
# it relies on the global NumPy seed set in the first cell; confirm.
models = [MultinomialNB(), AdaBoostClassifier()]

In [9]:
# Fit every candidate in `models` on the training split and keep the one with
# the highest accuracy on the test split, together with that accuracy.
best_model, best_accuracy = fit_and_predict(models, X_train, X_test, y_train, y_test)

In [10]:
# Baseline for comparison: a classifier built with the 'most_frequent'
# strategy, fitted on the training split (fit returns the estimator itself, so
# construction and fitting are chained), then used to predict the test split.
dummy = DummyClassifier(strategy='most_frequent').fit(X_train, y_train)
d_predicted = dummy.predict(X_test)

In [11]:
# Report the selected model's accuracy next to the baseline's, one per line,
# both expressed as percentages of the test split.
print(
    f'{best_model}: {best_accuracy*100}% ',
    f'{dummy}: {accuracy_score(y_test, d_predicted)*100}%',
    sep='\n',
)

AdaBoostClassifier(): 86.0% 
DummyClassifier(strategy='most_frequent'): 83.0%
