# Avaliação dos Resultados - Oscar IMDB

In [2]:
import os
import pandas as pd
import pickle


from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score, KFold
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC

In [3]:
os.chdir("../tratados")
os.getcwd()

'/home/antero/Documentos/tcc/tratados'

In [4]:
with open('oscar_rotten.pkl', 'rb') as f:
    df_rotten_tomatoes_oscars=pickle.load(f)

## Importando o Dataframe

In [5]:
X_rotten_oscars = df_rotten_tomatoes_oscars.iloc[:, [7,8,10,11,12,13,14]]

In [6]:
Y_rotten_tomatoes_nominee = df_rotten_tomatoes_oscars.iloc[:, 17]

In [7]:
Y_rotten_tomatoes_winner = df_rotten_tomatoes_oscars.iloc[:, 16]

### Escalonamento dos valores

In [8]:
label_encoder = LabelEncoder()
scaler = StandardScaler()

### Tratamento de atributos categóricos

In [8]:
X_rotten_oscars = scaler.fit_transform(X_rotten_oscars)
Y_rotten_tomatoes_nominee = label_encoder.fit_transform(Y_rotten_tomatoes_nominee)
Y_rotten_tomatoes_winner = label_encoder.fit_transform(Y_rotten_tomatoes_winner)

In [9]:
resultados_DummyClassifier = []
resultados_NaiveBayes = []
resultados_RegressaoLogistica = []
resultados_KNN = []
resultados_RandomForest = []
resultados_MPLC = []
resultados_SGDC = []
resultados_SupportVector = []

In [10]:
for i in range(30):
    kfold = KFold(n_splits=10, shuffle=True, random_state=i)

    DummyCl = DummyClassifier(strategy = 'most_frequent')
    scores = cross_val_score(DummyCl, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_DummyClassifier.append(scores.mean())

    NaiveBayes= GaussianNB()
    scores = cross_val_score(NaiveBayes, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_NaiveBayes.append(scores.mean())

    RegressaoLogistica = LogisticRegression(dual = False, fit_intercept=True, penalty='l1', solver='liblinear')
    scores = cross_val_score(RegressaoLogistica, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_RegressaoLogistica.append(scores.mean())

    knn = KNeighborsClassifier(algorithm = 'auto', n_neighbors = 20, p = 2)
    scores = cross_val_score(RegressaoLogistica, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_KNN.append(scores.mean())

    RandomForest = RandomForestClassifier(criterion='entropy', max_features='auto', min_samples_leaf=5, min_samples_split=2, n_estimators=40)
    scores = cross_val_score(RandomForest, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_RandomForest.append(scores.mean())

    MPLC = MLPClassifier(hidden_layer_sizes=(5, 2), learning_rate='invscaling', max_iter=2000, solver='adam')
    scores = cross_val_score(MPLC, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_MPLC.append(scores.mean())

    SGDC = SGDClassifier(loss = 'hinge', max_iter = 1000, penalty = 'elasticnet')
    scores = cross_val_score(SGDC, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_SGDC.append(scores.mean())

    SV = SVC(kernel = 'poly')
    scores = cross_val_score(SV, X_rotten_oscars, Y_rotten_tomatoes_winner, cv = kfold)
    resultados_SupportVector.append(scores.mean())

In [11]:
resultados = pd.DataFrame({'Dummy Classifier': resultados_DummyClassifier,
                          'Naive Bayes Classifier': resultados_NaiveBayes,
                          'Regressao Logistica': resultados_RegressaoLogistica,
                          'KNN':resultados_KNN,
                          'Random Forest': resultados_RandomForest,
                          'Multi-layer Perceptron': resultados_MPLC,
                          'Stochastic Gradient Descent': resultados_SGDC,
                          'Support Vector Classifier': resultados_SupportVector})

In [12]:
resultados.describe()

Unnamed: 0,Dummy Classifier,Naive Bayes Classifier,Regressao Logistica,KNN,Random Forest,Multi-layer Perceptron,Stochastic Gradient Descent,Support Vector Classifier
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,0.8609652,0.83784,0.864154,0.864152,0.872114,0.868526,0.861588,0.866701
std,3.856961e-16,0.000246,0.000149,0.000149,0.000861,0.001027,0.000724,0.000217
min,0.8609652,0.837385,0.863836,0.863836,0.870617,0.865791,0.859988,0.866341
25%,0.8609652,0.837645,0.86402,0.86402,0.871457,0.867883,0.861118,0.866539
50%,0.8609652,0.837813,0.864172,0.864142,0.872022,0.868723,0.861729,0.866707
75%,0.8609652,0.838042,0.864264,0.864264,0.872847,0.869288,0.861988,0.866891
max,0.8609652,0.838424,0.864447,0.864447,0.874221,0.870067,0.862798,0.867074


In [4]:
resultados.describe().round(8)

Unnamed: 0,Dummy Classifier,Naive Bayes Classifier,Regressao Logistica,KNN,Random Forest,Multi-layer Perceptron,Stochastic Gradient Descent,Support Vector Classifier
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,0.860965,0.83784,0.864154,0.864152,0.872114,0.868526,0.861588,0.866701
std,0.0,0.000246,0.000149,0.000149,0.000861,0.001027,0.000724,0.000217
min,0.860965,0.837385,0.863836,0.863836,0.870617,0.865791,0.859988,0.866341
25%,0.860965,0.837645,0.86402,0.86402,0.871457,0.867883,0.861118,0.866539
50%,0.860965,0.837813,0.864172,0.864142,0.872022,0.868723,0.861729,0.866707
75%,0.860965,0.838042,0.864264,0.864264,0.872847,0.869288,0.861988,0.866891
max,0.860965,0.838424,0.864447,0.864447,0.874221,0.870067,0.862798,0.867074
