# Avaliação dos Resultados - Oscar IMDB

In [1]:
import os
import pandas as pd
import pickle


from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score, KFold
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC

In [2]:
os.chdir("../tratados")
os.getcwd()

'/home/antero/Documentos/wrk/tratados'

In [3]:
with open('oscar_rotten.pkl', 'rb') as f:
    df_rotten_tomatoes_oscars=pickle.load(f)

## Importando o Dataframe

In [4]:
X_rotten_oscars = df_rotten_tomatoes_oscars.iloc[:, [7,8,10,11,12,13,14]]

In [5]:
Y_rotten_tomatoes_nominee = df_rotten_tomatoes_oscars.iloc[:, 17]

In [6]:
Y_rotten_tomatoes_winner = df_rotten_tomatoes_oscars.iloc[:, 16]

### Escalonamento dos valores

In [7]:
label_encoder = LabelEncoder()
scaler = StandardScaler()

### Tratamento de atributos categóricos

In [8]:
X_rotten_oscars = scaler.fit_transform(X_rotten_oscars)
Y_rotten_tomatoes_nominee = label_encoder.fit_transform(Y_rotten_tomatoes_nominee)
Y_rotten_tomatoes_winner = label_encoder.fit_transform(Y_rotten_tomatoes_winner)

In [9]:
resultados_DummyClassifier = []
resultados_NaiveBayes = []
resultados_RegressaoLogistica = []
resultados_KNN = []
resultados_RandomForest = []
resultados_MPLC = []
resultados_SGDC = []
resultados_SupportVector = []

In [10]:
for i in range(30):
    kfold = KFold(n_splits=10, shuffle=True, random_state=i)

    DummyCl = DummyClassifier(strategy = 'most_frequent')
    scores = cross_val_score(DummyCl, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_DummyClassifier.append(scores.mean())

    NaiveBayes= GaussianNB()
    scores = cross_val_score(NaiveBayes, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_NaiveBayes.append(scores.mean())

    RegressaoLogistica = LogisticRegression(dual=True, fit_intercept=True, penalty='l2', solver='liblinear')
    scores = cross_val_score(RegressaoLogistica, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_RegressaoLogistica.append(scores.mean())

    knn = KNeighborsClassifier(algorithm = 'auto', n_neighbors = 20, p = 1)
    scores = cross_val_score(RegressaoLogistica, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_KNN.append(scores.mean())

    RandomForest = RandomForestClassifier(criterion='gini', max_features='sqrt', min_samples_leaf=10, min_samples_split=2, n_estimators=100)
    scores = cross_val_score(RandomForest, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_RandomForest.append(scores.mean())

    MPLC = MLPClassifier(hidden_layer_sizes=(10, 4), learning_rate='adaptive', max_iter=1000, solver='lbfgs')
    scores = cross_val_score(MPLC, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_MPLC.append(scores.mean())

    SGDC = SGDClassifier(loss = 'log', max_iter = 3000, penalty = 'elasticnet')
    scores = cross_val_score(SGDC, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_SGDC.append(scores.mean())

    SV = SVC(kernel = 'linear')
    scores = cross_val_score(SV, X_rotten_oscars, Y_rotten_tomatoes_nominee, cv = kfold)
    resultados_SupportVector.append(scores.mean())

In [11]:
resultados = pd.DataFrame({'Dummy Classifier': resultados_DummyClassifier,
                          'Naive Bayes Classifier': resultados_NaiveBayes,
                          'Regressao Logistica': resultados_RegressaoLogistica,
                          'KNN':resultados_KNN,
                          'Random Forest': resultados_RandomForest,
                          'Multi-layer Perceptron': resultados_MPLC,
                          'Stochastic Gradient Descent': resultados_SGDC,
                          'Support Vector Classifier': resultados_SupportVector})

In [12]:
resultados.describe()

Unnamed: 0,Dummy Classifier,Naive Bayes Classifier,Regressao Logistica,KNN,Random Forest,Multi-layer Perceptron,Stochastic Gradient Descent,Support Vector Classifier
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,0.9497251,0.910782,0.949181,0.949181,0.95045,0.949988,0.948536,0.9497251
std,1.596934e-16,0.000227,0.000129,0.000175,0.000246,0.000472,0.00068,1.596934e-16
min,0.9497251,0.910324,0.948931,0.948809,0.949969,0.949114,0.947159,0.9497251
25%,0.9497251,0.910629,0.949114,0.949068,0.950244,0.949664,0.94803,0.9497251
50%,0.9497251,0.910812,0.949175,0.949175,0.950519,0.950153,0.948595,0.9497251
75%,0.9497251,0.910919,0.949236,0.949328,0.95058,0.950275,0.949114,0.9497251
max,0.9497251,0.911362,0.949481,0.949542,0.951008,0.950886,0.949725,0.9497251


In [8]:
resultados.describe().round(8)

Unnamed: 0,Dummy Classifier,Naive Bayes Classifier,Regressao Logistica,KNN,Random Forest,Multi-layer Perceptron,Stochastic Gradient Descent,Support Vector Classifier
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,0.949725,0.910782,0.949181,0.949181,0.95045,0.949988,0.948536,0.949725
std,0.0,0.000227,0.000129,0.000175,0.000246,0.000472,0.00068,0.0
min,0.949725,0.910324,0.948931,0.948809,0.949969,0.949114,0.947159,0.949725
25%,0.949725,0.910629,0.949114,0.949068,0.950244,0.949664,0.94803,0.949725
50%,0.949725,0.910812,0.949175,0.949175,0.950519,0.950153,0.948595,0.949725
75%,0.949725,0.910919,0.949236,0.949328,0.95058,0.950275,0.949114,0.949725
max,0.949725,0.911362,0.949481,0.949542,0.951008,0.950886,0.949725,0.949725
