In [1]:
import pandas as pd
import numpy as np

movies_dev = pd.read_excel('Dev_Set/dev_set_groundtruth_and_trailers.xls',usecols=['movie','goodforairplane','filename'],index_col='filename')
movies_test = pd.read_csv('Test_set/test_set_labels.csv',names=['movie','filename','goodforairplane'],header=None, skiprows=1,index_col='filename',sep=';',dtype={'goodforairplane':int})


movies = pd.concat([movies_dev,movies_test])

In [2]:
import os
import xml.etree.cElementTree as et

for filename in os.listdir('Dev_Set/XML'):
    tree = et.parse('Dev_Set/XML/'+filename)
    root = tree.getroot()
    tmp = root[0]
    
    for i in tmp.attrib:
        movies.loc[filename[:-4],i] = tmp.get(i)

for filename in os.listdir('Test_Set/XML'):
    tree = et.parse('Test_Set/XML/'+filename)
    root = tree.getroot()
    tmp = root[0]
    
    for i in tmp.attrib:
        movies.loc[filename[:-4],i] = tmp.get(i)
        
        

In [3]:
visuals = pd.DataFrame()
names = []
for file in os.listdir("Dev_Set/vis_descriptors"):
    data = pd.read_csv(os.path.join(r"Dev_Set/vis_descriptors",file),header=None)
    name = file.split('.')
    name = name[0]
    names.append(name)
    data = data.mean(axis=0)
    data = data.transpose()
    visuals = visuals.append(data,ignore_index=True)
    
for file in os.listdir("Test_Set/vis_descriptors"):
    data = pd.read_csv(os.path.join(r"Test_Set/vis_descriptors",file),header=None)
    name = file.split('.')
    name = name[0]
    names.append(name)
    data = data.mean(axis=0)
    data = data.transpose()
    visuals = visuals.append(data,ignore_index=True)
    
visuals.insert(0,'filename',names)
visuals.set_index('filename',inplace=True)

In [4]:
audio = pd.DataFrame()
names = []
for file in os.listdir("Dev_Set/audio_descriptors"):
    data = pd.read_csv(os.path.join(r"Dev_Set/audio_descriptors",file),header=None)
    name = file.split('.')
    name = name[0]
    names.append(name)
    data = data.mean(axis=1)
    audio = audio.append(data,ignore_index=True)
    
for file in os.listdir("Test_Set/audio_descriptors"):
    data = pd.read_csv(os.path.join(r"Test_Set/audio_descriptors",file),header=None)
    name = file.split('.')
    name = name[0]
    names.append(name)
    data = data.mean(axis=1)
    audio = audio.append(data,ignore_index=True)
    
audio.insert(0,'filename',names)
audio.set_index('filename',inplace=True)
        

In [5]:
movies.drop(['released','Website','imdbID','poster','tomatoConsensus','writer','DVD','plot','title','awards'], axis=1, inplace=True)

In [6]:
movies = movies.replace('N/A','NaN')
movies['imdbRating'] = movies['imdbRating'].astype(np.float)
movies['imdbVotes'] = movies['imdbVotes'].replace('[,]','',regex=True).astype(float)
movies['runtime'] = movies['runtime'].replace('[\smin]','',regex=True).astype(float)
movies['rated'] = movies['rated'].replace('NaN','NOT RATED')
movies['year'] = movies['year'].astype(float)
movies['metascore'] = movies['metascore'].astype(float)
movies['tomatoRating'] = movies['tomatoRating'].astype(float)
movies['tomatoUserRating'] = movies['tomatoUserRating'].astype(float)
movies['tomatoMeter'] = movies['tomatoMeter'].astype(float)
movies['tomatoUserMeter'] = movies['tomatoUserMeter'].astype(float)



In [7]:
movies.fillna(0,inplace=True)
visuals.fillna(0,inplace=True)
audio.fillna(0,inplace=True)

In [8]:
metadata = movies[['language','year','genre','country','runtime','rated']]
userrating = movies[['imdbRating','metascore','tomatoRating','tomatoUserRating','tomatoMeter','tomatoUserMeter']]
meta_and_user = movies[['language','year','genre','country','runtime','rated','imdbRating','metascore','tomatoRating',
                       'tomatoUserRating','tomatoMeter','tomatoUserMeter']]

In [9]:
import Encoding as enc
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    enc.one_hot_encode(metadata,'language',True)
    enc.one_hot_encode(metadata,'genre',True)
    enc.one_hot_encode(metadata, 'country',True)
    enc.one_hot_encode(metadata, 'rated',True)

metadata.drop(['language','country','rated','genre'], axis=1, inplace=True)

In [10]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    enc.one_hot_encode(meta_and_user,'language',True)
    enc.one_hot_encode(meta_and_user, 'country',True)
    enc.one_hot_encode(meta_and_user, 'genre',True)
    enc.one_hot_encode(meta_and_user, 'rated',True)

meta_and_user.drop(['language','country','rated','genre'], axis=1, inplace=True)

In [11]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

knn = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski',
                           metric_params=None,n_jobs=-1)
nc = NearestCentroid(metric='euclidean', shrink_threshold=None)
tree = DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1,
                              min_weight_fraction_leaf=0.0, max_features=None, random_state=0, max_leaf_nodes=None,
                              class_weight=None, presort=False)
log = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1,
                         class_weight=None, random_state=0, solver='liblinear', max_iter=100, multi_class='ovr', 
                         verbose=0, warm_start=False, n_jobs=-1)
svm = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, 
          cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape=None, random_state=0)
bag = BaggingClassifier(base_estimator=None, n_estimators=10, max_samples=1.0, max_features=1.0, bootstrap=True, 
                        bootstrap_features=False, oob_score=False, warm_start=False, n_jobs=-1, random_state=0, verbose=0)
rf = RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                            min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, 
                            oob_score=False, n_jobs=-1, random_state=0, verbose=0, warm_start=False, class_weight=None)
ada = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=0)
gb = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, min_samples_split=2,
                                min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, init=None, random_state=0,
                                max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto')
gauss = GaussianNB()

In [20]:
from sklearn.model_selection import cross_val_score

algo_pool = [knn,nc,tree,log,svm,bag,rf,ada,gb,gauss]

for i in algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=metadata[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=metadata[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=metadata[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()

KNeighborsClassifier
Precision: 0.5956349206349205, Recall: 0.6366666666666666, f1: 0.5642607392607393

NearestCentroid
Precision: 0.5595238095238094, Recall: 0.5366666666666666, f1: 0.5382478632478632

DecisionTreeClassifier
Precision: 0.4833333333333333, Recall: 0.5466666666666666, f1: 0.4954545454545454

LogisticRegression
Precision: 0.5125, Recall: 0.56, f1: 0.5244599844599845

SVC
Precision: 0.5000396825396825, Recall: 0.6766666666666667, f1: 0.5602319902319903

BaggingClassifier
Precision: 0.5125, Recall: 0.43, f1: 0.42575868575868575

RandomForestClassifier
Precision: 0.4723809523809524, Recall: 0.4633333333333334, f1: 0.45357142857142857

AdaBoostClassifier
Precision: 0.5148015873015873, Recall: 0.5233333333333333, f1: 0.49172327672327676

GradientBoostingClassifier
Precision: 0.45861111111111114, Recall: 0.5033333333333333, f1: 0.46553058053058055

GaussianNB
Precision: 0.45, Recall: 0.13333333333333336, f1: 0.19523809523809527



In [21]:
for i in algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=meta_and_user[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=meta_and_user[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=meta_and_user[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()

KNeighborsClassifier
Precision: 0.6058333333333332, Recall: 0.6566666666666665, f1: 0.6193706293706293

NearestCentroid
Precision: 0.5913095238095238, Recall: 0.6133333333333334, f1: 0.5925019425019424

DecisionTreeClassifier
Precision: 0.4980158730158729, Recall: 0.55, f1: 0.49095404595404596

LogisticRegression
Precision: 0.5567460317460318, Recall: 0.6, f1: 0.5591286491286491

SVC
Precision: 0.5140873015873015, Recall: 0.73, f1: 0.6006327006327006

BaggingClassifier
Precision: 0.4666666666666667, Recall: 0.41000000000000003, f1: 0.4160461760461761

RandomForestClassifier
Precision: 0.43047619047619046, Recall: 0.37, f1: 0.38722222222222225

AdaBoostClassifier
Precision: 0.4479761904761904, Recall: 0.4333333333333333, f1: 0.42347707847707855

GradientBoostingClassifier
Precision: 0.4967857142857143, Recall: 0.55, f1: 0.4962753912753913

GaussianNB
Precision: 0.5, Recall: 0.17, f1: 0.23968253968253966



In [22]:
for i in algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=visuals[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=visuals[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=meta_and_user[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()

KNeighborsClassifier
Precision: 0.4938095238095238, Recall: 0.6566666666666665, f1: 0.47232323232323237

NearestCentroid
Precision: 0.6583333333333333, Recall: 0.6133333333333334, f1: 0.42976190476190473

DecisionTreeClassifier
Precision: 0.5288095238095238, Recall: 0.55, f1: 0.5272150072150072

LogisticRegression
Precision: 0.549047619047619, Recall: 0.6, f1: 0.5850738150738151

SVC
Precision: 0.5569444444444445, Recall: 0.73, f1: 0.7085714285714285

BaggingClassifier
Precision: 0.5166666666666667, Recall: 0.41000000000000003, f1: 0.47478354978354986

RandomForestClassifier
Precision: 0.5401190476190475, Recall: 0.37, f1: 0.51509324009324

AdaBoostClassifier
Precision: 0.4826190476190476, Recall: 0.4333333333333333, f1: 0.4621950271950272

GradientBoostingClassifier
Precision: 0.5351984126984127, Recall: 0.55, f1: 0.5832234432234432

GaussianNB
Precision: 0.575, Recall: 0.17, f1: 0.3268398268398268



In [23]:
for i in algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=audio[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=audio[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=audio[0:95],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()

KNeighborsClassifier
Precision: 0.47214285714285714, Recall: 0.49333333333333346, f1: 0.4702974802974803

NearestCentroid
Precision: 0.5933333333333334, Recall: 0.24333333333333335, f1: 0.33430735930735933

DecisionTreeClassifier
Precision: 0.45999999999999996, Recall: 0.42333333333333334, f1: 0.43787878787878787

LogisticRegression
Precision: 0.4978571428571429, Recall: 0.5633333333333332, f1: 0.5227505827505826

SVC
Precision: 0.4773015873015873, Recall: 0.69, f1: 0.5608458208458209

BaggingClassifier
Precision: 0.3966666666666666, Recall: 0.33, f1: 0.3508080808080808

RandomForestClassifier
Precision: 0.5333333333333333, Recall: 0.4166666666666667, f1: 0.43199134199134204

AdaBoostClassifier
Precision: 0.5069047619047619, Recall: 0.47333333333333333, f1: 0.48033799533799526

GradientBoostingClassifier
Precision: 0.5214285714285714, Recall: 0.5166666666666667, f1: 0.5157109557109556

GaussianNB
Precision: 0.3738095238095238, Recall: 0.33333333333333337, f1: 0.3504273504273504



In [16]:
import LVW

metadata_selected_features = LVW.lvw(metadata[0:95],movies[0:95].loc[:,'goodforairplane'],74,0.)
meta_and_user_selected_fetures = LVW.lvw(meta_and_user[0:95],movies[0:95].loc[:,'goodforairplane'],74,0.)
visuals_selected_features = LVW.lvw(visuals[0:95],movies[0:95].loc[:,'goodforairplane'],74,0.)
audio_selected_features = LVW.lvw(audio[0:95],movies[0:95].loc[:,'goodforairplane'],74,0.)


Index(['American Sign Language', 'Documentary', 'Portuguese', 'Dutch',
       'Italian', 'Swiss German', 'Icelandic', 'Filipino', 'Turkish',
       'Guatemala', 'Sci-Fi', 'Cantonese', 'English', 'Musical', 'Serbian',
       'Swahili', 'runtime', 'Yiddish', 'Korean', 'Swedish', 'Japanese',
       'Chinese', 'Mystery', 'Biography', 'Sport', 'Pawnee', 'German',
       'Navajo', 'Western', 'Argentina', 'Action', 'Egyptian (Ancient)',
       'Flemish', 'Comedy', 'Latin', 'War', 'Romanian', 'Panjabi', 'Drama',
       'Romance', 'Mongolian', 'Hindi', 'Music', 'Sioux', 'Horror', 'Fantasy',
       'Danish', 'Croatian', 'Hungarian', 'Bengali', 'Konkani', 'Mandarin',
       'Arabic', 'Family', 'Thriller', 'Russia', 'Hmong', 'year', 'Adventure',
       'Animation', 'History', 'Vietnamese', 'Greek', 'Inuktitut', 'Algonquin',
       'Russian', 'Urdu', 'Sign Languages', 'Catalan', 'Crime', 'French',
       'Scottish Gaelic', 'Bosnian', 'Spanish'],
      dtype='object')
Index(['Cantonese', 'Egyptian (

KeyboardInterrupt: 

In [None]:
meta_algo_pool = [knn,nc,tree,log,svm,bag,rf,ada,gb]
text_algo_pool = [gauss,knn,svm]
visual_algo_pool = [knn,tree,log,svm,rf,ada,gb]
audio_algo_pool = [log,gb]

In [None]:
for i in meta_algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=metadata[0:95].loc[metadata_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=metadata[0:95].loc[metadata_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=metadata[0:95].loc[metadata_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()
    

In [None]:
for i in visual_algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=visuals[0:95].loc[visuals_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=visuals[0:95].loc[visuals_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=visuals[0:95].loc[visuals_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()
    

In [None]:
for i in audio_algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=audio[0:95].loc[audio_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=audio[0:95].loc[audio_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=audio[0:95].loc[audio_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()
    

In [None]:
for i in text_algo_pool:
    f1 = np.mean(cross_val_score(estimator=i,X=visuals[0:95].loc[visuals_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='f1',cv=10,n_jobs=-1))
    precision = np.mean(cross_val_score(estimator=i,X=visuals[0:95].loc[visuals_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='precision',cv=10,n_jobs=-1))
    recall = np.mean(cross_val_score(estimator=i,X=visuals[0:95].loc[visuals_selected_features],y=movies[0:95].loc[:,'goodforairplane'],
                         scoring='recall',cv=10,n_jobs=-1))
    print(str(i).split('(')[0])
    print(f'Precision: {precision}, Recall: {recall}, f1: {f1}')
    print()
    

In [None]:
knn_meta = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski',
                           metric_params=None,n_jobs=-1)

nc_meta = NearestCentroid(metric='euclidean', shrink_threshold=None)

tree_meta = DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1,
                              min_weight_fraction_leaf=0.0, max_features=None, random_state=0, max_leaf_nodes=None,
                              class_weight=None, presort=False)

log_meta = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1,
                         class_weight=None, random_state=0, solver='liblinear', max_iter=100, multi_class='ovr', 
                         verbose=0, warm_start=False, n_jobs=-1)

svm_meta = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, 
          cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape=None, random_state=0)

bag_meta = BaggingClassifier(base_estimator=None, n_estimators=10, max_samples=1.0, max_features=1.0, bootstrap=True, 
                        bootstrap_features=False, oob_score=False, warm_start=False, n_jobs=-1, random_state=0, verbose=0)

rf_meta = RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                            min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, 
                            oob_score=False, n_jobs=-1, random_state=0, verbose=0, warm_start=False, class_weight=None)

ada_meta = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=0)

gb_meta = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, min_samples_split=2,
                                min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, init=None, random_state=0,
                                max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto')

gauss_text = GaussianNB()

knn_text = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski',
                           metric_params=None,n_jobs=-1)

svm_text = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, 
          cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape=None, random_state=0)

knn_vis = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski',
                           metric_params=None,n_jobs=-1)

tree_vis = DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1,
                              min_weight_fraction_leaf=0.0, max_features=None, random_state=0, max_leaf_nodes=None,
                              class_weight=None, presort=False)

log_vis = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1,
                         class_weight=None, random_state=0, solver='liblinear', max_iter=100, multi_class='ovr', 
                         verbose=0, warm_start=False, n_jobs=-1)

svm_vis = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, 
          cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape=None, random_state=0)

rf_vis = RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, 
                            min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, 
                            oob_score=False, n_jobs=-1, random_state=0, verbose=0, warm_start=False, class_weight=None)

ada_vis = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=0)

gb_vis = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, min_samples_split=2,
                                min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, init=None, random_state=0,
                                max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto')

log_audio = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1,
                         class_weight=None, random_state=0, solver='liblinear', max_iter=100, multi_class='ovr', 
                         verbose=0, warm_start=False, n_jobs=-1)

gb_audio = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, min_samples_split=2,
                                min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, init=None, random_state=0,
                                max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto')

estimators = [('knn_meta',knn_meta),('nc_meta',nc_meta),('tree_meta',tree_meta),('log_meta',log_meta),('svm_meta',svm_meta),
              ('bag_meta',bag_meta),('rf_meta',rf_meta),('ada_meta',ada_meta),('gb_meta',gb_meta),('gauss_text',gauss_text),
              ('knn_text',knn_text),('svm_text',svm_text),('knn_vis',knn_vis),('tree_vis',tree_vis),('log_vis',log_vis),
              ('svm_vis',svm_vis),('rf_vis',rf_vis),('ada_vis',ada_vis),('gb_vis',gb_vis),('log_audio',log_audio),('gb_audio',gb_audio)]


In [None]:
from stacking_classifiers import stacking_classifier_performance_cv

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    res = stacking_classifier_performance_cv(estimators,metadata,metadata,metadata,metadata,movies[0:95].loc[:,'goodforairplane'])
    print(res)
