In [1]:
import pandas as pd
import numpy as np
#from defs import mp_cv_score, stacked_model

In [2]:
df = pd.read_csv('data_modified.csv', low_memory=False)
df = df.drop(columns=['Unnamed: 0', 'Description', 'Reference number', 'Location', 'Functions'])
df = df.dropna()
print(df.shape)
df.head()

(46090, 29)


Unnamed: 0,Bracelet material,Brand,Case diameter,Case material,Condition,Dial,Gender,Model,Movement,Scope of delivery,...,Chronograph,GMT,Tachymeter,Moon phase,Annual calendar,Perpetual calendar,Tourbillon,Flyback,Alarm,Country
0,Steel,Bulgari,41.0,Steel,New,Black,Men's watch/Unisex,Octo,Automatic,"Original box, original papers",...,0,0,0,0,0,0,0,0,0,United States of America
1,Steel,Tissot,39.0,Steel,New,Mother of pearl,Men's watch/Unisex,PR 100,Quartz,"Original box, no original papers",...,0,0,0,0,0,0,0,0,0,Japan
2,Leather,Mido,41.0,Steel,New,Silver,Men's watch/Unisex,Belluna,Automatic,"Original box, original papers",...,0,0,0,0,0,0,0,0,0,Japan
3,Steel,Mido,42.0,Steel,New,Black,Men's watch/Unisex,Multifort GMT,Automatic,"Original box, original papers",...,0,0,0,0,0,0,0,0,0,Japan
4,Leather,Seiko,42.0,Steel,Unworn,Black,Men's watch/Unisex,Kinetic,Quartz,"Original box, original papers",...,0,0,0,0,0,0,0,0,0,Japan


In [3]:
df['Brand'].value_counts()[0:10]

Rolex             13050
Omega              4438
Cartier            2107
Breitling          1943
Seiko              1917
TAG Heuer          1715
Longines           1484
Hublot             1227
Patek Philippe     1167
Tudor              1138
Name: Brand, dtype: int64

In [4]:
df['Model'].value_counts()[0:10]

Datejust                              2626
Lady-Datejust                         1484
Submariner Date                       1192
Daytona                               1168
GMT-Master II                          935
Seamaster Diver 300 M                  584
Speedmaster Professional Moonwatch     576
Seamaster                              469
Seamaster Aqua Terra                   467
Grand Seiko                            462
Name: Model, dtype: int64

In [5]:
df = df[df["Year of production"] > 1900]
df['Decade'] = 0
for i in range(190, 220):
    start = i * 10
    end = start + 10
    df.loc[(df['Year of production'] >= start) & (df['Year of production'] < end), 'Decade'] = '{}s'.format(start)

In [6]:
df = df.loc[df['Case diameter'] > 14]
df['Diameter_group'] = 0
for i in range(0, 24):
    start = 14 + i*2
    end = start + 1
    df.loc[(df['Case diameter'] >= start) & (df['Case diameter'] <= end), 'Diameter_group'] = '{}-{}'.format(start, end)

In [7]:
functions = ['Date', 'Weekday', 'Month', 'Year', 'Chronograph', 'GMT', 
             'Tachymeter', 'Moon phase', 'Annual calendar', 'Perpetual calendar',
             'Tourbillon', 'Flyback', 'Alarm']

In [8]:
mov = df['Movement'].isin(['Manual winding', 'Automatic']).astype(int)
fun = df[functions].mul(mov, axis=0)

df = df.merge(fun, left_index=True, right_index=True, suffixes=(None, '_m'))

In [9]:
functions = ['Date', 'Weekday', 'Month', 'Year', 'Chronograph', 'GMT', 
             'Tachymeter', 'Moon phase', 'Annual calendar', 'Perpetual calendar',
             'Tourbillon', 'Flyback', 'Alarm']
functions = [s + '_m' for s in functions]

In [10]:
keep = df['Brand'].value_counts()
keep = keep[keep>4]
keep = keep.index.get_level_values(0).astype(str).values
df = df[df['Brand'].isin(keep)]

In [11]:
df = df.dropna()

# Feature based Classification

In [24]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score, GridSearchCV, ParameterGrid
from category_encoders.binary import BinaryEncoder
from category_encoders.target_encoder import TargetEncoder
import time

In [13]:
categorical_var = ['Bracelet material', 'Case material',
       'Condition', 'Dial', 'Gender', 'Movement', 'Scope of delivery',
       'private_seller', 'Crystal', 'Decade', 'Diameter_group', 'Country']

In [14]:
X = df[categorical_var + functions]
y = df['Brand']

transformers = [
    ColumnTransformer([('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_var)], remainder='passthrough'),
    ColumnTransformer([('binary', BinaryEncoder(), categorical_var)], remainder='passthrough'),
]

### First test of different classifiers

In [15]:
%%time

transformers = [
    ColumnTransformer([('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_var)], remainder='passthrough'),
    ColumnTransformer([('binary', BinaryEncoder(), categorical_var)], remainder='passthrough'),
]

models = [
    LogisticRegression(random_state=13, max_iter=500),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=13)
]

for transformer in transformers:
    print(transformer.transformers[0][0])
    for model in models:
        start_time = time.time()
        pipe = Pipeline([('tf', transformer), ('model', model)])
        scores = cross_val_score(pipe, X, y, cv=5, n_jobs=5)
        print('{}. Score: {}. Time: {}s'.format(model, np.mean(scores), time.time() - start_time))

onehot
LogisticRegression(max_iter=500, random_state=13). Score: 0.5286715206718866. Time: 144.82558846473694s
KNeighborsClassifier(). Score: 0.5508392277456381. Time: 15.697580099105835s
RandomForestClassifier(random_state=13). Score: 0.609401827311788. Time: 42.41515588760376s
binary
LogisticRegression(max_iter=500, random_state=13). Score: 0.4770488138242344. Time: 158.32193064689636s
KNeighborsClassifier(). Score: 0.5347411079558718. Time: 10.527743816375732s
RandomForestClassifier(random_state=13). Score: 0.595783687453977. Time: 10.082473516464233s
Wall time: 6min 21s


## Hyperparameter tuning for Random Forest

In [17]:
%%time

params = [
    {'model__max_depth': [50, 100, 200, None],
     'model__max_features': ['log2', 'auto'],
     'model__min_samples_leaf': [1, 2],
     'model__min_samples_split': [2, 5, 10],
     'model__n_estimators': [100, 200, 500]}]


transformer = ColumnTransformer([
        ('cat', BinaryEncoder(), categorical_var),
    ], remainder='passthrough')


pipe = Pipeline([('tf', transformer), ('model',RandomForestClassifier(random_state=13, n_jobs=5))])

grid = RandomizedSearchCV(estimator=pipe,
             param_distributions=params, n_jobs=1, n_iter=100, cv=3, verbose=1, random_state=13)

grid.fit(X, y)

print(grid.best_params_)
print(grid.best_score_)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):


{'model__n_estimators': 200, 'model__min_samples_split': 5, 'model__min_samples_leaf': 1, 'model__max_features': 'auto', 'model__max_depth': None}
0.5914113107143263
Wall time: 32min 17s


In [22]:
transformer = ColumnTransformer([('binary', BinaryEncoder(), categorical_var)], remainder='passthrough')

pipe = Pipeline([('tf', transformer), ('model',RandomForestClassifier(random_state=13, n_jobs=5, 
                                       n_estimators= 200, 
                                       min_samples_split=5,
                                       min_samples_leaf=1,
                                       max_features='auto',
                                       max_depth = None))])


np.mean(cross_val_score(pipe, X, y, cv=5, n_jobs=5))

0.6024405788128746

# Image/Text based classification

In [15]:
df_words = pd.read_csv('word_clusters.csv', low_memory=False)
df = df.merge(df_words, on='id')

X = df[map(str, range(0, 389))]
y = df['Brand']

### First test of different classifiers

In [24]:
%%time

models = [
    LogisticRegression(random_state=13, max_iter=1000),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=13),
]


for model in models:
    start_time = time.time()
    scores = cross_val_score(model, X, y, cv=5, n_jobs=5)
    print('{}. Score: {}. Time: {}s'.format(model, np.mean(scores), time.time() - start_time))



LogisticRegression(max_iter=1000, random_state=13). Score: 0.5656471125313856. Time: 92.00567746162415s




KNeighborsClassifier(). Score: 0.4074640493038119. Time: 12.727590084075928s




RandomForestClassifier(random_state=13). Score: 0.55243095183748. Time: 35.154783487319946s
Wall time: 2min 19s


### Hyperparameter tuning for random forest

In [16]:
%%time

params = [
    {'max_depth': [200, 500, None],
     'max_features': ['auto', 'log2'],
     'min_samples_leaf': [1, 2],
     'min_samples_split': [2, 20, 50],
     'n_estimators': [100, 200, 500]}]



grid = RandomizedSearchCV(estimator=RandomForestClassifier(random_state=13, n_jobs=5),
             param_distributions=params, n_jobs=1, n_iter=100, cv=3, verbose=1, random_state=13)

grid.fit(X, y)

print(grid.best_params_)
print(grid.best_score_)

Fitting 3 folds for each of 100 candidates, totalling 300 fits
{'n_estimators': 200, 'min_samples_split': 20, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 200}
0.5628397402161429
Wall time: 52min 6s


In [17]:
model = RandomForestClassifier(random_state=13, n_jobs=5, 
                                       n_estimators= 500, 
                                       min_samples_split=20,
                                       min_samples_leaf=1,
                                       max_features='log2',
                                       max_depth = 200)


np.mean(cross_val_score(model, X, y, cv=5, n_jobs=5))



0.5657384158867839

## Stacking models

In [20]:
X = df
y = df['Brand']

In [22]:
%%time

n_cols = len(df[categorical_var + functions].columns)

m1 = RandomForestClassifier(random_state=13, n_jobs=5, n_estimators=200, min_samples_split=5, min_samples_leaf=1, max_features= 'auto', max_depth=None)
m2 = RandomForestClassifier(random_state=13, n_jobs=5, n_estimators=500, min_samples_split=20, min_samples_leaf=1, max_features= 'log2', max_depth=200)


pipe_1 = Pipeline([
    ('selector', ColumnTransformer([('selector', 'passthrough', categorical_var + functions)], remainder='drop')),
    ('encoder', ColumnTransformer([('encoder', BinaryEncoder(), list(range(0,n_cols)))], remainder='passthrough')),
    ('model', m1)
])

pipe_2 = Pipeline([
    ('selector', ColumnTransformer([('selector', 'passthrough', list(map(str, range(0, 389))))], remainder='drop')),
    ('model', m2)
])


clf = VotingClassifier(estimators=[('m1', pipe_1), ('m2', pipe_2)], voting='soft')

np.mean(cross_val_score(clf, X, y, cv=5, n_jobs=1))

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):


Wall time: 2min 17s


0.70703035836567

In [26]:
%%time

models = [('m1', pipe_1),
          ('m2', pipe_2)]

clf = StackingClassifier(estimators=models, final_estimator=LogisticRegression(n_jobs=5))

np.mean(cross_val_score(clf, X, y, cv=5, n_jobs=1))

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):


Wall time: 13min 9s


0.7341702807578179

In [27]:
%%time

models = [('m1', pipe_1),
          ('m2', pipe_2)]

clf = StackingClassifier(estimators=models, final_estimator=RandomForestClassifier(random_state=13, n_jobs=5))

np.mean(cross_val_score(clf, X, y, cv=5, n_jobs=1))

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical

  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):
  elif pd.api.types.is_categorical(cols):


Wall time: 12min 19s


0.7201780415430268