In [2]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
train = pd.read_csv('./persons_pics_train.csv')
test = pd.read_csv('./persons_pics_reserved.csv')

X_train = train.drop(columns=['label'])
y_train = train['label']

sc = StandardScaler()
X_train_s = sc.fit_transform(X_train)
X_test_s = sc.transform(test)

pca = PCA(svd_solver='full')
pca.fit(X_train_s)
var = np.cumsum(pca.explained_variance_ratio_)
k_candidates = [np.argmax(var > t) + 1 for t in [0.95, 0.97, 0.98]]

best_f1 = -1
best_model = None
best_pred = None

for k in k_candidates:
    pca_k = PCA(n_components=k, svd_solver='full')
    X_train_p = pca_k.fit_transform(X_train_s)
    X_test_p = pca_k.transform(X_test_s)

    params = [{
        'kernel': ['rbf', 'poly'],
        'gamma': [1e-4, 5e-4, 1e-3, 5e-3],
        'C': [1, 10, 50, 100, 300, 1000],
        'degree': [2, 3, 4],
        'class_weight': [None, 'balanced']
    }]

    cv = GridSearchCV(SVC(), params, refit=True, verbose=3, n_jobs=-1)
    cv.fit(X_train_p, y_train)

    model = cv.best_estimator_
    pred = model.predict(X_train_p)
    f1 = f1_score(y_train, pred, average='weighted')

    if f1 > best_f1:
        best_f1 = f1
        best_model = model
        best_pred = best_model.predict(X_test_p)

preds_list = best_pred.tolist()
preds_list

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
Fitting 5 folds for each of 288 candidates, totalling 1440 fits
Fitting 5 folds for each of 288 candidates, totalling 1440 fits


['George W Bush',
 'Donald Rumsfeld',
 'Tony Blair',
 'Jacques Chirac',
 'Tony Blair',
 'Colin Powell',
 'Donald Rumsfeld',
 'George W Bush',
 'Tony Blair',
 'Tony Blair',
 'Donald Rumsfeld',
 'Hugo Chavez',
 'George W Bush',
 'Hugo Chavez',
 'Colin Powell',
 'George W Bush',
 'Ariel Sharon',
 'Colin Powell',
 'John Ashcroft',
 'Gerhard Schroeder',
 'Ariel Sharon',
 'George W Bush',
 'Colin Powell',
 'George W Bush',
 'George W Bush',
 'Donald Rumsfeld',
 'Donald Rumsfeld',
 'Tony Blair',
 'Serena Williams',
 'Jean Chretien',
 'George W Bush',
 'George W Bush',
 'George W Bush',
 'George W Bush',
 'George W Bush',
 'Donald Rumsfeld',
 'Donald Rumsfeld',
 'Colin Powell',
 'George W Bush',
 'Gerhard Schroeder',
 'Colin Powell',
 'Donald Rumsfeld',
 'Gerhard Schroeder',
 'George W Bush',
 'Tony Blair',
 'George W Bush',
 'George W Bush',
 'Gerhard Schroeder',
 'Colin Powell',
 'Tony Blair',
 'Serena Williams',
 'Jean Chretien',
 'Tony Blair',
 'George W Bush',
 'Colin Powell',
 'George W 