In [None]:
import tensorflow as tf
# Ask TensorFlow for the name of a visible GPU device. The call returns an
# empty string when no GPU is available, so report that case explicitly
# instead of printing a "found" message with a blank device name.
device_name = tf.test.gpu_device_name()
if device_name:
    print(f'Found GPU at: {device_name}')
else:
    print('No GPU found; computations will run on the CPU')

In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.preprocessing import normalize
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.externals import joblib
from sklearn.metrics import fbeta_score
from sklearn.externals import joblib
from keras.models import load_model, Model
from keras.models import model_from_json
from  keras import backend as K
from sklearn.metrics import confusion_matrix
import seaborn as sns
from matplotlib import pyplot as plt
import cv2

In [None]:
# Read the training split and discard its first column before previewing it.
# NOTE(review): the first column is presumably a saved row index — confirm.
df = pd.read_csv('../input/train.csv')
df = df.drop(columns=df.columns[0])
df.head()

In [None]:
# Label-based slices (inclusive on both ends): in0..in62 are the model inputs,
# out63..out69 are the target columns.
x_train = df.loc[:, 'in0':'in62']
y_train = df.loc[:, 'out63':'out69']

In [None]:
# Read the test split and discard its first column before previewing it.
# `df` is reused here, so the training frame loaded earlier is no longer
# reachable under this name.
df = pd.read_csv('../input/test.csv')
df = df.drop(columns=df.columns[0])
df.head()

In [None]:
# Same column layout as the training split: in0..in62 inputs, out63..out69 targets.
x_test = df.loc[:, 'in0':'in62']
y_test = df.loc[:, 'out63':'out69']

In [None]:
# Append the test rows underneath the training rows so the model is fitted on
# both splits. The results are plain NumPy arrays (column names are lost).
# Re-running this cell would append the test rows a second time.
x_train = np.concatenate([x_train, x_test], axis=0)
y_train = np.concatenate([y_train, y_test], axis=0)

In [None]:
# Optional preprocessing (currently disabled): rescale each sample to unit norm before training.
# x_train = normalize(x_train)

In [None]:
# Sanity check: show the dimensions of the stacked feature and label arrays.
x_train.shape, y_train.shape

In [None]:
# Collapse each row of target columns into a single integer class index (the
# position of the row's maximum). The vectorised call replaces a per-row Python
# loop and, unlike it, raises if this cell is accidentally re-run on the
# already-collapsed 1-D labels instead of silently returning all zeros.
y_train = np.argmax(y_train, axis=1)

In [None]:
# Base model; every hyper-parameter of interest is supplied through the grid.
estimator = XGBClassifier()

# Only `learning_rate` has more than one candidate, so the search compares four
# configurations. The former 'nthread' entry was dropped: it is the deprecated
# alias of 'n_jobs' and set the same value.
param_grid = {
    'n_jobs': [16],
    'n_estimators': [100],
    'max_depth': [64],
    'learning_rate': [1, 0.1, 0.01, 0.001],
    'objective': ['multi:softmax'],
}

# Micro-averaged F1 is the model-selection metric.
scoring = 'f1_micro'

# `random_state` only has an effect when shuffling is enabled; without
# `shuffle=True` the seed is ignored by old scikit-learn releases and rejected
# with a ValueError by current ones. Shuffling also keeps the appended test
# rows from being concentrated in particular folds.
cv = StratifiedKFold(n_splits=7, shuffle=True, random_state=42)

In [None]:
%%time
gs = GridSearchCV(estimator, param_grid, scoring=scoring, cv=cv, verbose=3)
gs.fit(x_train, y_train)

In [None]:
# Tabulate the cross-validation results, best mean test score first.
results = (
    pd.DataFrame(gs.cv_results_)
    .sort_values(by='mean_test_score', ascending=False)
)
results.head()

In [None]:
# Read the held-out validation split and discard its first column before
# previewing it. `df` is reused again, replacing the test frame.
df = pd.read_csv('../input/validation.csv')
df = df.drop(columns=df.columns[0])
df.head()

In [None]:
# Rename the 70 columns positionally: the first 63 become f0..f62 and the
# remaining seven become out63..out69.
# NOTE(review): the f<i> names presumably mirror the default feature names
# XGBoost assigns when it is fitted on a plain array — confirm.
df.columns = [('f' if idx < 63 else 'out') + str(idx) for idx in range(70)]

In [None]:
# Preview the frame again to check the column headers after renaming.
df.head()

In [None]:
# Split the renamed validation frame: f0..f62 are inputs, out63..out69 targets.
x_validation = df.loc[:, 'f0':'f62']
y_validation = df.loc[:, 'out63':'out69']

In [None]:
# Predict validation classes with the estimator the grid search selected.
best_model = gs.best_estimator_
y_pred = best_model.predict(x_validation)

In [None]:
# Display the raw predictions returned by the selected estimator.
y_pred

In [None]:
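# Optional preprocessing (currently disabled): rescale each validation sample to unit norm.
# If enabled, it has to run before the prediction cell and be paired with the same step on x_train.
# x_validation = normalize(x_validation)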

In [None]:
# Show the dimensions of the validation target frame.
y_validation.shape

In [None]:
# LabelBinarizer converts between class labels and one-vs-all indicator
# columns; the instance is fitted later, when the predictions are binarized.
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()

In [None]:
# Fit the binarizer on the complete label set (one label per target column)
# rather than on the predictions. Fitting on the predictions creates columns
# only for the classes the model happened to predict, so a never-predicted
# class would shift or drop columns relative to `y_validation`.
lb.fit(np.arange(y_validation.shape[1]))

# One indicator column per class, aligned with the columns of `y_validation`.
y_pred = lb.transform(y_pred)
y_pred.shape

In [None]:
# Micro-averaged F-beta with beta=1 (i.e. F1) between the validation targets
# and the binarized predictions.
score = fbeta_score(
    y_true=y_validation,
    y_pred=y_pred,
    beta=1,
    average='micro',
)
score

In [None]:
# Persist the selected estimator; the validation score is embedded in the
# file name so different runs can be told apart on disk.
joblib.dump(
    value=gs.best_estimator_,
    filename="xgboost{}.dat".format(score),
)

In [None]:
# Collapse the validation target columns into integer class indices.
# `DataFrame.as_matrix()` no longer exists in pandas >= 1.0; `.values` works on
# every pandas version, and a single vectorised argmax replaces the row loop.
y_true = np.argmax(y_validation.values, axis=1)
y_true.shape

In [None]:
# Map the indicator matrix back to class labels through the binarizer itself.
# A plain argmax returns column positions, which only coincide with the class
# labels when the binarizer's columns cover every class in order;
# `inverse_transform` looks the label up in `lb.classes_` and is right either way.
y_pred = lb.inverse_transform(np.asarray(y_pred))
y_pred.shape

In [None]:
# Display the true and predicted label vectors side by side for a quick check.
y_true, y_pred

In [None]:
# Plot a row-normalised confusion matrix of the validation predictions.
# NOTE(review): the names below are assumed to be listed in class-index order
# (0..6) — confirm against the dataset's label encoding.
emotions_text = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']

# Pin the label set so the matrix always has one row/column per emotion, even
# when a class is missing from both y_true and y_pred; otherwise the matrix
# shrinks and the tick labels no longer line up with the classes.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(emotions_text)))

# Divide each row by its number of true samples. Rows without any true samples
# are left at 0 instead of producing NaN through a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalised = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)

# Large font and figure sizes; note that sns.set changes global plot styling.
sns.set(font_scale=4.5)
fig, ax = plt.subplots(figsize=(30, 20))
# Draw on the axes created above explicitly rather than on the implicit
# "current" axes.
ax = sns.heatmap(cm_normalised, annot=True, linewidths=2.5, square=True, linecolor="Green",
                 cmap="Greens", yticklabels=emotions_text, xticklabels=emotions_text,
                 vmin=0, vmax=np.max(cm_normalised), fmt=".2f", annot_kws={"size": 50},
                 ax=ax)
ax.set(xlabel='Predicted label', ylabel='True label');