In [2]:
!pip install catboost

Collecting catboost
  Obtaining dependency information for catboost from https://files.pythonhosted.org/packages/e2/63/379617e3d982e8a66c9d66ebf4621d3357c7c18ad356473c335bffd5aba6/catboost-1.2.2-cp311-cp311-win_amd64.whl.metadata
  Downloading catboost-1.2.2-cp311-cp311-win_amd64.whl.metadata (1.2 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
     ---------------------------------------- 0.0/47.0 kB ? eta -:--:--
     -------- ------------------------------- 10.2/47.0 kB ? eta -:--:--
     ---------------- --------------------- 20.5/47.0 kB 330.3 kB/s eta 0:00:01
     ------------------------ ------------- 30.7/47.0 kB 325.1 kB/s eta 0:00:01
     -------------------------------------- 47.0/47.0 kB 391.5 kB/s eta 0:00:00
Downloading catboost-1.2.2-cp311-cp311-win_amd64.whl (101.0 MB)
   ---------------------------------------- 0.0/101.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/101.0 MB 2.0 MB/s eta 0:00:50
   -----

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import catboost

In [4]:
# Per-dimension feature tables, read in the order valence, arousal, dominance, liking.
# The first CSV column is used as the row index.
feats_val, feats_ar, feats_dom, feats_lik = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'valence_features_200.csv',
        'arousal_features_200.csv',
        'dominance_features_200.csv',
        'liking_features_200.csv',
    )
)

# Ratings table that the classification labels are derived from.
targets = pd.read_csv('za_klasifikaciju.csv', index_col=0)

In [5]:
# Binarise the four rating columns into low (0.0) / high (1.0) classes.
# NOTE: this cell rebinds `targets`. Re-running it without first reloading the CSV
# thresholds the already-binarised labels and collapses every label to 0.
RATING_THRESHOLD = 4.5  # rating >= threshold -> 1.0, rating < threshold -> 0.0

ratings = targets[['Valence', 'Arousal', 'Dominance', 'Liking']]
# A single comparison replaces two sequential masked assignments whose result
# depended on their order; missing ratings remain NaN, as they did before.
targets = ratings.ge(RATING_THRESHOLD).astype(float).where(ratings.notna())

In [6]:
# Spot-check the binarised labels; the fixed seed keeps the preview identical across re-runs.
targets.sample(15, random_state=0)

Unnamed: 0_level_0,Valence,Arousal,Dominance,Liking
Participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
27,1.0,0.0,1.0,0.0
26,1.0,1.0,1.0,1.0
30,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0
13,1.0,1.0,0.0,1.0
16,0.0,1.0,0.0,1.0
11,1.0,0.0,1.0,0.0
15,1.0,1.0,1.0,1.0
22,1.0,1.0,1.0,1.0
15,1.0,0.0,1.0,1.0


# Modeling

In [7]:
# 10-fold cross-validated CatBoost classifier for each target column, scored by F1.
# NOTE(review): in the recorded run every fold reports bestIteration <= 4. With
# learning_rate=0.001 a thresholded metric such as F1 may simply not change within
# the 15-round early-stopping window, so the reported score could be that of a
# near-constant predictor -- TODO confirm the models train past the first few trees.
cat_params = {
    'loss_function': 'Logloss',
    'eval_metric': 'F1',
    'learning_rate': 0.001,
    'depth': 5,
    'subsample': 0.8
}

# Each target column is modelled from its own feature table. An explicit mapping
# (instead of an if/elif chain) cannot silently reuse a stale `data` for an unknown name.
features_by_target = {
    'Valence': feats_val,
    'Arousal': feats_ar,
    'Dominance': feats_dom,
    'Liking': feats_lik,
}

for c, data in features_by_target.items():
    # presumably features and labels are matched by row position -- verify both tables share row order
    cat_crossval = catboost.Pool(data=data, label=targets[c])

    cat_cv = catboost.cv(pool=cat_crossval,
                        params=cat_params,
                        num_boost_round=5000,
                        nfold=10,
                        verbose_eval=0,
                        early_stopping_rounds=15
                        )
    # idxmax yields the index *label* of the best iteration, which is what .loc expects
    # (np.argmax yields a position and only coincides with the label on a default index).
    ind_max = cat_cv['test-F1-mean'].idxmax()
    print(f'{c}: test F1 mean = {cat_cv.loc[ind_max, "test-F1-mean"]}, std = {cat_cv.loc[ind_max, "test-F1-std"]}')
    

Training on fold [0/10]

bestTest = 0.7788461538
bestIteration = 3

Training on fold [1/10]

bestTest = 0.7766990291
bestIteration = 1

Training on fold [2/10]

bestTest = 0.7788461538
bestIteration = 4

Training on fold [3/10]

bestTest = 0.7788461538
bestIteration = 2

Training on fold [4/10]

bestTest = 0.7788461538
bestIteration = 2

Training on fold [5/10]

bestTest = 0.7826086957
bestIteration = 2

Training on fold [6/10]

bestTest = 0.7751196172
bestIteration = 4

Training on fold [7/10]

bestTest = 0.7826086957
bestIteration = 1

Training on fold [8/10]

bestTest = 0.7804878049
bestIteration = 3

Training on fold [9/10]

bestTest = 0.7729468599
bestIteration = 2

Valence: test F1 mean = 0.7743195102634461, std = 0.0022154696163690676
Training on fold [0/10]

bestTest = 0.7772511848
bestIteration = 4

Training on fold [1/10]

bestTest = 0.7772511848
bestIteration = 3

Training on fold [2/10]

bestTest = 0.7884615385
bestIteration = 1

Training on fold [3/10]

bestTest = 0.780952

In [8]:
# 10-fold cross-validated CatBoost classifier for each target column, scored by accuracy.
# NOTE(review): in the recorded run every fold reports bestIteration <= 4. With
# learning_rate=0.001 a thresholded metric such as Accuracy may simply not change
# within the 15-round early-stopping window, so the reported score could be that of
# a near-constant predictor -- TODO confirm the models train past the first few trees.
cat_params = {
    'loss_function': 'Logloss',
    'eval_metric': 'Accuracy',
    'learning_rate': 0.001,
    'depth': 5,
    'subsample': 0.8
}

# Each target column is modelled from its own feature table. An explicit mapping
# (instead of an if/elif chain) cannot silently reuse a stale `data` for an unknown name.
features_by_target = {
    'Valence': feats_val,
    'Arousal': feats_ar,
    'Dominance': feats_dom,
    'Liking': feats_lik,
}

for c, data in features_by_target.items():
    # presumably features and labels are matched by row position -- verify both tables share row order
    cat_crossval = catboost.Pool(data=data, label=targets[c])

    cat_cv = catboost.cv(pool=cat_crossval,
                        params=cat_params,
                        num_boost_round=5000,
                        nfold=10,
                        verbose_eval=0,
                        early_stopping_rounds=15
                        )
    # idxmax yields the index *label* of the best iteration, which is what .loc expects
    # (np.argmax yields a position and only coincides with the label on a default index).
    ind_max = cat_cv['test-Accuracy-mean'].idxmax()
    print(f'{c}: test Accuracy mean = {cat_cv.loc[ind_max, "test-Accuracy-mean"]}, std = {cat_cv.loc[ind_max, "test-Accuracy-std"]}')

Training on fold [0/10]

bestTest = 0.6434108527
bestIteration = 1

Training on fold [1/10]

bestTest = 0.6434108527
bestIteration = 1

Training on fold [2/10]

bestTest = 0.640625
bestIteration = 1

Training on fold [3/10]

bestTest = 0.640625
bestIteration = 2

Training on fold [4/10]

bestTest = 0.640625
bestIteration = 1

Training on fold [5/10]

bestTest = 0.6484375
bestIteration = 2

Training on fold [6/10]

bestTest = 0.6328125
bestIteration = 4

Training on fold [7/10]

bestTest = 0.6484375
bestIteration = 1

Training on fold [8/10]

bestTest = 0.6456692913
bestIteration = 3

Training on fold [9/10]

bestTest = 0.6299212598
bestIteration = 0

Valence: test Accuracy mean = 0.6335910318622963, std = 0.007689552917263559
Training on fold [0/10]

bestTest = 0.6356589147
bestIteration = 4

Training on fold [1/10]

bestTest = 0.6356589147
bestIteration = 3

Training on fold [2/10]

bestTest = 0.65625
bestIteration = 1

Training on fold [3/10]

bestTest = 0.6484375
bestIteration = 0

