In [10]:
import pickle
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.svm import SVC
import numpy as np

with open("data/processed/features.pkl", "rb") as f:
    data = pickle.load(f)

In [22]:
X_alpha = data["regional"]["ALPHA"]["X"]
y_alpha = data["regional"]["ALPHA"]["y"]

y_true = []
y_pred = []

error = {}

loo = LeaveOneOut()

for train_index, test_index in loo.split(X_alpha):
    X_train, X_test = X_alpha[train_index], X_alpha[test_index]
    y_train, y_test = y_alpha[train_index], y_alpha[test_index]

    scaler = StandardScaler()
    model = RandomForestClassifier(n_estimators=55, max_depth=19, random_state=33)

    scaler.fit(X_train)
    selector = SelectKBest(f_classif, k=25)
    selector.fit(X_train, y_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    X_train = selector.transform(X_train)
    X_test = selector.transform(X_test)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    if pred[0] != y_test[0]:
        error[test_index[0]] = error.get(test_index[0], {}) | {"alpha": (int(pred[0]), int(y_test[0]))}

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.6111111111111112,
 array([[11, 10,  1],
        [ 7, 31,  2],
        [ 6,  9, 13]]))

In [23]:
X_beta = data["vector"]["BETA"]["X"]
y_beta = data["vector"]["BETA"]["y"]

y_true = []
y_pred = []
error_list_beta = []

loo = LeaveOneOut()

for train_index, test_index in loo.split(X_beta):
    X_train, X_test = X_beta[train_index], X_beta[test_index]
    y_train, y_test = y_beta[train_index], y_beta[test_index]

    scaler = StandardScaler()
    model = GradientBoostingClassifier(n_estimators=59, max_depth=2, learning_rate=0.57, random_state=33)

    scaler.fit(X_train)
    selector = SelectKBest(f_classif, k=40)
    selector.fit(X_train, y_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    X_train = selector.transform(X_train)
    X_test = selector.transform(X_test)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)


    if pred[0] != y_test[0]:
        error[test_index[0]] = error.get(test_index[0], {}) | {"beta": (int(pred[0]), int(y_test[0]))}

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.6111111111111112,
 array([[10,  8,  4],
        [ 2, 32,  6],
        [ 6,  9, 13]]))

In [8]:
X_theta = data["vector"]["THETA"]["X"]
y_theta = data["vector"]["THETA"]["y"]

y_true = []
y_pred = []


loo = LeaveOneOut()

for train_index, test_index in loo.split(X_theta):
    X_train, X_test = X_theta[train_index], X_theta[test_index]
    y_train, y_test = y_theta[train_index], y_theta[test_index]

    scaler = StandardScaler()
    model = GradientBoostingClassifier(n_estimators=130, max_depth=3, learning_rate=0.17, random_state=33)

    scaler.fit(X_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.5111111111111111,
 array([[ 7,  9,  6],
        [ 4, 30,  6],
        [ 7, 12,  9]]))

In [29]:
import random

X_alpha = data["regional"]["ALPHA"]["X"]
y_alpha = data["regional"]["ALPHA"]["y"]

X_delta = data["vector"]["DELTA"]["X"]
y_delta = data["vector"]["DELTA"]["y"]

y_true = []
y_pred = []

loo = LeaveOneOut()

for train_index, test_index in loo.split(X_delta):
    X_train, X_test = X_delta[train_index], X_delta[test_index]
    y_train, y_test = y_delta[train_index], y_delta[test_index]

    scaler = StandardScaler()
    model_delta = SVC(C=0.05, gamma="auto", kernel="linear", random_state=33)
    model_alpha = RandomForestClassifier(n_estimators=55, max_depth=19, random_state=33)
    model_beta = GradientBoostingClassifier(n_estimators=59, max_depth=2, learning_rate=0.57, random_state=33)

    selector_alpha = SelectKBest(f_classif, k=25)
    selector_alpha.fit(X_train, y_train)
    selector_beta = SelectKBest(f_classif, k=40)
    selector_beta.fit(X_train, y_train)

    scaler.fit(X_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    X_train_alpha = selector.transform(X_train)
    X_test_alpha = selector.transform(X_test)
    X_train_beta = selector.transform(X_train)
    X_test_beta = selector.transform(X_test)

    model_alpha.fit(X_train_alpha, y_train)
    pa = int(model_alpha.predict(X_test_alpha)[0])

    model_beta.fit(X_train_beta, y_train)
    pb = int(model_beta.predict(X_test_beta)[0])

    model_delta.fit(X_train, y_train)
    pd = int(model_delta.predict(X_test)[0])

    if (pa == pd) and (pa == pb):
        final_pred = pa
    elif (pa == pd) or (pa == pb):
        final_pred = pa
    elif (pd == pa) or (pd == pb):
        final_pred = pd
    else:
        final_pred = random.choice([pa, pb, pd])

    y_true.append(y_test[0])
    y_pred.append(final_pred)

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.5111111111111111,
 array([[ 5, 11,  6],
        [ 0, 33,  7],
        [ 6, 14,  8]]))

In [None]:
for k, v in error.items():
    print(f"[{k}], [${error[k].get('alpha', '        ')}$], [{error[k].get('beta', '        ')}    ;    {error[k].get('delta', '        ')}")

1    |    (1, 0)    ;    (1, 0)    ;    (1, 0)
7    |    (1, 0)    ;                ;    (1, 0)
8    |    (1, 0)    ;    (1, 0)    ;    (1, 0)
9    |    (1, 0)    ;    (1, 0)    ;    (1, 0)
10    |    (1, 0)    ;    (2, 0)    ;            
11    |    (1, 0)    ;    (1, 0)    ;    (1, 0)
13    |    (1, 0)    ;    (1, 0)    ;    (1, 0)
14    |    (1, 0)    ;    (1, 0)    ;    (1, 0)
15    |    (1, 0)    ;    (1, 0)    ;    (1, 0)
19    |    (2, 0)    ;    (2, 0)    ;            
21    |    (1, 0)    ;                ;    (1, 0)
22    |    (0, 1)    ;                ;            
23    |    (0, 1)    ;                ;            
33    |    (0, 1)    ;                ;    (0, 1)
34    |    (2, 1)    ;                ;            
39    |    (2, 1)    ;                ;            
41    |    (0, 1)    ;                ;            
55    |    (0, 1)    ;    (2, 1)    ;            
58    |    (0, 1)    ;                ;    (2, 1)
60    |    (0, 1)    ;                ;            
62    

In [None]:
X_delta = data["vector"]["DELTA"]["X"]
y_delta = data["vector"]["DELTA"]["y"]

y_true = []
y_pred = []

error_list_delta = []


loo = LeaveOneOut()

for train_index, test_index in loo.split(X_delta):
    X_train, X_test = X_delta[train_index], X_delta[test_index]
    y_train, y_test = y_delta[train_index], y_delta[test_index]

    scaler = StandardScaler()
    model = SVC(C=0.05, gamma="auto", kernel="linear", random_state=33)

    scaler.fit(X_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)


    if pred[0] != y_test[0]:
        error[test_index[0]] = error.get(test_index[0], {}) | {"delta": (int(pred[0]), int(y_test[0]))}

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm