In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import xgboost as xgb

import sys
sys.path.insert(0,"../Classes")
from ScorerClass import scorer

In [2]:
def depth_scale(X):
    bsum = np.sum(X,axis=1)
    bsum[bsum==0] = 1
    out = X/np.moveaxis([bsum]*X.shape[1],(0,1),(1,0))
    return out

In [3]:
def std_scaler(X,is_d=False):
    if is_d:
        bsum = np.sum(X,axis=1)
        bsum[bsum==0] = 1
        out = X/np.moveaxis([bsum]*X.shape[1],(0,1),(1,0))
        return out
    mu = np.mean(X,axis=0)
    sig = np.sqrt(np.var(X,axis=0))
    if any(sig == 0):
        return X
    out = (X-mu)/sig
    return out

In [4]:
from sklearn.linear_model import LogisticRegression
def select_features(Xin,is_d,y,tr,t,p = 0.1):
    X = std_scaler(Xin,is_d)
    clf = LogisticRegression(max_iter = 1000)
    clf.fit(X[tr],y[tr])
    
    #Sometimes problems?
    sdv = np.std(X,axis=0)
    sdv[sdv==0] = 1
    
    importances = clf.coef_/sdv
    truth_val = importances>=np.percentile(importances,100-100*p)
    truth_val = truth_val[0]
    return truth_val

def load_and_select(path,is_d,y,tr,t,p=0.1):
    X = np.load(path)
    fs = select_features(X,is_d,y,tr,t,p)
    X2 = X[:,fs]
    if is_d:
        X2 = depth_scale(X2)
    return X2

In [5]:
def get_data(autoenc=False,rgb=True,lab=True,depth=True):
    Xlist = []
    if autoenc:
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs01.npy",False,y,tr,t))
    if rgb:
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs03.npy",False,y,tr,t))
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs04.npy",False,y,tr,t))
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs05.npy",False,y,tr,t))
    if lab:
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs08.npy",False,y,tr,t))
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs09.npy",False,y,tr,t))
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs10.npy",False,y,tr,t))
    if depth:
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs00.npy",True,y,tr,t))#d
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs02.npy",True,y,tr,t))#d
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs06.npy",True,y,tr,t))#d
        Xlist.append(load_and_select("../../../Data/Sampled_Features/fs07.npy",True,y,tr,t))#d
    
    data = np.hstack(Xlist)
    return data

In [6]:
from sklearn.model_selection import train_test_split
seed = 2666

y    = np.load("../../../Data/Sampled_Features/y.npy")

tr,t = train_test_split([a for a in range(y.shape[0])],random_state=seed)

In [7]:
def do_score(pred,y):
    scorer_ = scorer()
    print("Acc:",scorer_.acc(y,pred))
    print("Sens:",scorer_.sens(y,pred))
    print("Spec:",scorer_.spec(y,pred))
    print("VOI:",scorer_.VOI(y,pred))
    print("GCE",scorer_.GCE(y,pred))

In [8]:
from joblib import dump
from sklearn.ensemble import RandomForestClassifier
def do_rf(X,y,tr,t,name):
    save_name = "all_models/rf_"+name+".joblib"
    
    clf = RandomForestClassifier(n_jobs=12)
    clf.fit(X[tr],y[tr])
    
    dump(clf,save_name)
    
    pred = clf.predict(X[t])
    print(name)
    do_score(pred,y[t])

In [9]:
X_rgb = get_data(rgb=True,lab=False,depth=False)
X_lab = get_data(rgb=False,lab=True,depth=False)
#X_dep = get_data(rgb=False,lab=False,depth=True)[0]
#X_ae  = load_and_select("../../../Data/Sampled_Features/fs01.npy",False,y,tr,t)
#Xf = np.hstack((X_rgb,X_lab,X_dep))
#Xfae = np.hstack((Xf,X_ae))
X2 = np.hstack((X_rgb,X_lab))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [10]:
do_rf(X2,y,tr,t,"COL")
#do_rf(X_rgb,y,tr,t,"RGB")
#do_rf(X_lab,y,tr,t,"LAB")
#do_rf(X_dep,y,tr,t,"DEP")
#do_rf(X_ae,y,tr,t,"AE")
#do_rf(Xf,y,tr,t,"Full")
#do_rf(Xfae,y,tr,t,"FullAE")

COL
Acc: 0.8615898734177215
Sens: 0.8710148958326596
Spec: 0.8513022175469314
VOI: 0.8036812335174714
GCE 1.0022383644332087
