In [1]:
import pandas as pd
import numpy as np

In [2]:
import xgboost as xgb
# Create an XGBoost regressor and restore its trained state from the saved JSON file.
model_xgb = xgb.XGBRegressor()
model_xgb.load_model('XGB_normalized_topn_model.json')

## 去除雜訊

In [3]:
def RemoveNoise(dataset, remove_condition):
    """Return a copy of ``dataset`` without the rows flagged by ``remove_condition``.

    Args:
        dataset: DataFrame to filter. It is not modified.
        remove_condition: Boolean mask (Series or array-like) with one entry
            per row of ``dataset``; ``True`` marks a row to remove. The callers
            in this notebook flag rows whose gaze vector X, Y and Z are all 0.

    Returns:
        A new DataFrame holding only the unflagged rows, with their original
        index labels.
    """
    # Work with a positional mask. Dropping by index label would also remove
    # unflagged rows that merely share a label with a flagged row.
    noise_mask = np.asarray(remove_condition, dtype=bool)
    noise_count = int(noise_mask.sum())
    print(f'Total count: {len(dataset)}, gaze vector zero count: {noise_count}')

    # .copy() hands back an independent frame so later column assignments on
    # the result do not trigger pandas' SettingWithCopyWarning.
    dataset = dataset[~noise_mask].copy()

    print(f'After dropping noises, total count: {len(dataset)}')

    return dataset

In [4]:
def FaceLandmarksPreprocessing(df_data, verbose = False):
    """Shift landmark coordinates so they are relative to the face bounding box.

    For every column whose name starts with one of the landmark / eye-box /
    eye-midpoint prefixes, the value of ``FaceBoundingBox_X`` is subtracted
    when the name ends in ``_X`` and ``FaceBoundingBox_Y`` when it ends in
    ``_Y``. All other columns are left untouched.

    Args:
        df_data: DataFrame containing ``FaceBoundingBox_X`` and
            ``FaceBoundingBox_Y``. It is modified in place.
        verbose: When true, print one line per adjusted column.

    Returns:
        The same ``df_data`` object, after the in-place adjustment.
    """
    landmark_prefixes = (
        'FaceLandmarks_',
        'EyeLandmarks_',
        'LeftEyeBoundingBox_',
        'RightEyeBoundingBox_',
        'LeftEyeMidPoint_',
        'RightEyeMidPoint_',
    )
    # (column-name suffix, bounding-box column to subtract)
    axis_origins = (
        ('_X', 'FaceBoundingBox_X'),
        ('_Y', 'FaceBoundingBox_Y'),
    )

    for col_name in df_data.columns.to_list():
        # Non-string labels cannot carry a prefix or suffix; skip them.
        if not isinstance(col_name, str) or not col_name.startswith(landmark_prefixes):
            continue
        for suffix, origin_col in axis_origins:
            # endswith() is used on purpose: a find()-based check looks at the
            # FIRST occurrence and misses names such as 'EyeLandmarks_X_1_X'.
            if col_name.endswith(suffix):
                df_data[col_name] = df_data[col_name] - df_data[origin_col]
                if verbose:
                    print(f'Column {col_name} is subtracted by {origin_col}.')
                break
    return df_data

# 列出重要的特徵。
## 正相關特徵
# col_positive = []

## 負相關特徵
# col_negative = []


In [5]:
# List of the important features.
col_high_rel = [
    'HeadPoseAngles_Y',
    'GazeVector_X',
    'GazeVector_Z',
    'GazeVector_Y',
    'HeadPoseAngles_Z',
]
# Columns removed from each frame with DataFrame.drop after preprocessing.
col_deleted = [
    'FaceBoundingBox_X',
    'FaceBoundingBox_Y',
]

In [6]:
## All important features (originally: positively + negatively correlated features).
# The positive/negative lists are currently disabled, so the reserved set is
# simply an independent copy of the high-relevance list.
col_reserved = list(col_high_rel)

print(col_reserved)

['HeadPoseAngles_Y', 'GazeVector_X', 'GazeVector_Z', 'GazeVector_Y', 'HeadPoseAngles_Z']


In [7]:
# Remove every feature that is not listed among the important features.
def ReserveImportColumns(df_data, col_import):
    """Return ``df_data`` restricted to the columns named in ``col_import``.

    Args:
        df_data: Source DataFrame. It is not modified.
        col_import: Container of column labels to keep (membership is tested
            with ``in``).

    Returns:
        ``df_data`` itself when every column is already listed; otherwise a
        new DataFrame with the unlisted columns removed. Column order follows
        ``df_data``.
    """
    # De-duplicate labels first: dropping a repeated label removes every column
    # that carries it, so dropping it a second time would raise KeyError.
    unwanted = [name for name in dict.fromkeys(df_data.columns) if name not in col_import]
    if not unwanted:
        return df_data
    # One drop call instead of one full DataFrame copy per removed column.
    return df_data.drop(columns=unwanted)

In [8]:
# Compute the accuracy rate.
def GetScene2AccuractFromTest(arrTestAns, correctAns):
    """Return the fraction of entries in ``arrTestAns`` equal to ``correctAns``.

    Args:
        arrTestAns: Predicted answers (NumPy array, list or other sequence).
        correctAns: Expected answer compared against every prediction.

    Returns:
        A float in [0, 1], or NaN when ``arrTestAns`` is empty (accuracy is
        undefined without samples).
    """
    # Convert up front: comparing a plain list with a scalar yields a single
    # False instead of an element-wise result, which would report 0 accuracy.
    predictions = np.asarray(arrTestAns)
    total = len(predictions)
    if total == 0:
        return float('nan')
    return np.count_nonzero(predictions == correctAns) / total

In [9]:
# Test
# Test a recording whose answer is 1 for every row.
df_test = pd.read_csv('/project/xt121-group5/scene2_data/test_data/2023-05-06 135830_c.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test = RemoveNoise(
    df_test,
    df_test[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test).drop(columns=col_deleted),
    col_reserved,
)

Total count: 280, gaze vector zero count: 0
After dropping noises, total count: 280


In [10]:
# Run the XGBoost model on the preprocessed frame and print its raw outputs.
pred = model_xgb.predict(df_test)
print(pred)

[0.98261386 0.9843366  0.98477805 0.98477805 0.9798188  0.9798188
 0.9848636  0.98477805 0.9848636  0.9798188  0.97837836 0.9848636
 0.9848636  0.9843366  0.9798188  0.98136914 0.98261386 0.9818242
 0.9843366  0.98261386 0.98038495 0.98038495 0.9798188  0.98038495
 0.9798188  0.9819357  0.9843366  0.9843366  0.98129874 0.9798188
 0.98136914 0.98368555 0.98129874 0.98129874 0.98261386 0.98261386
 0.98038495 0.98038495 0.9848636  0.9848636  0.98129874 0.98129874
 0.9754126  0.92446107 0.98477805 0.98038495 0.97720826 0.9760994
 0.9798188  0.98261386 0.98261386 0.98261386 0.98261386 0.98261386
 0.98261386 0.98261386 0.98261386 0.98261386 0.98399127 0.9793759
 0.9865146  0.98261386 0.98261386 0.98261386 0.9865146  0.9865146
 0.98261386 0.98261386 0.98038495 0.9767402  0.98261386 0.9798188
 0.98261386 0.98038495 0.9798188  0.9798188  0.9803836  0.9803836
 0.9865146  0.98136914 0.9784424  0.985545   0.9852904  0.9865146
 0.9865146  0.9865146  0.9865146  0.9865146  0.98261386 0.9865146
 0.986

In [11]:
# Test
# Test a recording whose answer is 0 for every row.
df_test = pd.read_csv('/project/xt121-group5/scene2_data/test_data/2023-05-06 135847_nc.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test = RemoveNoise(
    df_test,
    df_test[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test).drop(columns=col_deleted),
    col_reserved,
)

# Raw XGBoost outputs for this recording.
pred = model_xgb.predict(df_test)
print(pred)

Total count: 327, gaze vector zero count: 49
After dropping noises, total count: 278
[6.76751370e-03 6.76751370e-03 3.53610632e-03 6.76751370e-03
 7.53736123e-03 1.07478909e-02 9.05721541e-03 9.83525068e-03
 1.40107460e-02 1.07478909e-02 1.07478909e-02 5.98110957e-03
 1.07478909e-02 9.32691037e-04 1.43287145e-03 3.47514939e-03
 6.68180641e-04 7.92354112e-04 5.11338981e-03 7.92354112e-04
 6.17629034e-04 6.68180641e-04 1.04356958e-02 3.52250761e-03
 3.28420498e-03 3.53610632e-03 1.80665264e-03 3.49450856e-03
 5.34198480e-04 7.44341826e-03 2.58118031e-03 2.58118031e-03
 5.49226906e-03 2.19168072e-03 1.80665264e-03 2.29188311e-03
 2.58118031e-03 2.56361882e-03 3.60276410e-03 3.67499981e-03
 1.03693800e-02 2.58118031e-03 2.30372953e-03 1.39833521e-03
 2.58118031e-03 2.28315615e-03 2.58118031e-03 5.12727164e-03
 3.60276410e-03 2.29188311e-03 3.19933542e-03 3.19933542e-03
 3.19933542e-03 3.19933542e-03 3.19933542e-03 4.55391267e-03
 4.55391267e-03 3.72182950e-02 3.94545794e-02 3.32730450e-02


In [12]:
# Test larry
# Test a recording whose answer is 1 for every row.
df_test = pd.read_csv('/project/xt121-group5/scene2_data/test_data/2023-05-13 141120_c.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test = RemoveNoise(
    df_test,
    df_test[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test).drop(columns=col_deleted),
    col_reserved,
)

# Raw XGBoost outputs for this recording.
pred = model_xgb.predict(df_test)
print(pred)

Total count: 350, gaze vector zero count: 8
After dropping noises, total count: 342
[0.83967865 0.92720824 0.92115486 0.92606664 0.92115486 0.9036646
 0.93976074 0.9121681  0.91159546 0.9277609  0.7829853  0.8079549
 0.8079549  0.8079549  0.7759585  0.8079549  0.92138976 0.82189983
 0.82189983 0.8028388  0.82189983 0.84328806 0.84328806 0.78268045
 0.84328806 0.84328806 0.84328806 0.84328806 0.84328806 0.80767655
 0.80767655 0.80767655 0.84328806 0.80767655 0.84328806 0.889008
 0.84328806 0.78268045 0.78268045 0.84328806 0.84328806 0.84328806
 0.82189983 0.84328806 0.80767655 0.78268045 0.82189983 0.78268045
 0.82189983 0.80767655 0.84328806 0.84328806 0.82189983 0.84328806
 0.82189983 0.82189983 0.69179416 0.06823155 0.2244644  0.7590752
 0.7590752  0.84328806 0.8071711  0.80767655 0.82189983 0.80767655
 0.81753254 0.78268045 0.82189983 0.7420415  0.701294   0.7240534
 0.7535571  0.7073402  0.7395014  0.44360188 0.74755985 0.8012841
 0.78787816 0.73556095 0.81984866 0.69040054 0.72405

In [13]:
# Summary statistics (count, mean, std, quartiles, min/max) of the latest predictions.
pd.DataFrame(pred).describe()

Unnamed: 0,0
count,342.0
mean,0.794066
std,0.113881
min,0.068232
25%,0.749434
50%,0.801284
75%,0.843288
max,0.958425


In [14]:
# Test larry
# Test a recording whose answer is 0 for every row.
df_test = pd.read_csv('/project/xt121-group5/scene2_data/test_data/2023-05-13 141134_nc.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test = RemoveNoise(
    df_test,
    df_test[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test).drop(columns=col_deleted),
    col_reserved,
)

# Raw XGBoost outputs for this recording.
pred = model_xgb.predict(df_test)
print(pred)

Total count: 399, gaze vector zero count: 217
After dropping noises, total count: 182
[3.2177396e-02 3.6767069e-02 9.9294782e-02 6.3171700e-02 3.6236405e-02
 6.4783528e-02 9.7982645e-02 2.6452927e-02 2.6740537e-03 3.3791165e-03
 1.5997117e-03 3.3791165e-03 2.8225940e-03 3.3791165e-03 2.6740537e-03
 3.3791165e-03 1.5997117e-03 1.3623376e-03 1.8146450e-03 2.6740537e-03
 1.5454260e-03 9.0552028e-04 1.3623376e-03 1.3623376e-03 7.9115981e-04
 9.0552028e-04 9.0552028e-04 9.0552028e-04 7.9115981e-04 9.0552028e-04
 7.9115981e-04 1.3623376e-03 9.0552028e-04 9.0552028e-04 1.3601804e-03
 8.1980735e-01 7.3929542e-01 5.5058432e-01 5.9328336e-01 7.0428491e-01
 5.9328336e-01 5.1202160e-01 5.0265789e-01 3.0016744e-01 4.6009937e-01
 5.0265789e-01 1.7779611e-01 1.7779611e-01 3.0016744e-01 6.3608670e-01
 3.0016744e-01 2.1129943e-01 2.6559755e-01 1.7779611e-01 2.1129943e-01
 1.7779611e-01 1.7779611e-01 1.7779611e-01 3.3755279e-01 3.1931087e-01
 3.3755279e-01 3.2001102e-01 3.3755279e-01 3.1931087e-01 3.193

In [15]:
# Summary statistics (count, mean, std, quartiles, min/max) of the latest predictions.
pd.DataFrame(pred).describe()

Unnamed: 0,0
count,182.0
mean,0.270295
std,0.244111
min,0.000371
25%,0.015479
50%,0.263291
75%,0.400854
max,0.970752


In [16]:
# Test PX
# Test a recording whose answer is 1 for every row.
df_test = pd.read_csv('/project/xt121-group5/scene2_data_01equal/pinsian/2023-05-13 142553_c.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test = RemoveNoise(
    df_test,
    df_test[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test).drop(columns=col_deleted),
    col_reserved,
)

# Raw XGBoost outputs followed by their summary statistics.
pred = model_xgb.predict(df_test)
print(pred)
print(pd.DataFrame(pred).describe())

Total count: 280, gaze vector zero count: 0
After dropping noises, total count: 280
[0.9378378  0.9378378  0.9406357  0.94331557 0.9511811  0.95553887
 0.95553887 0.95553887 0.96888417 0.96888417 0.9699654  0.9699654
 0.95553887 0.9633924  0.96637684 0.9633924  0.9691914  0.9633924
 0.9633924  0.9633924  0.9666426  0.9633924  0.9666426  0.9666426
 0.9747547  0.9666426  0.9633924  0.9633924  0.9666426  0.9666426
 0.9666426  0.9666426  0.9666426  0.9511811  0.9664115  0.96585387
 0.9699654  0.9699654  0.9699654  0.961055   0.9699654  0.9664115
 0.9664115  0.95721376 0.9465077  0.9792762  0.9747547  0.96764797
 0.97772    0.97683513 0.97772    0.96937066 0.97152764 0.97683513
 0.97772    0.9661658  0.97772    0.9666426  0.9661658  0.96937066
 0.9792762  0.96945333 0.9792762  0.9792762  0.97278637 0.97722787
 0.97278637 0.9747547  0.9792762  0.9722721  0.96764797 0.9637943
 0.96253854 0.9637943  0.96253854 0.9637719  0.9637719  0.96626973
 0.9705082  0.9705082  0.9637719  0.9705082  0.9524

In [17]:
# Test PX
# Test a recording whose answer is 0 for every row.
df_test = pd.read_csv('/project/xt121-group5/scene2_data_01equal/pinsian/2023-05-13 142611 nc.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test = RemoveNoise(
    df_test,
    df_test[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test).drop(columns=col_deleted),
    col_reserved,
)

# Raw XGBoost outputs followed by their summary statistics.
pred = model_xgb.predict(df_test)
print(pred)
print(pd.DataFrame(pred).describe())

Total count: 280, gaze vector zero count: 28
After dropping noises, total count: 252
[7.60722160e-01 7.60722160e-01 7.60722160e-01 7.60722160e-01
 7.60722160e-01 7.60722160e-01 7.60722160e-01 7.60722160e-01
 7.60722160e-01 7.60722160e-01 3.65914464e-01 1.38541937e-01
 8.69615152e-02 8.69615152e-02 8.69615152e-02 8.69615152e-02
 8.69615152e-02 8.69615152e-02 8.69615152e-02 8.69615152e-02
 8.69615152e-02 8.69615152e-02 1.38541937e-01 1.38541937e-01
 1.38541937e-01 1.78456709e-01 2.19151005e-01 2.19151005e-01
 1.59307256e-01 1.38541937e-01 3.98132578e-02 2.84824669e-02
 1.56635456e-02 6.88452041e-03 4.78819339e-03 3.84211482e-04
 3.84211482e-04 3.84211482e-04 3.84211482e-04 3.84211482e-04
 3.84211482e-04 3.84211482e-04 3.84211482e-04 3.84211482e-04
 3.84211482e-04 3.84211482e-04 3.84211482e-04 3.84211482e-04
 3.84211482e-04 4.34629852e-04 3.84211482e-04 3.84211482e-04
 3.84211482e-04 3.84211482e-04 3.84211482e-04 3.84211482e-04
 4.34629852e-04 4.56149573e-04 3.84211482e-04 4.34629852e-04


## 自行計算準確率

### 取資料

In [18]:
# Test a recording whose answer is 1 for every row.
df_test_1 = pd.read_csv('/project/xt121-group5/scene2_data/test_data/2023-05-17 194144 c.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test_1 = RemoveNoise(
    df_test_1,
    df_test_1[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test_1 = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test_1).drop(columns=col_deleted),
    col_reserved,
)

Total count: 1082, gaze vector zero count: 0
After dropping noises, total count: 1082


In [19]:
# Test a recording whose answer is 0 for every row.
df_test_0 = pd.read_csv('/project/xt121-group5/scene2_data/test_data/2023-05-17 194223 nc.csv')
# Rows whose gaze vector is exactly (0, 0, 0) are removed as noise.
df_test_0 = RemoveNoise(
    df_test_0,
    df_test_0[['GazeVector_X', 'GazeVector_Y', 'GazeVector_Z']].eq(0).all(axis=1),
)
# Adjust landmark coordinates, drop the bounding-box origin columns,
# then keep only the reserved feature columns.
df_test_0 = ReserveImportColumns(
    FaceLandmarksPreprocessing(df_test_0).drop(columns=col_deleted),
    col_reserved,
)

Total count: 956, gaze vector zero count: 70
After dropping noises, total count: 886


### Random Forest

In [20]:
import pickle

In [21]:
# Load the pickled model from 'RF_pickle_model.pkl' into rf_model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('RF_pickle_model.pkl', 'rb') as f:
    rf_model = pickle.load(f)

In [22]:
# Predict on the all-ones recording and report the share of predictions equal to 1.
pred = rf_model.predict(df_test_1)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 1) * 100}%')

[1 1 1 ... 1 1 1]
Test accurate rate: 88.72458410351202%


In [23]:
# Predict on the all-zeros recording and report the share of predictions equal to 0.
pred = rf_model.predict(df_test_0)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 0) * 100}%')

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

### XGBoost

In [24]:
# Decision cut-off for the XGBoost outputs: values >= threshold count as 1, values below as 0.
threshold = 0.5

In [25]:
# Score the all-ones recording with the XGBoost model and print the raw outputs.
pred = model_xgb.predict(df_test_1)
print(pred)

# Binarise in a single pass. Two consecutive in-place assignments are
# order-dependent: with a threshold above 1 the second one would zero the
# values the first one had just set to 1, and NaN would be left untouched.
# Here anything not >= threshold (NaN included) becomes 0; dtype is preserved.
pred = (pred >= threshold).astype(pred.dtype)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 1) * 100}%')

[0.8074277 0.8074277 0.8074277 ... 0.8184088 0.8184088 0.8184088]
Test accurate rate: 98.79852125693161%


In [26]:
# Score the all-zeros recording with the XGBoost model and print the raw outputs.
pred = model_xgb.predict(df_test_0)
print(pred)

# Binarise in a single pass. Two consecutive in-place assignments are
# order-dependent: with a threshold above 1 the second one would zero the
# values the first one had just set to 1, and NaN would be left untouched.
# Here anything not >= threshold (NaN included) becomes 0; dtype is preserved.
pred = (pred >= threshold).astype(pred.dtype)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 0) * 100}%')

[2.19698474e-02 1.08068911e-02 1.08068911e-02 1.08068911e-02
 1.08068911e-02 8.38471763e-03 8.14673770e-03 1.05008911e-02
 1.08068911e-02 8.14673770e-03 2.13545877e-02 2.13545877e-02
 1.05008911e-02 1.05008911e-02 2.19698474e-02 2.13545877e-02
 2.94292253e-02 3.02699320e-02 3.02699320e-02 3.02699320e-02
 2.19698474e-02 8.38471763e-03 1.08068911e-02 1.08068911e-02
 1.08068911e-02 2.19698474e-02 1.08068911e-02 2.19698474e-02
 1.08068911e-02 2.19698474e-02 1.08068911e-02 2.19698474e-02
 3.02699320e-02 8.14673770e-03 1.08068911e-02 3.04438341e-02
 3.04438341e-02 2.19698474e-02 1.08068911e-02 2.19698474e-02
 1.08068911e-02 1.08068911e-02 9.42058209e-03 1.05008911e-02
 8.38471763e-03 1.08068911e-02 3.02699320e-02 1.54350130e-02
 8.91753845e-03 4.69937542e-04 8.49492848e-04 8.49492848e-04
 8.49492848e-04 4.56149573e-04 8.24577990e-04 8.24577990e-04
 1.05746230e-03 4.34629852e-04 4.34629852e-04 1.05746230e-03
 8.24577990e-04 4.34629852e-04 4.34629852e-04 4.56149573e-04
 4.34629852e-04 4.561495

### Decision Tree

In [27]:
# Load the pickled model from 'DT_pickle_model.pkl' into dt_model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('DT_pickle_model.pkl', 'rb') as f:
    dt_model = pickle.load(f)

In [28]:
# Predict on the all-ones recording and report the share of predictions equal to 1.
pred = dt_model.predict(df_test_1)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 1) * 100}%')

[1 1 1 ... 1 1 1]
Test accurate rate: 87.43068391866913%


In [29]:
# Predict on the all-zeros recording and report the share of predictions equal to 0.
pred = dt_model.predict(df_test_0)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 0) * 100}%')

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

### KNN

In [30]:
# Load the pickled model from 'KNN_pickle_model.pkl' into knn_model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('KNN_pickle_model.pkl', 'rb') as f:
    knn_model = pickle.load(f)

In [31]:
# Predict on the all-ones recording and report the share of predictions equal to 1.
pred = knn_model.predict(df_test_1)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 1) * 100}%')

[1 1 1 ... 1 1 1]
Test accurate rate: 92.05175600739372%


In [32]:
# Predict on the all-zeros recording and report the share of predictions equal to 0.
pred = knn_model.predict(df_test_0)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 0) * 100}%')

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 1 1 1 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1
 0 1 1 0 1 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

### Gradient Boosting

In [33]:
# Load the pickled model from 'GB_pickle_model.pkl' into gb_model.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('GB_pickle_model.pkl', 'rb') as f:
    gb_model = pickle.load(f)

In [34]:
# Predict on the all-ones recording and report the share of predictions equal to 1.
pred = gb_model.predict(df_test_1)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 1) * 100}%')

[1 1 1 ... 1 1 1]
Test accurate rate: 98.24399260628466%


In [35]:
# Predict on the all-zeros recording and report the share of predictions equal to 0.
pred = gb_model.predict(df_test_0)
print(pred)
print(f'Test accurate rate: {GetScene2AccuractFromTest(pred, 0) * 100}%')

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 