In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

In [None]:
os.chdir("/content/drive/My Drive/Thesis")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import *
import json

In [None]:
with open('training.json') as f:
  file2 = json.load(f)
  f.close()


In [None]:
def get_surroundings(pos, size, ar):
  first_end_index = -1
  second_end_index = pos

  for i in range(0, pos):
    first_end_index = i
  for i in range(pos + 1, size):
    second_end_index = i
    break

  return first_end_index, pos, second_end_index

def get_dist(x, y):
  return np.abs(x - y)

def get_displacement(x, y):
  return x - y

def get_feature(x1, x2, x3, y1, y2, y3):
  r = []

  r.append(get_dist(x1, x2))
  r.append(get_dist(x3, x2))
  r.append(get_dist(x1, x3))

  r.append(get_dist(y1, y2))
  r.append(get_dist(y3, y2))
  r.append(get_dist(y1, y3))

  r.append(get_displacement(x2, y1))
  r.append(get_displacement(x2, y2))
  r.append(get_displacement(x2, y3))

  r.append(get_displacement(x1, y1))
  r.append(get_displacement(x1, y2))
  r.append(get_displacement(x1, y3))

  r.append(get_displacement(x3, y1))
  r.append(get_displacement(x3, y2))
  r.append(get_displacement(x3, y3))

  return r

def prepare_data(df):
  data_x, data_y = [], []
  for dic in df:
    x_pos = dic['x_pos']
    y_pos = dic['y_pos']
    size = len(x_pos)
    for i in range(size):
      if x_pos[i] != 0:
        x1, x2, x3 = get_surroundings(i, size, x_pos)
        for j in range(size):
          if y_pos[j] != 0:
            y1, y2, y3 = get_surroundings(j, size, y_pos)
            features = get_feature(x1, x2, x3, y1, y2, y3)
            data_x.append(features)
            data_y.append(1 if x_pos[i] == y_pos[j] else 0)


  return data_x, data_y

In [None]:
def get_arr2(df, model):
  ty, py = [], []
  for dic in df:
    x = dic['x_pos']
    y = dic['y_pos']
    for i in range(len(x)):
      if x[i] != 0:
        temp_ty = []
        temp_py = []
        x1, x2, x3 = get_surroundings(i, len(x), x)
        for j in range(len(y)):
          if y[j] != 0:
            y1, y2, y3 = get_surroundings(j, len(y), y)
            ft = get_feature(x1, x2, x3, y1, y2, y3)
            pp = model.predict_proba([ft])[0]
            pp = pp[1]/(pp[0] + pp[1])
            temp_py.append(pp)
            if y[j] == x[i]:
              temp_ty.append(1)
            else:
              temp_ty.append(0)
        #idx = np.argmax(temp_py)
        #temp_py = [1 if _ >= 0.5 else 0 for _ in temp_py]
        #temp_py = [0] * len(temp_py)
        #temp_py[idx] = 1
        ty.extend(temp_ty)
        py.extend(temp_py) 

  return ty, py

In [None]:
train_x, train_y = prepare_data(file[116:])
valid_x, valid_y = prepare_data(file[:116])

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [None]:
model = RandomForestClassifier()
# model = SVC()
model.fit(train_x, train_y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
ty, tp = get_arr2(file[116:], model)

In [None]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
print(classification_report(ty, tp))
print(confusion_matrix(ty, tp))

              precision    recall  f1-score   support

           0       0.95      0.95      0.95      8998
           1       0.77      0.77      0.77      2008

    accuracy                           0.92     11006
   macro avg       0.86      0.86      0.86     11006
weighted avg       0.92      0.92      0.92     11006

[[8546  452]
 [ 452 1556]]


In [None]:
ty, tp = get_arr2(file[:116], model)
print(classification_report(ty, tp))
print(confusion_matrix(ty, tp))

              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2515
           1       0.75      0.75      0.75       524

    accuracy                           0.91      3039
   macro avg       0.85      0.85      0.85      3039
weighted avg       0.91      0.91      0.91      3039

[[2384  131]
 [ 131  393]]
