# Algo to find delay between video with heatmap

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [4]:
ds = np.load(r"..\datasets\isia_flow_similarity_with_delay.npy", allow_pickle=True)
ds.shape

(12244, 4)

In [5]:
ds_test = np.load(r"..\datasets\click_flow_similarity_with_delay.npy", allow_pickle=True)
ds_test.shape

(500, 4)

## Delay 0 to 10 frames

In [9]:
ds = ds.tolist()
data = []
for i in range(len(ds)):
    if ds[i][1]!=-1 and ds[i][1]<10:
        data.append(ds[i])

data = np.array(data, dtype=object)

X_train = data[:,0].tolist()
y_train = data[:,1].tolist()

In [6]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

In [10]:
clf = RandomForestClassifier(n_estimators = 600, random_state=0)
clf.fit(X_train, y_train)

In [11]:
ds_test = ds_test.tolist()
data = []
for i in range(len(ds_test)):
    if ds_test[i][1]!=-1 and ds_test[i][1]<10:
        data.append(ds_test[i])

data = np.array(data, dtype=object)

X_test = data[:,0].tolist()
y_test = data[:,1].tolist()

In [12]:
y_pred_test = clf.predict(X_test)
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.97      0.97      0.97        29
           1       0.77      1.00      0.87        30
           2       1.00      0.70      0.82        30
           3       0.91      0.95      0.93        22
           4       0.90      0.82      0.86        22
           5       1.00      0.85      0.92        26
           6       0.90      0.75      0.82        24
           7       0.78      0.74      0.76        19
           8       0.65      0.89      0.76        19
           9       0.77      0.89      0.83        19

    accuracy                           0.86       240
   macro avg       0.87      0.86      0.85       240
weighted avg       0.88      0.86      0.86       240



## Delay between 0 and 20 frames

-1 means delay is greater than the lenght of the sequnece

In [16]:
ds = np.load(r"..\datasets\isia_flow_similarity_with_delay.npy", allow_pickle=True)

In [20]:
data = ds.tolist()

for i in range(len(data)):
    if data[i][1] == 20: data[i][1] = -1

data = np.array(data, dtype=object)

X_train = data[:,0].tolist()
y_train = data[:,1].tolist()

In [15]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

In [21]:
clf = RandomForestClassifier(n_estimators = 600, random_state=0)
clf.fit(X_train, y_train)

In [24]:
data = ds_test

for i in range(len(data)):
    if data[i][1] == 20: data[i][1] = -1

data = np.array(data, dtype=object)

X_test = data[:,0].tolist()
y_test = data[:,1].tolist()

In [25]:
y_pred_test = clf.predict(X_test)
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

          -1       0.20      0.46      0.28        24
           0       0.85      0.97      0.90        29
           1       0.65      1.00      0.79        30
           2       0.82      0.77      0.79        30
           3       0.63      0.86      0.73        22
           4       0.74      0.77      0.76        22
           5       0.69      0.69      0.69        26
           6       0.94      0.67      0.78        24
           7       0.55      0.84      0.67        19
           8       0.50      0.74      0.60        19
           9       0.80      0.63      0.71        19
          10       0.65      0.74      0.69        23
          11       0.80      0.22      0.35        18
          12       0.46      0.23      0.31        26
          13       0.56      0.43      0.49        21
          14       0.52      0.52      0.52        21
          15       0.38      0.23      0.29        22
          16       0.69    

## Find best hyperparameters (No improvement)

In [11]:
ds = np.load(r"datasets\flow_similarity_with_delay.npy", allow_pickle=True)

data = ds.tolist()

for i in range(len(data)):
    if data[i][1] == 20: data[i][1] = -1

data = np.array(data, dtype=object)

X = data[:,0].tolist()
y = data[:,1].tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

In [37]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 50, stop = 800, num = 5)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf}

print(random_grid)

{'n_estimators': [50, 237, 425, 612, 800], 'max_features': ['auto', 'sqrt'], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4]}


In [38]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestClassifier()
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)
# Fit the random search model
rf_random.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


  warn(


In [39]:
rf_random.best_params_

{'n_estimators': 800,
 'min_samples_split': 2,
 'min_samples_leaf': 2,
 'max_features': 'auto',
 'max_depth': 30}

In [12]:
clf = RandomForestClassifier(n_estimators = 800, min_samples_split=2, min_samples_leaf=2, max_depth=30)
clf.fit(X_train, y_train)

In [13]:
clf.score(X_test, y_test)

0.27636363636363637

In [14]:
y_pred_test = clf.predict(X_test)
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

          -1       0.11      0.64      0.19        22
           0       0.47      0.58      0.52        12
           1       0.48      0.71      0.57        14
           2       0.60      0.46      0.52        13
           3       0.56      0.42      0.48        12
           4       0.33      0.36      0.34        14
           5       0.41      0.50      0.45        14
           6       0.62      0.57      0.59        14
           7       0.40      0.15      0.22        13
           8       0.40      0.18      0.25        11
           9       0.50      0.15      0.24        13
          10       0.25      0.15      0.19        13
          11       0.60      0.25      0.35        12
          12       0.00      0.00      0.00        12
          13       0.22      0.15      0.18        13
          14       0.33      0.08      0.12        13
          15       0.00      0.00      0.00        13
          16       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
