In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from itertools import cycle
from sklearn.metrics import classification_report
from sklearn.svm import SVC

### Load features and labels

In [None]:
# Load the pre-computed embedding arrays and the annotation table.
# The embedding arrays may need to be reshaped before they are used as
# classifier input; no reshaping is done in this cell.
features1 = np.load("./translated_embeddings/resnet.npy") # video
features2 = np.load("./translated_embeddings/yamnet.npy") # audio
# annotations.csv must provide a "total" column; it is read when the labels are encoded.
labels = pd.read_csv("annotations.csv")

### Classify count of all kinds of cars

In [None]:
def get_count_label(num):
    """Map a vehicle count to a traffic-level name.

    Returns "free" for exactly 0, "few" for other values below 3,
    "medium" for values below 6 and "busy" for everything else.
    """
    if num == 0:
        return "free"
    # Exclusive upper bounds, checked in ascending order.
    for upper_bound, level in ((3, "few"), (6, "medium")):
        if num < upper_bound:
            return level
    return "busy"

In [None]:
# Bucket each row's "total" count into a traffic level, then encode the level
# names as integers in a new "total_encoder" column for the classifiers.
traffic_levels = labels["total"].apply(get_count_label)
labelencoder = LabelEncoder()
labels["total_encoder"] = labelencoder.fit_transform(traffic_levels)
# Cell output: the level names known to the encoder (position i is the name encoded as i).
labelencoder.classes_

## concatenate video with different amount of audio

In [None]:
# Hold out 20% of dataP1 (paired row-for-row with the encoded labels) as the
# test set used throughout this section; the seed keeps the split repeatable.
# NOTE(review): dataP1 is not defined in any cell visible above — it must
# already exist in the kernel for this cell to run; confirm where it comes from.
X_train, X_test, y_train, y_test = train_test_split(dataP1, labels["total_encoder"], test_size=0.2, random_state=42)
# Number of leading X_train rows added to the training set in each experiment
# below (0 means none are added).
numOfSamples = [0, 100, 200, 500, 1000, 1500, 2000, 3000, 4000, 5076]

### Random Forest

In [None]:
def evaluate(model, test_features, test_labels, labels):
    """Print accuracy and plot a confusion matrix plus per-class PR curves.

    Parameters
    ----------
    model : fitted classifier exposing ``score`` and ``predict_proba``.
    test_features : held-out feature matrix.
    test_labels : integer-encoded true labels for ``test_features``
        (any 1-D array-like; a pandas Series is accepted but not required).
    labels : sequence of class names, where ``labels[i]`` names the class
        encoded as ``i``. Column ``i`` of ``predict_proba`` is read as the
        score of that class, which assumes the model was fitted on every
        class so that its columns line up with the encoded ids.

    Returns
    -------
    float
        Accuracy of ``model`` on ``test_features`` / ``test_labels``.
    """
    accuracy = model.score(test_features, test_labels)
    print("accuracy:", accuracy)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

    # Use the data and class names passed in as arguments; the previous
    # version read the notebook globals X_valid / y_valid / labelencoder here.
    # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2
    # (ConfusionMatrixDisplay.from_estimator replaces it) — confirm the
    # pinned scikit-learn version.
    disp = plot_confusion_matrix(model, test_features, test_labels,
                                 display_labels=labels,
                                 cmap=plt.cm.Blues,
                                 normalize="true", ax=ax1)
    print(disp.confusion_matrix)

    y_score = model.predict_proba(test_features)
    y_true = np.asarray(test_labels)

    colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue'])
    lines = []
    classes = []

    # One-vs-rest precision-recall curve for each class.
    for i, color in zip(range(len(labels)), colors):
        is_class = (y_true == i).astype(int)
        precision, recall, _ = precision_recall_curve(is_class, y_score[:, i])
        average_precision = average_precision_score(is_class, y_score[:, i])

        # Draw explicitly on the right-hand axes rather than on whatever
        # pyplot currently considers the active axes.
        line, = ax2.plot(recall, precision, color=color, lw=2)
        lines.append(line)
        classes.append('Precision-recall for class {0} (area = {1:0.2f})'
                       ''.format(labels[i], average_precision))

    ax2.set_xlim([0.0, 1.0])
    ax2.set_ylim([0.0, 1.05])
    ax2.set_xlabel('Recall')
    ax2.set_ylabel('Precision')
    ax2.set_title('Extension of Precision-Recall curve to multi-class')
    ax2.legend(lines, classes, loc=(0, -.38), prop=dict(size=14))

    plt.show()

    return accuracy

In [None]:
# Search space for the randomized random-forest hyper-parameter search.

# Number of trees: ten evenly spaced values from 200 to 2000.
n_estimators = np.linspace(200, 2000, 10).astype(int).tolist()
# Candidate settings for the number of features considered at each split.
max_features = ['auto', 'sqrt']
# Maximum tree depth: forty evenly spaced values from 10 to 100, truncated to integers.
max_depth = np.linspace(10, 100, 40).astype(int).tolist()
# Minimum number of samples a node must hold before it may be split.
min_samples_split = [2, 5, 10]

# Parameter name -> candidate values, in the form RandomizedSearchCV samples from.
random_grid = dict(
    n_estimators=n_estimators,
    max_features=max_features,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
)

# Test accuracy of the best forest, keyed by how many X_train rows were added.
acc = {}
# Full cross-validation results of each search, keyed the same way.
# It has to be created before the loop: it was previously never initialised,
# so the first `acc_range[num] = ...` raised NameError.
acc_range = {}

for num in numOfSamples:
    print("training samples from audio: ", num)
    # Training set = all of dataP2 plus the first `num` rows of X_train.
    if num > 0:
        X_combine_train = np.concatenate((X_train[:num, :], dataP2), axis=0)
        y_combine_train = np.concatenate((y_train[:num], filtered_labels["total_encoder"]), axis=0)
    else:
        X_combine_train = dataP2
        y_combine_train = filtered_labels["total_encoder"]

    # Randomized search: 20 sampled parameter settings, 3-fold cross-validation each.
    random_forest_clf = RandomForestClassifier(random_state=42)
    rf_random = RandomizedSearchCV(estimator=random_forest_clf,
                                   param_distributions=random_grid,
                                   n_iter=20, cv=3, verbose=2,
                                   random_state=42, n_jobs=-1)
    rf_random.fit(X_combine_train, y_combine_train)
    print(rf_random.best_params_)

    # Score the best estimator found on the held-out split.
    best_random = rf_random.best_estimator_
    random_accuracy = evaluate(best_random, X_test, y_test, labelencoder.classes_)
    acc[num] = random_accuracy
    acc_range[num] = rf_random.cv_results_

print(acc)

### SVM

In [None]:
def evaluate_svm(model, test_features, test_labels, labels):
    """Print accuracy and plot a confusion matrix plus per-class PR curves.

    Same report as ``evaluate`` but scores come from ``decision_function``
    instead of ``predict_proba``.

    Parameters
    ----------
    model : fitted classifier exposing ``score`` and ``decision_function``.
    test_features : held-out feature matrix.
    test_labels : integer-encoded true labels for ``test_features``
        (any 1-D array-like; a pandas Series is accepted but not required).
    labels : sequence of class names, where ``labels[i]`` names the class
        encoded as ``i``.

    Returns
    -------
    float
        Accuracy of ``model`` on ``test_features`` / ``test_labels``.

    Notes
    -----
    Column ``i`` of ``decision_function`` is read as the score of class ``i``.
    That only holds when the model returns one column per class (for ``SVC``:
    ``decision_function_shape="ovr"``); with a one-vs-one output the columns
    are pairwise scores and the curves are not per-class curves.
    """
    accuracy = model.score(test_features, test_labels)
    print("accuracy:", accuracy)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

    # Use the data and class names passed in as arguments; the previous
    # version read the notebook globals X_valid / y_valid / labelencoder here.
    # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2
    # (ConfusionMatrixDisplay.from_estimator replaces it) — confirm the
    # pinned scikit-learn version.
    disp = plot_confusion_matrix(model, test_features, test_labels,
                                 display_labels=labels,
                                 cmap=plt.cm.Blues,
                                 normalize="true", ax=ax1)
    print(disp.confusion_matrix)

    y_score = model.decision_function(test_features)
    y_true = np.asarray(test_labels)

    colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue'])
    lines = []
    classes = []

    # One-vs-rest precision-recall curve for each class.
    for i, color in zip(range(len(labels)), colors):
        is_class = (y_true == i).astype(int)
        precision, recall, _ = precision_recall_curve(is_class, y_score[:, i])
        average_precision = average_precision_score(is_class, y_score[:, i])

        # Draw explicitly on the right-hand axes rather than on whatever
        # pyplot currently considers the active axes.
        line, = ax2.plot(recall, precision, color=color, lw=2)
        lines.append(line)
        classes.append('Precision-recall for class {0} (area = {1:0.2f})'
                       ''.format(labels[i], average_precision))

    ax2.set_xlim([0.0, 1.0])
    ax2.set_ylim([0.0, 1.05])
    ax2.set_xlabel('Recall')
    ax2.set_ylabel('Precision')
    ax2.set_title('Extension of Precision-Recall curve to multi-class')
    ax2.legend(lines, classes, loc=(0, -.38), prop=dict(size=14))

    plt.show()

    return accuracy

In [None]:
# Test accuracy of the linear SVM, keyed by how many X_train rows were added.
acc = {}

for num in numOfSamples:
    print("training samples from audio: ", num)
    # Training set = all of dataP2 plus the first `num` rows of X_train.
    # dataP2 rows are labelled by filtered_labels (the same pairing the
    # random-forest cell of this section uses), not by `labels`.
    if num > 0:
        X_combine_train = np.concatenate((X_train[:num, :], dataP2), axis=0)
        y_combine_train = np.concatenate((y_train[:num], filtered_labels["total_encoder"]), axis=0)
    else:
        X_combine_train = dataP2
        y_combine_train = filtered_labels["total_encoder"]

    # "ovr" makes decision_function return one column per class, which is what
    # evaluate_svm indexes with y_score[:, i]; "ovo" returns one column per
    # pair of classes instead. Only the shape of decision_function changes.
    clf = SVC(kernel="linear", decision_function_shape="ovr", random_state=42)
    clf.fit(X_combine_train, y_combine_train)
    random_accuracy = evaluate_svm(clf, X_test, y_test, labelencoder.classes_)
    acc[num] = random_accuracy

print(acc)

## concatenate audio with different amount of video

In [None]:
# Re-split for this section: hold out 20% of dataP2 (paired row-for-row with
# filtered_labels) as the test set.
# NOTE: this rebinds X_train / X_test / y_train / y_test from the previous
# section, so cells above must be re-run from their own split before reuse.
# NOTE(review): dataP2 and filtered_labels are not defined in any visible
# cell — confirm where they come from.
X_train, X_test, y_train, y_test = train_test_split(dataP2, filtered_labels["total_encoder"], test_size=0.2, random_state=42)

### Random Forest

In [None]:
# Test accuracy of the best forest, keyed by how many X_train rows were added.
acc = {}

for num in numOfSamples:
    print("training samples from video: ", num)
    # Training set = all of dataP1 plus the first `num` rows of X_train.
    # In this section X_train / X_test are a split of dataP2, so the fixed
    # part must be dataP1: using dataP2 here would put the held-out X_test
    # rows into the training data and pair them with the labels of dataP1.
    if num > 0:
        X_combine_train = np.concatenate((X_train[:num, :], dataP1), axis=0)
        y_combine_train = np.concatenate((y_train[:num], labels["total_encoder"]), axis=0)
    else:
        X_combine_train = dataP1
        y_combine_train = labels["total_encoder"]

    # Randomized search: 20 sampled parameter settings, 3-fold cross-validation each.
    random_forest_clf = RandomForestClassifier(random_state=42)
    rf_random = RandomizedSearchCV(estimator=random_forest_clf,
                                   param_distributions=random_grid,
                                   n_iter=20, cv=3, verbose=2,
                                   random_state=42, n_jobs=-1)
    rf_random.fit(X_combine_train, y_combine_train)
    print(rf_random.best_params_)

    # Score the best estimator found on the held-out split.
    best_random = rf_random.best_estimator_
    random_accuracy = evaluate(best_random, X_test, y_test, labelencoder.classes_)
    acc[num] = random_accuracy

# Show the results before a later cell rebinds `acc`.
print(acc)


### SVM

In [None]:
# Test accuracy of the linear SVM, keyed by how many X_train rows were added.
acc = {}

for num in numOfSamples:
    print("training samples from video: ", num)
    # Training set = all of dataP1 plus the first `num` rows of X_train.
    # In this section X_train / X_test are a split of dataP2, so the fixed
    # part must be dataP1: using dataP2 here would put the held-out X_test
    # rows into the training data and pair them with the labels of dataP1.
    if num > 0:
        X_combine_train = np.concatenate((X_train[:num, :], dataP1), axis=0)
        y_combine_train = np.concatenate((y_train[:num], labels["total_encoder"]), axis=0)
    else:
        X_combine_train = dataP1
        y_combine_train = labels["total_encoder"]

    # "ovr" makes decision_function return one column per class, which is what
    # evaluate_svm indexes with y_score[:, i]; "ovo" returns one column per
    # pair of classes instead. Only the shape of decision_function changes.
    clf = SVC(kernel="linear", decision_function_shape="ovr", random_state=42)
    clf.fit(X_combine_train, y_combine_train)
    random_accuracy = evaluate_svm(clf, X_test, y_test, labelencoder.classes_)
    acc[num] = random_accuracy

print(acc)