# Feature Extraction and Anomaly Detection Baseline
## Import libraries

In [1]:
import os
import pydub
import numpy as np
from sound.sound_eval import eval_sound
from matplotlib import pyplot as plt
from time_series.anomaly_detection import polyreg_outliar_mse, lof
from time_series.smoothening_functions import  kaiser_wind
from sklearn.metrics import precision_score, recall_score, average_precision_score
from sklearn.preprocessing import StandardScaler

## Setup
### Helper Functionality

In [None]:
def eval_linreg(arr, n, linreg):
    """
    Score an autoregressive linear model by its squared one-step-ahead errors.

    Args:
        arr: Sequence of samples. It provides both the regressor inputs (every run of n consecutive samples) and
        the targets (the sample that immediately follows each run).
        n: Length of the autoregressive window, i.e. samples i .. i+n-1 are used to predict sample i+n.
        linreg: Trained regressor exposing a ``predict`` method.

    Returns:
        Squares of the differences between the predictions and the targets.
    """
    series = np.array(arr).reshape((len(arr),))
    n_windows = len(series) - n

    # One input window per predictable sample, paired with the sample right after the window.
    inputs = [series[first : first + n] for first in range(n_windows)]
    targets = [series[first + n] for first in range(n_windows)]

    residuals = linreg.predict(inputs) - targets
    return residuals**2


def get_anomls(arr, signal, thr):
    """
    Extract the contiguous runs of ``arr`` that lie above ``thr`` and attach a score to each.

    Args:
        arr: Anomaly indicator, one value per sample.
        signal: Score series used to rate each run. It is indexed with the same positions as ``arr``.
        thr: A sample belongs to a run only if its value is strictly greater than this threshold.

    Returns:
        List of ``[start, score, end]`` lists in order of appearance. ``start`` is the index of the first sample of
        the run, ``end`` is the index one past its last sample and ``score`` is the maximum of ``signal`` within
        180 samples on either side of ``end``. Runs whose score window is empty are omitted.
    """
    segms = []
    n = len(arr)
    i = 0
    while i < n:
        # A run may only open on a sample that is above the threshold. Testing against zero instead would emit a
        # zero-length segment for every sample in (0, thr], e.g. on the ramps of a smoothed indicator.
        if not arr[i] > thr:
            i += 1
            continue
        start = i
        while i < n and arr[i] > thr:
            i += 1
        # Clamp the lower bound: a negative slice start would wrap around to the end of ``signal`` and make runs
        # that end within the first 180 samples disappear or be scored on the wrong window.
        window = signal[max(0, i - 180) : i + 180]
        if len(window) > 0:
            segms.append([start, np.max(np.array(window)), i])
        # Sample i (if any) is already known not to exceed the threshold, so step over it.
        i += 1
    return segms


def get_ap(tot_length, res, labels, start = 0, ends = None):
    """
    Compare a per-sample detection series against the labelled highlights.

    A binary ground-truth series with 6 samples per second of ``tot_length`` is built; every label marks its
    interval as positive, widened by 90 samples on both sides.

    Args:
        tot_length: Length of the recording in seconds.
        res: Detection scores, one per sample. They are compared with ``ground_truth[start:ends]``.
        labels: Iterable of ``[start, end]`` pairs, each time encoded as the integer mmss (minutes * 100 + seconds).
        start: Index of the ground-truth sample that corresponds to ``res[0]``.
        ends: Index one past the last ground-truth sample to use. A falsy value means ``len(res)``.

    Returns:
        Tuple ``(average precision, precision, recall)``; precision and recall use ``res > 0.5`` as prediction.
    """
    def to_seconds(mmss):
        # The two lowest decimal digits hold the seconds, the remaining ones the minutes.
        return (mmss % 100) + 60 * int(mmss / 100)

    if not ends:
        ends = len(res)

    ground_truth = np.zeros((tot_length*6,))
    n_samples = len(ground_truth)
    for label in labels:
        first = max(0, to_seconds(label[0]) * 6 - 90)
        last = min(n_samples, to_seconds(label[1]) * 6 + 90)
        # Only non-empty ranges are written, exactly like iterating over range(first, last).
        if last > first:
            ground_truth[first:last] = 1

    target = ground_truth[start:ends]
    predicted = res > 0.5
    return (
        average_precision_score(target, res),
        precision_score(target, predicted),
        recall_score(target, predicted),
    )

def get_ap_at_k(tot_length, segms, labels, k = 6):
    """
    Compute the average precision over the k best-ranked segments for three matching criteria.

    The segments are visited in reverse order of ``segms`` (callers sort ascending by score, so the best segment
    comes first). For every criterion the precision at each rank where a hit occurs is accumulated and finally
    divided by the number of hits; a criterion without hits yields 0.

    Criteria, all evaluated on a grid with 6 samples per second:
        start:    a label start (+-10 samples) lies within 90 samples before / 60 samples after the segment start.
        end:      a label end (+-10 samples) lies within 60 samples before / 90 samples after the segment end.
        coverage: more than half of the segment lies inside a label interval widened by 90 samples on both sides.

    Each hit also prints the criterion name and the 1-based rank of the segment.

    Args:
        tot_length: Length of the recording in seconds.
        segms: Sequence of segments whose first element is the start index and whose last element is the end index.
        labels: Iterable of ``[start, end]`` pairs, each time encoded as the integer mmss (minutes * 100 + seconds).
        k: Number of top-ranked segments to evaluate. The previous hard-coded check (``i > 6``) evaluated seven
        segments although the result is reported as "@6".

    Returns:
        Tuple ``(start AP, end AP, coverage AP)``.
    """
    n_samples       = tot_length * 6
    ground_truth    = np.zeros((n_samples,))
    starts          = np.zeros((n_samples,))
    ends            = np.zeros((n_samples,))
    for l in labels:
        strt_min, strt_sec  = divmod(l[0], 100)
        end_min, end_sec    = divmod(l[1], 100)
        strt                = strt_sec + 60 * strt_min
        end                 = end_sec + 60 * end_min
        lo, hi              = max(0, strt * 6 - 90), min(n_samples, end * 6 + 90)
        if hi > lo:
            ground_truth[lo:hi] = 1
        # Lower bounds are clamped at 0: a negative slice start wraps around to the end of the array, which
        # silently left labels within the first two seconds unmarked.
        starts[max(0, strt * 6 - 10) : strt * 6 + 10]   = 1
        ends[max(0, end * 6 - 10) : end * 6 + 10]       = 1

    count_starts    = 0
    sums_starts     = 0
    count_end       = 0
    sums_end        = 0
    count_coverage  = 0
    sums_coverage   = 0
    ranked = list(reversed(segms))[:k]
    for i, s in enumerate(ranked):
        seg_start, seg_end = s[0], s[-1]

        # Get starts (bounds clamped so that segments near index 0 do not wrap around)
        if np.sum(starts[max(0, seg_start - 90) : max(0, seg_start + 60)]) > 0:
            print("start ", i + 1)
            count_starts    += 1
            sums_starts     += count_starts/(i+1)

        # Get ends
        if np.sum(ends[max(0, seg_end - 60) : max(0, seg_end + 90)]) > 0:
            print("end ", i + 1)
            count_end   += 1
            sums_end    += count_end/(i+1)

        # Get coverage; an empty segment never counts (and no longer triggers a mean-of-empty-slice warning)
        covered = ground_truth[max(0, seg_start) : max(0, seg_end)]
        if covered.size > 0 and np.mean(covered) > 0.5:
            print("coverage ", i + 1)
            count_coverage  += 1
            sums_coverage   += count_coverage/(i+1)

    return sums_starts/max(1,count_starts), sums_end/max(1,count_end), sums_coverage/max(1,count_coverage)

### Data Loading

In [None]:
# Base names (without extension) of the required input files. All of them are expected in the ./data/ folder.

# Audio file for analysis. Must be a single-channel/mono MP3 file.
audio_file = "test"

# File containing the autoencoder reconstruction loss values. Should be a .csv file.
frame_analysis_file = "test"

# File containing the labels (highlights). Should be a .txt file with one entry per line in the format
# 'mm:ss - mm:ss', representing (start_time - end_time).
labels_file = "test"

# Length of the video, given as minutes plus seconds.
minutes, seconds = 60, 0

In [None]:
# Total length of the video in seconds
video_length    = 60 * minutes + seconds
labels          = []

# Parse the .txt file containing the labels ('mm:ss - mm:ss' per line).
# Every time is stored as the integer minutes * 100 + seconds, which is the encoding that get_ap and get_ap_at_k
# decode. Computing it numerically (instead of concatenating the two strings) keeps the value correct when the
# seconds are not zero-padded: "1:5" becomes 105 (1 min 5 s) rather than 15.
with open(os.path.join('data', labels_file + ".txt"), "r") as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline at the end of the file) instead of failing on the unpacking
        if not line.strip():
            continue
        start_time, end_time                    = line.split("-")
        start_time_minutes, start_time_seconds  = start_time.split(":")
        end_time_minutes, end_time_seconds      = end_time.split(":")
        # int() ignores the surrounding whitespace left over from the split
        labels.append([int(start_time_minutes) * 100 + int(start_time_seconds),
                       int(end_time_minutes) * 100 + int(end_time_seconds)])

In [None]:
# Load and analyse the given audio file.
# The MP3 is decoded with pydub and its raw samples are copied into a NumPy array.
a                       = pydub.AudioSegment.from_mp3(f"data/{audio_file}.mp3")
y                       = np.array(a.get_array_of_samples())
# eval_sound is a project helper that returns three values; the third one is discarded here.
# NOTE(review): data_binr / data_rmsr presumably are per-bin and RMS-based audio feature series -- confirm
# against sound.sound_eval.
data_binr, data_rmsr, _ = eval_sound(y,a)
# Visual check of the first feature series, which is the one analysed by the later cells
plt.plot(data_binr)
plt.show()

In [None]:
# Load the autoencoder reconstruction loss values
# (comma-separated numbers read from data/<frame_analysis_file>.csv)
singl_frm = np.loadtxt(f"data/{frame_analysis_file}.csv", delimiter = ",")
# Plot the raw loss values for a visual check before running the anomaly detection
plt.plot(singl_frm)
plt.show()

### Results
#### Perform LOF on Audio Data

In [None]:

# Local outlier factor of the audio feature series; the raw scores are kept in `signal` to rate the segments.
r = 480
signal = lof(np.array(data_binr), r)
interm = signal

plt.plot(interm)
plt.show()

# Flag the samples at or above the 99.9% quantile of the scores. The mask stays boolean: the trailing "+ 0" of
# the original expression was added to the quantile, not to the comparison result.
cutoff = np.quantile(signal, 0.999)
print(cutoff)
flagged = signal >= cutoff

# Smooth the mask into an indicator curve and cap it at 1
interm = kaiser_wind(flagged, 180, 0.2)
interm[interm > 1] = 1
plt.plot(interm)
plt.show()

# Turn the indicator into segments. Indices are divided by 360 for display, i.e. shown in minutes on the
# 6-samples-per-second grid used by get_ap.
segms = get_anomls(interm, signal, 0.5)
print(len(interm))
print(len(segms))
for seg in segms:
    print(seg[0] / 360, seg[-1] / 360, seg[1])

# Rank ascending by score (get_ap_at_k walks the list in reverse) and list the segments again
segms.sort(key=lambda seg: seg[1])
print("============")
for seg in segms:
    print(seg[0] / 360, seg[-1] / 360, seg[1])

# Evaluate against the labels
ap, prec, rec = get_ap(video_length, interm, labels)
print("AP:", ap)
print("PRECISION:", prec)
print("RECALL:", rec)
print("START@6, END@6, COVERAGE@6", get_ap_at_k(video_length, segms, labels))

#### Get LOF of Single Frame Reconstruction

In [None]:
# Local outlier factor of the reconstruction loss; the raw scores are kept in `signal` to rate the segments.
print(len(singl_frm))
r = 240
signal = lof(np.array(singl_frm), r)
interm = signal

plt.plot(interm)
plt.show()

# Flag the samples strictly above the 99.5% quantile of the scores. The mask stays boolean: the trailing "+ 0"
# of the original expression was added to the quantile, not to the comparison result.
cutoff = np.quantile(signal, 0.995)
print(cutoff)
flagged = signal > cutoff

# Smooth the mask into an indicator curve and cap it at 1
interm = kaiser_wind(flagged, 360, 0.1)
interm[interm > 1] = 1

plt.plot(interm)
plt.show()

segms = get_anomls(interm, signal, 0.5)
print(len(interm))
print(len(segms))
for seg in segms:
    # Only the segment start is moved, 90 samples earlier.
    # NOTE(review): get_ap below aligns this series with the ground truth via start = 90 (a shift in the
    # opposite direction) and the segment end is left untouched -- confirm that this is intended.
    seg[0] = seg[0] - 90
    print(seg[0] / 360, seg[-1] / 360, seg[1])

# Rank ascending by score (get_ap_at_k walks the list in reverse) and list the segments again
segms.sort(key=lambda seg: seg[1])

print("============")
for seg in segms:
    print(seg[0] / 360, seg[-1] / 360, seg[1])

# Evaluate against the labels; sample j of the indicator is compared with ground-truth sample j + 90
ap, prec, rec = get_ap(video_length, interm, labels, start = 90, ends = len(interm) + 90)
print("AP:", ap)
print("PRECISION:", prec)
print("RECALL:", rec)
print("START@6, END@6, COVERAGE@6",get_ap_at_k(video_length, segms, labels))

#### Regressor on Audio Data

In [None]:
# Standardise the audio feature series
sc = StandardScaler()
r = 180
d_interm = sc.fit_transform(X  =np.array(data_binr).reshape(-1,1), y = None)
plt.plot(d_interm)
plt.show()

# Fit the autoregressive model on the standardised series and compute its squared prediction error on the
# smoothed series
unsup_linreg = polyreg_outliar_mse([d_interm.reshape(-1,)],r)
interm = eval_linreg(kaiser_wind(d_interm.reshape(-1,), r+30, 2), r, unsup_linreg)

# Rate the segments with this cell's own regression error. Previously `signal` was never assigned here, so
# get_anomls silently used whatever an earlier executed cell had left in it (the LOF scores of another detector).
signal = interm

plt.plot(interm)
plt.show()

# Flag the samples strictly above the 95% quantile of the error (the former "+ 0" was a no-op on the quantile)
cutoff = np.quantile(interm, 0.95)
print(cutoff)
interm = interm > cutoff

# Smooth the mask into an indicator curve and cap it at 1
interm  = kaiser_wind(interm, 360, 0.1 )
interm[interm>1] = 1
plt.plot(interm)
plt.show()

segms = get_anomls(interm, signal, 0.5)
print(len(segms))
for s in segms:
    # Shift both segment borders by 2*r samples, the same offset that get_ap uses below to align the
    # indicator with the ground truth.
    # NOTE(review): presumably compensates for the samples consumed by the regression window -- confirm.
    s[0]+=2*r
    s[-1]+=2*r
    print(s[0]/360, s[-1]/360, s[1])

# Rank ascending by score (get_ap_at_k walks the list in reverse) and list the segments again
segms.sort(key=lambda x: x[1])
print("============")

for s in segms:
    print(s[0]/360, s[-1]/360, s[1])

# Evaluate against the labels
ap, prec, rec = get_ap(video_length, interm, labels, start = 2*r, ends = len(interm)+2*r )
print("AP:", ap)
print("PRECISION:", prec)
print("RECALL:", rec)
print("START@6, END@6, COVERAGE@6",get_ap_at_k(video_length, segms, labels))