In [3]:
import configparser
import os
import sys
from os import path

PATH_ROOT = ""
PATH_DATA = ""

creds_path_ar = ["../../credentials.ini", "credentials.ini"]

for creds_path in creds_path_ar:
    if path.exists(creds_path):
        config_parser = configparser.ConfigParser()
        config_parser.read(creds_path)
        PATH_ROOT = config_parser['MAIN']["PATH_ROOT"]
        PATH_DATA = config_parser['MAIN']["PATH_DATA"]
        WANDB_enable = config_parser['MAIN']["WANDB_ENABLE"] == 'TRUE'
        ENV = config_parser['MAIN']["ENV"]

# adding cwd to path to avoid "No module named src.*" errors
sys.path.insert(0, os.path.join(PATH_ROOT))

In [56]:
import argparse
import random
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
from sklearn.metrics import roc_curve, auc
from src.data import get_data
from src.data.iterable_dataset import Config, DataDict, StreamingDataset, iq_to_spectogram, \
    normalize
from src.models import arch_setup, tcn_model3
from src.data import get_data
from src.visualization import metrics
from src.features import specto_feat
import wandb
import logging


In [30]:
cd {PATH_ROOT}
wandb.init()

/home/ubuntu/sota-mafat-radar


In [123]:
model_paths = ['sota-mafat/sota-mafat-base/1epmi6lf','sota-mafat/sota-mafat-base/3s0bv1dr']
test_path = 'MAFAT RADAR Challenge - FULL Public Test Set V1'
final_test_path = 'MAFAT RADAR Challenge - Private Test Set V1'


# Two Parts

1. Run off the test data to get the test scores for a given model (to give us some indication of the accuracy)
2. With the various models, create a LR using the test data as its own train/val.

### PART 1

In [132]:
def load_model(model_path: str):
    '''
    Load Model from Wandb
    '''
    wandb.restore('data/models/model.pth', run_path=model_path)
    return torch.load('data/models/model.pth')


def load_testset(test_path: str):
    '''
    Load Test Data
    '''
    test_data = pd.DataFrame.from_dict(get_data.load_data(test_path, PATH_DATA), orient='index').transpose()
    return test_data


def run_predictions(model, test_df, final_submission = False):
    '''
    Have the predictions ready for submission
    '''
    test_df['output_array'] = test_df['iq_sweep_burst'].progress_apply(iq_to_spectogram)
    test_df['output_array'] = test_df.progress_apply(lambda row: specto_feat.max_value_on_doppler(row['output_array'], row['doppler_burst']), axis=1)
    test_df['output_array'] = test_df['output_array'].progress_apply(normalize)
    test_x = torch.from_numpy(np.stack(test_df['output_array'].tolist(), axis=0).astype(np.float32)).unsqueeze(1)

    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu:0')

    # Creating DataFrame with the probability prediction for each segment
    submission = pd.DataFrame()
    submission['segment_id'] = test_df['segment_id']
    submission['prediction'] = model(test_x.to(device)).detach().cpu().numpy()
    if not final_submission:
        test_data['target_type'].replace({'animal': 0, 'human': 1}, inplace=True)
        submission['label'] = test_df['target_type']
    return submission

def check_model_auc(model_path: str, test_path: str):
    '''
    1. Load the Model (using load_model())
    2. Load the Test Data (using load_testdata())
    3. Return the predictionsauc and acc scores of predictions
    '''
    model = load_model(model_path)
    test_df = load_testset(test_path)
    predictions = run_predictions(model, test_df)
    return metrics.model_scores(predictions['label'], predictions['prediction'])

In [128]:
# model_path = 'sota-mafat/sota-mafat-base/3s0bv1dr'
# model = load_model(model_path)
# test_path = 'MAFAT RADAR Challenge - FULL Public Test Set V1'
# test_dict = load_testset(test_path)
# predics = run_predicions(model, test_path)

In [117]:
check_model_auc(model_path,test_path)

100%|██████████| 284/284 [00:00<00:00, 2104.49it/s]
100%|██████████| 284/284 [00:00<00:00, 2884.62it/s]
100%|██████████| 284/284 [00:00<00:00, 12849.03it/s]


0.5217887631526702

### PART 2

In [98]:
from sklearn.linear_model import LogisticRegression as LogR
from sklearn.model_selection import train_test_split

In [145]:
def basic_mean(model_paths: list, test_path, final_submission = False):
    preds = []
    test_df = load_testset(test_path)
    for model_path in model_paths:
        model = load_model(model_path)
        pred = run_predictions(model, test_df, final_submission)
        preds.append(pred['prediction'])
    df = pd.concat(preds, axis=1)
    pred = df.mean(axis=1)
    labels = test_df['target_type']
    return metrics.model_scores(labels, pred)
    

In [146]:
def weighted_mean(model_paths: list, test_path, final_submission = False):
    preds = []
    scores = []
    test_df = load_testset(test_path)
    labels = test_df['target_type']
    for model_path in model_paths:
        model = load_model(model_path)
        pred = run_predictions(model, test_df, final_submission)
        preds.append(pred['prediction'])
        scores.append(metrics.model_scores(labels,pred['prediction']))
    df = pd.concat(preds, axis=1)
    scores = np.array(scores)
    scores = scores / np.sum(scores)
    weighted_mean = (scores*df).mean(axis=1)
    print(weighted_mean.shape)
    return metrics.model_scores(labels, weighted_mean), scores
    

In [147]:
def lr_model(model_paths: list, test_path, final_submission=False):
    preds = []
    col_names = range(len(model_paths))
    test_df = load_testset(test_path)
    for model_path in model_paths:
        model = load_model(model_path)
        pred = run_predictions(model, test_df, final_submission)
        preds.append(pred['prediction'])
    df = pd.concat(preds, axis=1)
    df.columns = col_names 
    labels = test_df['target_type']
    X_train, X_test,y_train, y_test = train_test_split(df, labels, test_size=0.2, random_state=43)
    clf = LogR().fit(X_train,y_train)
    y_pred = clf.predict(X_test)
    return metrics.model_scores(y_test, y_pred), clf.coef_, clf.intercept_


In [121]:
# weighted_mean(model_paths, test_path)

100%|██████████| 284/284 [00:00<00:00, 2088.72it/s]
100%|██████████| 284/284 [00:00<00:00, 3161.28it/s]
100%|██████████| 284/284 [00:00<00:00, 12767.50it/s]
100%|██████████| 284/284 [00:00<00:00, 2114.76it/s]
100%|██████████| 284/284 [00:00<00:00, 3240.84it/s]
100%|██████████| 284/284 [00:00<00:00, 13159.33it/s]
(284,)


0.5217887631526702

In [151]:
def run_ensemble(model_paths, old_test_path, final_test_path, ensemble_method, final_submission= True):
    preds = []
    test_df = load_testset(final_test_path)
    for model_path in model_paths:
        model = load_model(model_path)
        pred = run_predictions(model, test_df, final_submission)
        preds.append(pred['prediction'])
    
    df = pd.concat(preds, axis=1)

    if ensemble_method == "weighted_mean":
        _, scores = weighted_mean(model_paths, old_test_path, final_submission)
        prediction = (scores*df).mean(axis=1)

    elif ensemble_method == "lr_model":
        _, coef, bias = lr_model(model_paths, old_test_path, final_submission)
        prediction = (coef*df).mean(axis=1) + bias
    else:
        prediction = df
        
    submission = pd.DataFrame()
    test_df = pd.DataFrame.from_dict(get_data.load_data(final_test_path, PATH_DATA), orient='index').transpose()
    submission['segment_id'] = test_df['segment_id']
    submission['prediction'] = prediction
    return submission

In [152]:
run_ensemble(model_paths, test_path, final_test_path, 'lr_model')

100%|██████████| 248/248 [00:00<00:00, 1594.01it/s]
100%|██████████| 248/248 [00:00<00:00, 3168.86it/s]
100%|██████████| 248/248 [00:00<00:00, 12758.81it/s]
100%|██████████| 248/248 [00:00<00:00, 2103.82it/s]
100%|██████████| 248/248 [00:00<00:00, 2999.77it/s]
100%|██████████| 248/248 [00:00<00:00, 12802.31it/s]
100%|██████████| 284/284 [00:00<00:00, 2069.67it/s]
100%|██████████| 284/284 [00:00<00:00, 2883.47it/s]
100%|██████████| 284/284 [00:00<00:00, 9576.27it/s]
100%|██████████| 284/284 [00:00<00:00, 1981.87it/s]
100%|██████████| 284/284 [00:00<00:00, 3213.32it/s]
100%|██████████| 284/284 [00:00<00:00, 6803.02it/s]


ValueError: y_true takes value in {'animal', 'human'} and pos_label is not specified: either make y_true take value in {0, 1} or {-1, 1} or pass pos_label explicitly.