In [None]:
import pandas as pd
import os
import random
import pickle
import matplotlib.pyplot as plt
import numpy as np

from mafat_radar_challenge.utils import fft, normalize, max_value_on_doppler, hann

In [None]:
# Project checkout root and the data folder inside it. load_pkl_data and
# load_csv_metadata resolve files as <mount_path>/<competition_path>/<name>.
# Set the MAFAT_MOUNT_PATH environment variable to run on another machine;
# the default is the original author's checkout.
mount_path = os.environ.get(
    "MAFAT_MOUNT_PATH",
    '/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge',
)
competition_path = 'data'

In [None]:
def load_data(file_path):
    """
    Load a dataset's metadata (csv) and signal data (pkl) into one dictionary.

    The pkl and csv files must share the same base name.

    Arguments:
    file_path -- {str} -- base name shared by the pkl and csv files (the loaders
                          append the '.pkl' / '.csv' extension themselves)

    Returns:
    Python dictionary whose values are numpy arrays
    """
    signals = load_pkl_data(file_path)
    metadata = load_csv_metadata(file_path)
    # Metadata columns come first; a pickle entry with the same key overrides them.
    merged = {**metadata, **signals}
    return {name: np.array(values) for name, values in merged.items()}
  


def load_pkl_data(file_path):
    """
    Load the pickled signal data of a dataset.

    Arguments:
    file_path -- {str} -- base name of the pickle file, without the '.pkl'
                          extension; looked up under mount_path/competition_path

    Returns:
    The unpickled object (a python dictionary for the competition files)
    """
    pkl_name = file_path + '.pkl'
    # NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
    with open(os.path.join(mount_path, competition_path, pkl_name), 'rb') as handle:
        return pickle.load(handle)


def load_csv_metadata(file_path):
    """
    Load the metadata csv of a dataset.

    Arguments:
    file_path -- {str} -- base name of the csv file, without the '.csv'
                          extension; looked up under mount_path/competition_path

    Returns:
    Pandas DataFrame
    """
    csv_name = file_path + '.csv'
    with open(os.path.join(mount_path, competition_path, csv_name), 'rb') as handle:
        return pd.read_csv(handle)

# Extract spectrograms without max_doppler

In [None]:
# Metadata tables of the three labelled sets, read through the shared loader so
# the files are resolved from mount_path/competition_path exactly like the pickles.
train_df = load_csv_metadata("MAFAT RADAR Challenge - Training Set V1")
aux_df = load_csv_metadata("MAFAT RADAR Challenge - Auxiliary Experiment Set V2")
synth_df = load_csv_metadata("MAFAT RADAR Challenge - Auxiliary Synthetic Set V2")

In [None]:
# Signal data of the three labelled sets: the "iq_sweep_burst" entry of each pickle.
train_burst = load_pkl_data("MAFAT RADAR Challenge - Training Set V1")["iq_sweep_burst"]
aux_burst = load_pkl_data("MAFAT RADAR Challenge - Auxiliary Experiment Set V2")["iq_sweep_burst"]
synth_burst = load_pkl_data("MAFAT RADAR Challenge - Auxiliary Synthetic Set V2")["iq_sweep_burst"]

In [None]:
# Quick look at the dimensions of the training signal array.
train_burst.shape

In [None]:
# One spectrogram per segment: the project's `fft` helper applied along axis 0
# of every I/Q matrix, stacked into a single array per dataset.
train_spectrogram = np.array([fft(segment, axis=0) for segment in train_burst])
aux_spectrogram = np.array([fft(segment, axis=0) for segment in aux_burst])
synth_spectrogram = np.array([fft(segment, axis=0) for segment in synth_burst])

In [None]:
del train_burst
del aux_burst
del synth_burst

In [None]:
full_df = pd.concat([train_df, aux_df, synth_df])

In [None]:
full_spectrogram = np.concatenate([train_spectrogram, aux_spectrogram, synth_spectrogram])

In [None]:
np.save("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_full_spectrogram_raw.npy", full_spectrogram)

# Extract centered spectrograms

Instead of using the mean of the doppler vector to center the spectrogram, the 50th percentile (median) with 'lower' interpolation is used.

In [None]:
train_burst = load_pkl_data("MAFAT RADAR Challenge - Training Set V1")["doppler_burst"]
aux_burst = load_pkl_data("MAFAT RADAR Challenge - Auxiliary Experiment Set V2")["doppler_burst"]
synth_burst = load_pkl_data("MAFAT RADAR Challenge - Auxiliary Synthetic Set V2")["doppler_burst"]

In [None]:
full_burst = np.concatenate([train_burst, aux_burst, synth_burst])

In [None]:
full_spectrogram = np.load("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_full_spectrogram_raw.npy")

In [None]:
centered_full_spectrogram = list()
for doppler_burst, spectrogram in zip(full_burst, full_spectrogram):
    offset = 63 - int(np.percentile(doppler_burst, 50, interpolation="lower"))
    spectrogram = np.roll(spectrogram, offset, axis=0)
    centered_full_spectrogram.append(spectrogram)

In [None]:
centered_full_spectrogram = np.array(centered_full_spectrogram)

In [None]:
centered_full_spectrogram.shape

In [None]:
np.save("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_full_spectrogram_centered_v2.npy", centered_full_spectrogram)

In [None]:
train_df = pd.read_csv("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/MAFAT RADAR Challenge - Training Set V1.csv")
aux_df = pd.read_csv("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/MAFAT RADAR Challenge - Auxiliary Experiment Set V2.csv")
synth_df = pd.read_csv("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/MAFAT RADAR Challenge - Auxiliary Synthetic Set V2.csv")
full_spec_v2_csv = pd.concat([train_df, aux_df, synth_df])
full_spec_v2_csv.loc[full_spec_v2_csv.target_type=="human", "target_type"] = 1
full_spec_v2_csv.loc[full_spec_v2_csv.target_type=="animal", "target_type"] = 0
full_spec_v2_csv.target_type = full_spec_v2_csv.target_type.apply(int)
assert(len(full_spec_v2_csv) == (len(train_df) + len(aux_df) + len(synth_df)))
full_spec_v2_csv["source"] = None
full_spec_v2_csv.iloc[:len(train_df), -1] = "train"
full_spec_v2_csv.iloc[len(train_df):(len(train_df)+ len(aux_df)), -1] = "aux"
full_spec_v2_csv.iloc[(len(train_df)+ len(aux_df)):, -1] = "synth"
full_spec_v2_csv.loc[full_spec_v2_csv.source=="synth", "segment_id"] -= 2000000


In [None]:
full_spec_v2_csv.to_csv("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_full_data_v9.csv", index=False)

### Test

In [None]:
test_data = load_pkl_data("MAFAT RADAR Challenge - Public Test Set V1")

In [None]:
centered_test_spectrogram = list()
for doppler_burst, iq in zip(test_data["doppler_burst"], test_data["iq_sweep_burst"]):
    spectrogram = fft(iq, axis=0)
    offset = 63 - int(np.percentile(doppler_burst, 50, interpolation="lower"))
    spectrogram = np.roll(spectrogram, offset, axis=0)
    centered_test_spectrogram.append(spectrogram)
centered_test_spectrogram = np.array(centered_test_spectrogram)

In [None]:
centered_test_spectrogram.shape

In [None]:
plt.imshow(centered_test_spectrogram[0])

In [None]:
np.save("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_test_v9_spectrogram.npy", centered_test_spectrogram)

# Create K Folds

In [None]:
from sklearn.model_selection import GroupKFold
from sklearn.utils import shuffle

In [None]:
# Metadata for the fold split: each frame is tagged with its origin before stacking.
train_df = load_csv_metadata("MAFAT RADAR Challenge - Training Set V1")
train_df["source"] = "train"
aux_df = load_csv_metadata("MAFAT RADAR Challenge - Auxiliary Experiment Set V2")
aux_df["source"] = "aux"
synth_df = load_csv_metadata("MAFAT RADAR Challenge - Auxiliary Synthetic Set V2")
# Synthetic segment ids are shifted down by 2,000,000.
synth_df["segment_id"] = synth_df["segment_id"] - 2000000
synth_df["source"] = "synth"
# reset_index() without drop=True gives a unique 0..n-1 index and keeps the old
# per-file row labels as an "index" column (which is therefore part of full_df).
full_df = pd.concat([train_df, aux_df, synth_df]).reset_index()
# Binary label: 1 for "human", 0 for everything else.
full_df["target_type"] = (full_df.target_type == "human").astype(int)

## V9

In [None]:
all_spectrograms = np.load("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_full_spectrogram_centered_v2.npy")

In [None]:
assert len(all_spectrograms) == len(full_df)

In [None]:
# 5-fold split grouped by track_id; each fold's spectrograms and metadata are
# written to the configured data folder.
group_kfold = GroupKFold(n_splits=5)
indexes, y_shuffled, groups_shuffled = shuffle(full_df.index.values, full_df.target_type.values, full_df.track_id.values, random_state=0)
data_dir = os.path.join(mount_path, competition_path)
for fold, (train_index, test_index) in enumerate(group_kfold.split(indexes, y_shuffled, groups_shuffled)):
    print(fold)
    print("TRAIN:", train_index, "TEST:", test_index)
    # split() yields positions into the shuffled arrays; map them back to full_df row labels.
    train_fold_idx, test_fold_idx = indexes[train_index], indexes[test_index]
    # full_df has a 0..n-1 index, so its labels double as positions into all_spectrograms.
    spectrograms_train, spectrograms_test = all_spectrograms[train_fold_idx], all_spectrograms[test_fold_idx]
    df_train, df_test = full_df.loc[train_fold_idx, :].copy(), full_df.loc[test_fold_idx, :].copy()
    print(len(spectrograms_train), len(spectrograms_test), len(df_train), len(df_test))
    np.save(os.path.join(data_dir, "mafat_train_v9_spectrogram_fold{}.npy".format(fold)), spectrograms_train)
    np.save(os.path.join(data_dir, "mafat_val_v9_spectrogram_fold{}.npy".format(fold)), spectrograms_test)
    df_train.to_csv(os.path.join(data_dir, "mafat_train_v9_fold{}.csv".format(fold)), index=False)
    df_test.to_csv(os.path.join(data_dir, "mafat_val_v9_fold{}.csv".format(fold)), index=False)
    # Release the per-fold copies before the next iteration allocates new ones.
    del spectrograms_train
    del spectrograms_test

In [None]:
# df_train / df_test still hold the last fold (fold 4) produced by the loop above.
# Leakage check: number of validation rows whose track_id also occurs in the
# training split (expected to be 0 for a split grouped by track_id).
df_test.track_id.isin(df_train.track_id).sum()

In [None]:
# Same check in the other direction: training rows whose track_id occurs in validation.
df_train.track_id.isin(df_test.track_id).sum()

In [None]:
# Class balance of the validation split (row counts per target_type).
df_test.groupby("target_type").count()

In [None]:
# Class balance of the training split (row counts per target_type).
df_train.groupby("target_type").count()

# Extract centered Background spectrogram

In [None]:
back_data = load_pkl_data("MAFAT RADAR Challenge - Auxiliary Background(empty) Set V1")

In [None]:
centered_full_spectrogram = list()
for doppler_burst, iq in zip(back_data["doppler_burst"], back_data["iq_sweep_burst"]):
    spectrogram = fft(iq, axis=0)
    offset = 63 - int(np.percentile(doppler_burst, 50, interpolation="lower"))
    spectrogram = np.roll(spectrogram, offset, axis=0)
    centered_full_spectrogram.append(spectrogram)

In [None]:
back_spectrogram = np.array(centered_full_spectrogram)

In [None]:
np.save("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_background_v9_spectrogram.npy", back_spectrogram)

# Extract centered FULL Public test dataset

## v9

In [None]:
back_data = load_pkl_data("MAFAT RADAR Challenge - FULL Public Test Set V1")

In [None]:
centered_full_spectrogram = list()
for doppler_burst, iq in zip(back_data["doppler_burst"], back_data["iq_sweep_burst"]):
    spectrogram = fft(iq, axis=0)
    offset = 63 - int(np.percentile(doppler_burst, 50, interpolation="lower"))
    spectrogram = np.roll(spectrogram, offset, axis=0)
    centered_full_spectrogram.append(spectrogram)

In [None]:
back_spectrogram = np.array(centered_full_spectrogram)

In [None]:
plt.imshow(back_spectrogram[0])

In [None]:
np.save("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_full_public_test_v9_spectrogram.npy", back_spectrogram)

- Metadata

In [None]:
# Metadata of the FULL public test set with the label encoded as human -> 1, animal -> 0.
df = load_csv_metadata("MAFAT RADAR Challenge - FULL Public Test Set V1")
df.loc[df.target_type == "human", "target_type"] = 1
df.loc[df.target_type == "animal", "target_type"] = 0
df.target_type = df.target_type.apply(int)
df.to_csv(os.path.join(mount_path, competition_path, "mafat_full_public_test_set.csv"), index=False)

- Mapping metadata

In [None]:
df = pd.read_csv("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/MAFAT RADAR Challenge - FULL Public Test Set V1.csv")
df.loc[df.target_type=="human", "target_type"] = 1
df.loc[df.target_type=="animal", "target_type"] = 0
df.target_type = df.target_type.apply(int)
mapping = pd.read_csv("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mapping_public_set_to_full_public_set.csv")

In [None]:
df["source"] = None
df.loc[df.segment_id.isin(mapping.new_segment_id_full_public), "source"] = "public"
df.loc[~df.segment_id.isin(mapping.new_segment_id_full_public), "source"] = "extra_public"

In [None]:
df.to_csv("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_full_public_test_set.csv", index=False)

# Extract centered Private test dataset

## v9

In [None]:
back_data = load_pkl_data("MAFAT RADAR Challenge - Private Test Set V1")

In [None]:
centered_full_spectrogram = list()
for doppler_burst, iq in zip(back_data["doppler_burst"], back_data["iq_sweep_burst"]):
    spectrogram = fft(iq, axis=0)
    offset = 63 - int(np.percentile(doppler_burst, 50, interpolation="lower"))
    spectrogram = np.roll(spectrogram, offset, axis=0)
    centered_full_spectrogram.append(spectrogram)

In [None]:
back_spectrogram = np.array(centered_full_spectrogram)

In [None]:
for x in back_spectrogram:
    plt.imshow(x)
    plt.show()

In [None]:
np.save("/home/agarcia/repos/mafat-radar-challenge/mafat_radar_challenge/data/mafat_private_test_v9_spectrogram.npy", back_spectrogram)