In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import configparser
import os.path
from os import path
from importlib import reload

creds_path_ar = ["../../credentials.ini","credentials.colab.ini"]
PATH_ROOT = ""
PATH_DATA = ""

for creds_path in creds_path_ar:
    if path.exists(creds_path):
        config_parser = configparser.ConfigParser()
        config_parser.read(creds_path)
        PATH_ROOT = config_parser['MAIN']["PATH_ROOT"]
        PATH_DATA = config_parser['MAIN']["PATH_DATA"]
        WANDB_enable = config_parser['MAIN']["WANDB_ENABLE"] == 'TRUE'
        ENV = config_parser['MAIN']["ENV"]
        break

if ENV=="COLAB":
  from google.colab import drive
  mount_path = '/content/gdrive/'
  drive.mount(mount_path)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [3]:
if WANDB_enable == True:
    if ENV=="COLAB":
      !pip install --upgrade wandb
    import wandb
    !wandb login {config_parser['MAIN']["WANDB_LOGIN"]}
    wandb.init(project="sota-mafat-base")
    os.environ['WANDB_NOTEBOOK_NAME'] = '[SS]Alexnet_pytorch'

In [4]:
cd {PATH_ROOT}

/content/gdrive/My Drive/WORK/ML/develop/MAFAT/sota-mafat-radar


In [5]:
import logging

log_filename = "alexnet_pytorch.log"
if os.path.exists(log_filename):
    os.remove(log_filename)

logging.basicConfig(level=logging.INFO,
                    filename='alexnet_pytorch.log',
                    format="%(asctime)s [%(levelname)s]|%(module)s:%(message)s",)

logging.info("start")
logger = logging.getLogger()


In [6]:
import os
import random
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from sklearn.metrics import roc_auc_score, roc_curve, auc, accuracy_score
from matplotlib.colors import LinearSegmentedColormap
from termcolor import colored

from src.data import feat_data, get_data, get_data_pipeline
from src.models import arch_setup, base_base_model, alex_model
from src.features import specto_feat,add_data

# Set seed for reproducibility of results
seed_value = 0
os.environ['PYTHONHASHSEED']=str(seed_value)


random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(0)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu:0')

In [7]:
config = dict()
config['num_tracks'] = 3
config['val_ratio'] = 3
config['shift_segment'] = 1 # list(np.arange(1,31))
config['get_shifts'] = True
config['get_horizontal_flip'] = True
config['get_vertical_flip'] = False

batch_size = 32
lr = 1e-4

In [8]:
  #experiment_auxiliary = 'MAFAT RADAR Challenge - Auxiliary Experiment Set V2'
  #experiment_auxiliary_df = get_data.load_data(experiment_auxiliary, PATH_DATA)
  #logger.info(f"experiment_auxiliary:{experiment_auxiliary_df['date_index'].shape}")

  #train_aux = get_data.aux_split(experiment_auxiliary_df, numtracks= 3)
  #logger.info(f"train_aux:{train_aux['date_index'].shape}")

  train_path = 'MAFAT RADAR Challenge - Training Set V1'
  training_dict = get_data.load_data(train_path, PATH_DATA)

  # Adding segments from the experiment auxiliary set to the training set
  #train_dict = get_data.append_dict(training_dict, train_aux)
  train_dict = training_dict

  #logger.info(f"training_dict({training_dict['date_index'].shape}) + aux dataset({train_aux['date_index'].shape}) = full train({train_dict['date_index'].shape})")

  full_data = pd.DataFrame.from_dict(train_dict,orient='index').transpose()

  #split Tracks here to only do augmentation on train set
  full_data = get_data.split_train_val_as_df(full_data, ratio=3)

  logger.info(f"train only:{len(full_data[full_data.is_validation == False])}.  val only:{len(full_data[full_data.is_validation == True])}")

  # to free ram space
  #del experiment_auxiliary_df 
  #del train_aux
  del training_dict

  del train_dict

In [9]:
full_data['augmentation_info']=np.empty((len(full_data), 0)).tolist()
full_data = add_data.generate_shifts(full_data,shift_by=16)

shift:16


In [10]:
full_data = add_data.generate_flips(full_data,mode='vertical')

In [11]:
full_data

Unnamed: 0,segment_id,track_id,geolocation_type,geolocation_id,sensor_id,snr_type,date_index,target_type,doppler_burst,iq_sweep_burst,is_validation,augmentation_info
0,0,11,C,1,1,HighSNR,27,animal,"[14, 14, 14, 43, 43, 43, 15, 15, 15, 17, 14, 1...","[[(6900.381244659424+6699.4177532196045j), (21...",True,[]
1,1,16,C,1,1,HighSNR,27,animal,"[123, 123, 123, 5, 5, 6, 5, 9, 4, 4, 4, 4, 4, ...","[[(-2723.534037590027-36.075279235839844j), (6...",False,[]
2,2,16,C,1,1,HighSNR,27,animal,"[122, 122, 122, 122, 123, 120, 120, 117, 119, ...","[[(2121.38956451416-1027.161826133728j), (159....",False,[]
3,3,29,C,1,1,HighSNR,27,animal,"[75, 75, 75, 55, 32, 31, 33, 36, 30, 31, 31, 2...","[[(1219.9125831127167-2712.076440811157j), (-7...",True,[]
4,4,29,C,1,1,HighSNR,27,animal,"[25, 25, 27, 26, 29, 28, 25, 24, 18, 22, 24, 2...","[[(-1530.8067255020142-2189.1123847961426j), (...",False,[]
...,...,...,...,...,...,...,...,...,...,...,...,...
20725,213206,347,D,4,4,HighSNR,32,human,,,False,"[{'type': 'shift', 'shift': 16, 'from_segments..."
20726,213209,347,D,4,4,HighSNR,32,human,,,False,"[{'type': 'shift', 'shift': 16, 'from_segments..."
20727,213212,347,D,4,4,HighSNR,32,human,,,False,"[{'type': 'shift', 'shift': 16, 'from_segments..."
20728,213215,347,D,4,4,HighSNR,32,human,,,False,"[{'type': 'shift', 'shift': 16, 'from_segments..."


In [12]:
train_set = arch_setup.DS(full_data[full_data.is_validation==False])
train_set.__getitem__(20029)[0].shape

(1, 126, 32, 1)

In [None]:
full_data = get_data_pipeline.pipeline_trainval(PATH_DATA, config)

In [None]:
|print(train_x.shape)
print(val_x.shape[0])

(10811, 126, 32, 1)
616


In [13]:
train_set = arch_setup.DS(full_data[full_data.is_validation==False])
val_set= arch_setup.DS(full_data[full_data.is_validation==True])

train_y = np.array(full_data[full_data.is_validation==False]['target_type']=='human').astype(int)
val_y = np.array(full_data[full_data.is_validation==True]['target_type']=='human').astype(int)

train_loader=DataLoader(dataset= train_set, batch_size = batch_size, shuffle = True, num_workers = 2)
val_loader=DataLoader(dataset= val_set, batch_size = batch_size, shuffle = True, num_workers = 2)

In [14]:
model= alex_model.alex_mdf_model()
# model.apply(init_weights)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

model.to(device)

if WANDB_enable == False:
  wandb = None
else:
    runname = input("Enter WANDB runname(ENTER to skip wandb) :")
    notes = input("Enter run notes :")

    wandb.init(project="sota-mafat-base",name=runname, notes=notes)
    os.environ['WANDB_NOTEBOOK_NAME'] = '[SS]Alexnet_pytorch'
    
    wandb.watch(model)
    wandb.config['data_config'] = config
    wandb.config['train_size'] = train_x.shape[0]
    wandb.config['val_size'] = val_x.shape[0]
    wandb.config['batch_size'] = batch_size
    wandb.config['learning rate'] = lr
    wandb.log(config)


In [15]:
log = arch_setup.train_epochs(train_loader,val_loader,model,criterion,optimizer,num_epochs= 10,device=device,train_y=train_y,val_y=val_y, WANDB_enable = WANDB_enable, wandb= wandb)

started training epoch no. 1


RuntimeError: ignored

In [None]:
arch_setup.plot_loss_train_test(log,model)

In [None]:
arch_setup.plot_ROC_local_gpu(train_loader,val_loader,model,device)

In [None]:
model

alex_mdf_model(
  (arch): AlexNet(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
    (classifier): Sequential(
      (0): Dropout(p=0.5, inplace=False)
   

## SUBMIT

In [None]:
test_path = 'MAFAT RADAR Challenge - Public Test Set V1'
test_df = get_data.load_data(test_path, PATH_ROOT + PATH_DATA)
test_df = specto_feat.data_preprocess(test_df.copy())
test_x = test_df['iq_sweep_burst']
test_x = test_x.reshape(list(test_x.shape)+[1])

In [None]:
# Creating DataFrame with the probability prediction for each segment
submission =  pd.DataFrame()
submission['segment_id'] = test_df['segment_id']
submission['prediction'] = model(torch.from_numpy(test_x).to(device).type(torch.float32)).detach().cpu().numpy()
submission['prediction'] = submission['prediction'].astype('float')

# Save submission
submission.to_csv('submission.csv', index=False)