In [1]:
import sys
sys.path.append('../src')
from models import *
from strategies import *
from custom_datasets import *
import numpy as np
np.random.seed(0)
import tqdm

import pytorch_lightning as pl
import segmentation_models_pytorch as smp
import os
import torch
torch.cuda.empty_cache()
import torch.nn as nn

from PIL import Image
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset
import pandas as pd

import time
import json



In [2]:
# Project root; every other location used by the notebook hangs off it.
main_path = "/root/Master_Thesis/"
# Folder with the per-dataset train/test CSV files.
dataframes_path = f"{main_path}data/dataframes/"
# Segment Anything checkpoint.
sam_path = f"{main_path}sam/sam_vit_h_4b8939.pth"
# Destination folder for the JSON experiment records.
expirements_path = f"{main_path}expirements/"

In [3]:
# Dataset used for this run; the CSV file names are derived from it
# ("<df_name>_train.csv" / "<df_name>_test.csv" inside dataframes_path).
df_name = "brain_df"
train_df = pd.read_csv(f"{dataframes_path}{df_name}_train.csv")
test_df = pd.read_csv(f"{dataframes_path}{df_name}_test.csv")

## Too many misclassifications
# df_name = "fire_df"
# train_df = pd.read_csv(dataframes_path+"fire_df_train.csv")
# test_df = pd.read_csv(dataframes_path+"fire_df_test.csv")

# Couldn't learn from it
# df_name = "aerial_df"
# train_df = pd.read_csv(dataframes_path+"aerial_df_train.csv")
# test_df = pd.read_csv(dataframes_path+"aerial_df_test.csv")

# Couldn't learn from it
# df_name = "lung_df"
# train_df = pd.read_csv(dataframes_path+"lung_df_train.csv")
# test_df = pd.read_csv(dataframes_path+"lung_df_test.csv")

# Couldn't learn from it
# df_name = "lung_tumor_df"
# train_df = pd.read_csv(dataframes_path+"lung_tumor_df_train.csv")
# test_df = pd.read_csv(dataframes_path+"lung_tumor_df_test.csv")

In [4]:
# Number of rows in the test split (displayed as the cell output).
test_df.shape[0]

1179

In [5]:
# Run configuration. The whole dict is dumped to JSON at the end of the
# experiment, so it should only hold JSON-serialisable values.
params = {
    "n_epoch": 25,
    "train_args": {
        "batch_size": 4,
        "num_workers": 1,
    },
    "test_args": {
        "batch_size": 256,
        "num_workers": 1,
    },
    "optimizer_args": {
        "lr": 5e-3,
        "momentum": 0.9,
    },
    "use_sam": False,
    "use_predictor": False,
    "use_generator": False,
    "init_set_size": 100,
    # Alternative considered: int(0.1*len(test_df))
    "query_num": 5,
    "rounds": 2,
    "activate_sam_at_round": 1,
    "test_set_size": len(test_df),
    "df": df_name,
    "img_size": (256, 256),
}

print(params)

{'n_epoch': 25, 'train_args': {'batch_size': 4, 'num_workers': 1}, 'test_args': {'batch_size': 256, 'num_workers': 1}, 'optimizer_args': {'lr': 0.005, 'momentum': 0.9}, 'use_sam': False, 'use_predictor': False, 'use_generator': False, 'init_set_size': 100, 'query_num': 5, 'rounds': 2, 'activate_sam_at_round': 1, 'test_set_size': 1179, 'df': 'brain_df'}


In [6]:
# Only build the SAM oracle when the run asks for it; otherwise downstream
# code receives None.
sam = (
    SAMOracle(checkpoint_path=sam_path, img_size=params["img_size"])
    if params['use_sam']
    else None
)

In [7]:
# U-Net with a ResNet-34 encoder, 3 input channels and a single output class.
model = smp.create_model(
    'Unet', encoder_name='resnet34', in_channels=3, classes=1
)
# torch.save(model.state_dict(), 'init_state.pt')

# Reference initial weights that the model is reset to before each retraining.
# map_location="cpu" lets the checkpoint load on machines without a GPU;
# load_state_dict() later copies the values onto whatever device the model is on.
init_state = torch.load('init_state_Unet.pt', map_location="cpu")

# net = Net(model, params, device = torch.device("cuda:1"))
# Use the GPU when one is available, otherwise fall back to the CPU instead of
# failing on a hard-coded "cuda" device.
net = Net(
    model,
    params,
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

In [8]:
def get_data(handler, train_df, test_df):
    """Wrap the train/test dataframes in the project's ``Data`` container.

    The ``images`` and ``masks`` columns of both frames are passed on as plain
    lists, together with ``handler``. The image size and the SAM switch are
    read from the global ``params``; the data root is built from the global
    ``main_path``.

    Args:
        handler: Object forwarded unchanged to ``Data`` as its handler.
        train_df: DataFrame with ``images`` and ``masks`` columns (training pool).
        test_df: DataFrame with ``images`` and ``masks`` columns (test split).

    Returns:
        The ``Data`` instance built from the arguments above.
    """
    train_images = train_df["images"].to_list()
    train_masks = train_df["masks"].to_list()
    test_images = test_df["images"].to_list()
    test_masks = test_df["masks"].to_list()
    processed_root = main_path + "/data/processed/"

    return Data(
        train_images,
        train_masks,
        test_images,
        test_masks,
        handler,
        img_size=params["img_size"],
        df=train_df,
        path=processed_root,
        use_sam=params['use_sam'],
    )


In [9]:
# Build the dataset wrapper for the selected train/test frames.
data = get_data(handler=Handler, train_df=train_df, test_df=test_df)
# Presumably marks `init_set_size` samples as the initial labelled pool -
# confirm against the Data class in custom_datasets.
data.initialize_labels(params["init_set_size"])

### Choose an AL strategy from: a) RandomSampling, b) MarginSampling, c) EntropySampling, d) KCenterGreedy, e) AdversarialBIM

In [10]:
# Record the strategy name alongside the other run parameters so it ends up
# in the saved experiment file.
params["strategy"] = "MarginSampling"

strategy = MarginSampling(
    dataset=data,
    net=net,
    sam=sam,
)
# Start from the reference initial weights.
strategy.net.net.load_state_dict(init_state)

In [11]:
torch.cuda.empty_cache()
logs = []

# Round 0: train on the initial labelled pool and record the baseline metrics.
print("Round 0")
strategy.train()
logits, mask_gt = strategy.predict(data.get_test_data())
iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt)
logs.append(f"Round 0 testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
print(logs[0])

torch.save(strategy.net.net.state_dict(), 'trained_before_sam.pt')
print("Model's saved!\n")

# Loaded once instead of once per round: load_state_dict() copies the values
# into the model, so the same reference state dict can be reused every round.
init_state = torch.load('init_state_Unet.pt')

for rd in range(1, params["rounds"]):
    print(f"Round {rd}")

    # query
    print("Querying")
    query_idxs = strategy.query(params["query_num"])
    print(query_idxs)

    # update labels
    if params["use_sam"] and rd >= params["activate_sam_at_round"]:
        print("Updating with sam")
        strategy.update(query_idxs, start_sam=True, use_predictor=params["use_predictor"], use_generator=params["use_generator"])
        print("Sam failed to mask: ", strategy.sam_failed)
    else:
        print("Updating without sam")
        strategy.update(query_idxs)

    # Retrain from the reference initial weights on the enlarged labelled pool.
    print("Reset and train")
    strategy.net.net.load_state_dict(init_state)
    strategy.train()

    # calculate accuracy
    # The ground truth returned by this round's prediction is the one scored
    # (the original bound it to a misspelt name and reused the round-0 masks).
    logits, mask_gt = strategy.predict(data.get_test_data())
    iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt)
    # logs.append(f"Round {rd} testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}, human_envolved = {strategy.human_envolved}")
    # NOTE(review): the label says "sam_failed" but the value logged is
    # strategy.human_envolved - confirm which of the two is intended.
    logs.append(f"Round {rd} testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}, sam_failed = {strategy.human_envolved}")
    strategy.human_envolved = 0
    print(logs[rd])

params['logs'] = logs

# Save the run as the next numbered experiment file. Only the top-level
# experiments folder is used: iterating os.walk() would write a copy into
# every subdirectory, and would silently write nothing if the folder is missing.
os.makedirs(expirements_path, exist_ok=True)
experiment_idx = sum(
    os.path.isfile(os.path.join(expirements_path, entry))
    for entry in os.listdir(expirements_path)
)
filename = "expirement_{}.json".format(experiment_idx)
# Never overwrite an earlier record (the count can collide after deletions).
while os.path.exists(os.path.join(expirements_path, filename)):
    experiment_idx += 1
    filename = "expirement_{}.json".format(experiment_idx)
file_path = os.path.join(expirements_path, filename)
with open(file_path, 'w') as f:
    json.dump(params, f)
print(filename)

Round 0
100


100%|███████████████████████████████████████████████████| 25/25 [02:49<00:00,  6.79s/it, loss=0.275]


Round 0 testing metrics: iou_score = 0.24, accuracy = 0.99, precision = 0.76, recall = 0.26, f1_score = 0.39
Model's saved!

Round 1
Querying
[1233 1271 1266 1256 1249]
Updating with sam
Sam failed to mask:  []
Reset and train
105


100%|███████████████████████████████████████████████████| 25/25 [03:02<00:00,  7.30s/it, loss=0.127]


Round 1 testing metrics: iou_score = 0.52, accuracy = 0.99, precision = 0.81, recall = 0.60, f1_score = 0.69, sam_failed = 0
expirement_37.json


In [12]:
# data = get_data(Handler, train_df, test_df)
# data.initialize_labels(params["init_set_size"])
# strategy = EntropySampling(dataset=data, net=net, sam=sam)
# strategy.net.net.load_state_dict(init_state)
# params["strategy"] = "EntropySampling"

In [13]:
# torch.cuda.empty_cache()
# logs=[]
# print("Round 0")
# strategy.train()
# logits, mask_gt = strategy.predict(data.get_test_data())
# iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
# logs.append(f"Round 0 testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
# print(logs[0])

# for rd in range(1, params["rounds"]):
#     print(f"Round {rd}")

#     # query
#     print("Querying")
#     query_idxs = strategy.query(params["query_num"])
#     print(query_idxs)

#     # update labels
#     if params["use_sam"] and rd >= params["activate_sam_at_round"]:
#         print("Updating with sam")
#         strategy.update(query_idxs, start_sam=True, use_predictor=params["use_predictor"], use_generator=params["use_generator"])
#     else:
#         print("Updating without sam")
#         strategy.update(query_idxs)
    
#     print("Reset and train")
#     init_state = torch.load('init_state.pt')
#     strategy.net.net.load_state_dict(init_state)
#     strategy.train()

#     # calculate accuracy
#     logits, maks_gt = strategy.predict(data.get_test_data())
#     iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
#     logs.append(f"Round {rd} testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
#     print(logs[rd])
    
# params['logs'] = logs

# for dirname, _, filenames in os.walk(expirements_path):
#     filename = "expirement_{}.json".format(len(filenames))
#     file_path = os.path.join(dirname, filename)
#     with open(file_path, 'w') as f:
#         json.dump(params, f)
#         print(filename)

In [14]:
# data = get_data(Handler, train_df, test_df)
# data.initialize_labels(params["init_set_size"])
# strategy = BALDDropout(dataset=data, net=net, sam=sam)
# strategy.net.net.load_state_dict(init_state)
# params["strategy"] = "BALDDropout"

In [15]:
# torch.cuda.empty_cache()
# logs=[]
# print("Round 0")
# strategy.train()
# logits, mask_gt = strategy.predict(data.get_test_data())
# iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
# logs.append(f"Round 0 testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
# print(logs[0])

# for rd in range(1, params["rounds"]):
#     print(f"Round {rd}")

#     # query
#     print("Querying")
#     query_idxs = strategy.query(params["query_num"])
#     print(query_idxs)

#     # update labels
#     if params["use_sam"] and rd >= params["activate_sam_at_round"]:
#         print("Updating with sam")
#         strategy.update(query_idxs, start_sam=True, use_predictor=params["use_predictor"], use_generator=params["use_generator"])
#     else:
#         print("Updating without sam")
#         strategy.update(query_idxs)
    
#     print("Reset and train")
#     init_state = torch.load('init_state.pt')
#     strategy.net.net.load_state_dict(init_state)
#     strategy.train()

#     # calculate accuracy
#     logits, maks_gt = strategy.predict(data.get_test_data())
#     iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
#     logs.append(f"Round {rd} testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
#     print(logs[rd])
    
# params['logs'] = logs

# for dirname, _, filenames in os.walk(expirements_path):
#     filename = "expirement_{}.json".format(len(filenames))
#     file_path = os.path.join(dirname, filename)
#     with open(file_path, 'w') as f:
#         json.dump(params, f)
#         print(filename)

In [16]:
# data = get_data(Handler, train_df, test_df)
# data.initialize_labels(params["init_set_size"])
# strategy = AdversarialBIM(dataset=data, net=net, sam=sam)
# strategy.net.net.load_state_dict(init_state)
# params["strategy"] = "AdversarialBIM"

In [17]:
# torch.cuda.empty_cache()
# logs=[]
# print("Round 0")
# strategy.train()
# logits, mask_gt = strategy.predict(data.get_test_data())
# iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
# logs.append(f"Round 0 testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
# print(logs[0])

# for rd in range(1, params["rounds"]):
#     print(f"Round {rd}")

#     # query
#     print("Querying")
#     query_idxs = strategy.query(params["query_num"])
#     print(query_idxs)

#     # update labels
#     if params["use_sam"] and rd >= params["activate_sam_at_round"]:
#         print("Updating with sam")
#         strategy.update(query_idxs, start_sam=True, use_predictor=params["use_predictor"], use_generator=params["use_generator"])
#     else:
#         print("Updating without sam")
#         strategy.update(query_idxs)
    
#     print("Reset and train")
#     init_state = torch.load('init_state.pt')
#     strategy.net.net.load_state_dict(init_state)
#     strategy.train()

#     # calculate accuracy
#     logits, maks_gt = strategy.predict(data.get_test_data())
#     iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
#     logs.append(f"Round {rd} testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
#     print(logs[rd])
    
# params['logs'] = logs

# for dirname, _, filenames in os.walk(expirements_path):
#     filename = "expirement_{}.json".format(len(filenames))
#     file_path = os.path.join(dirname, filename)
#     with open(file_path, 'w') as f:
#         json.dump(params, f)
#         print(filename)

In [18]:
# data = get_data(Handler, train_df, test_df)
# data.initialize_labels(params["init_set_size"])
# strategy = KCenterGreedy(dataset=data, net=net, sam=sam)
# strategy.net.net.load_state_dict(init_state)
# params["strategy"] = "KCenterGreedy"

In [19]:
# torch.cuda.empty_cache()
# logs=[]
# print("Round 0")
# strategy.train()
# logits, mask_gt = strategy.predict(data.get_test_data())
# iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
# logs.append(f"Round 0 testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
# print(logs[0])

# for rd in range(1, params["rounds"]):
#     print(f"Round {rd}")

#     # query
#     print("Querying")
#     query_idxs = strategy.query(params["query_num"])
#     print(query_idxs)

#     # update labels
#     if params["use_sam"] and rd >= params["activate_sam_at_round"]:
#         print("Updating with sam")
#         strategy.update(query_idxs, start_sam=True, use_predictor=params["use_predictor"], use_generator=params["use_generator"])
#     else:
#         print("Updating without sam")
#         strategy.update(query_idxs)
    
#     print("Reset and train")
#     init_state = torch.load('init_state.pt')
#     strategy.net.net.load_state_dict(init_state)
#     strategy.train()

#     # calculate accuracy
#     logits, maks_gt = strategy.predict(data.get_test_data())
#     iou_score, accuracy, precision, recall, f1_score = data.cal_test_metrics(logits, mask_gt )
#     logs.append(f"Round {rd} testing metrics: iou_score = {iou_score:.2f}, accuracy = {accuracy:.2f}, precision = {precision:.2f}, recall = {recall:.2f}, f1_score = {f1_score:.2f}")
#     print(logs[rd])
    
# params['logs'] = logs

# for dirname, _, filenames in os.walk(expirements_path):
#     filename = "expirement_{}.json".format(len(filenames))
#     file_path = os.path.join(dirname, filename)
#     with open(file_path, 'w') as f:
#         json.dump(params, f)
#         print(filename)