In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from sklearn.model_selection import train_test_split
import random
import numpy as np
from tqdm import tqdm
import argparse
import wandb
from accelerate import Accelerator
from accelerate.utils import DistributedDataParallelKwargs
from LF_library import *
from LF_deep_utils import *
from dataset import *
from LF_utils import *
from sklearn.metrics import precision_score, recall_score, f1_score

np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Dataset

In [23]:
# pos_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/All_Pos_Neg/all_pos/rgb_images/'
# neg_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/All_Pos_Neg/all_neg/rgb_images/'

# pos_dem_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/All_Pos_Neg/all_pos/dem/'
# neg_dem_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/All_Pos_Neg/all_neg/dem/'

# pos_gt_mask_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/All_Pos_Neg/all_pos/ground_truth/'
# neg_gt_mask_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/All_Pos_Neg/all_neg/ground_truth/'

# data_dir = '/home/macula/SMATousi/Gullies/ground_truth/organized_data/All_Pos_Neg/combined_folder_true_rgb/'
data_dir = '/home/macula/SMATousi/cluster/docker-images/ollama/MO+IA_test_data_numbered/'

batch_size = 1
learning_rate = 0.0001
number_of_workers = 0
val_percent = 0.0 #You should change this based on the datasest nature


transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

In [24]:
class SixImageDataset_No_Label(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data = []
        self.tile_numbers = []
        # self.labels = []

        # Load all files and group by tiles
        tile_files = [f for f in os.listdir(data_dir) if f.endswith('.tif')]
        # neg_files = [f for f in os.listdir(neg_dir) if f.endswith('.tif')]
        tile_dict = self.group_files_by_tile(tile_files)
        # print(len(tile_dict))
        # neg_tiles = self.group_files_by_tile(neg_files)

        # Handle class imbalance by oversampling the minority class
        # max_len = max(len(pos_tiles), len(neg_tiles))
        # if len(pos_tiles) > len(neg_tiles):
        #     neg_tiles = self.oversample(neg_tiles, max_len)
        # else:
        #     pos_tiles = self.oversample(pos_tiles, max_len)

        # Combine and store
        self.store_tiles(tile_dict, data_dir)
        # print(self.data[1])
        # print(self.tile_numbers[1])
        # self.store_tiles(neg_tiles, neg_dir, 0)
        
        self.transform = transform

    def group_files_by_tile(self, files):
        tile_dict = {}
        # print(len(files))
        for file in files:
            tile_number = file.split('_')[-1].split('.')[0]
            if tile_number not in tile_dict:
                tile_dict[tile_number] = []
            tile_dict[tile_number].append(file)
        # Only include complete groups
        # return [tile for tile in tile_dict.values() if len(tile) == 6]
        # return [tile for tile in tile_dict.values()]
        # print(tile_dict)
        return tile_dict

    # def oversample(self, tiles, target_length):
    #     # Repeat tiles until the desired length is achieved
    #     return random.choices(tiles, k=target_length)

    def store_tiles(self, tiles_dict, directory):
        for tile_number in tiles_dict.keys():
            # print("----")
            # print(tile_number)
            self.data.append([os.path.join(directory, f) for f in sorted(tiles_dict[tile_number])])
            self.tile_numbers.append(tile_number)
            # print(self.data)
            # self.labels.append(label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_paths = self.data[idx]
        images_1 = [imageio.imread(img_path).astype('uint8') for img_path in self.data[idx]]
        images = [transforms.functional.to_pil_image(image) for image in images_1]
        if self.transform:
            images = [self.transform(image) for image in images]

        tile_number = self.tile_numbers[idx]
        # label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return images, tile_number, image_paths
        
full_dataset = SixImageDataset_No_Label(data_dir, transform=transform)


In [25]:
51108/6

8518.0

In [26]:
n_val = int(len(full_dataset) * val_percent)
n_train = len(full_dataset) - n_val
train, val = random_split(full_dataset, [n_train, n_val])

train_loader = DataLoader(train, batch_size=batch_size, shuffle=False, num_workers=number_of_workers, pin_memory=True)
val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, num_workers=number_of_workers, pin_memory=True, drop_last=True)


In [27]:
train_iter = iter(train_loader)

images, tile_number, img_paths = next(train_iter)

  images_1 = [imageio.imread(img_path).astype('uint8') for img_path in self.data[idx]]


In [28]:
images, tile_number, img_paths = next(train_iter)

print(img_paths)
print(tile_number)

  images_1 = [imageio.imread(img_path).astype('uint8') for img_path in self.data[idx]]


[['/home/macula/SMATousi/cluster/docker-images/ollama/MO+IA_test_data_numbered/neg_rgb_0_tile_86.tif'], ['/home/macula/SMATousi/cluster/docker-images/ollama/MO+IA_test_data_numbered/neg_rgb_1_tile_86.tif'], ['/home/macula/SMATousi/cluster/docker-images/ollama/MO+IA_test_data_numbered/neg_rgb_2_tile_86.tif'], ['/home/macula/SMATousi/cluster/docker-images/ollama/MO+IA_test_data_numbered/neg_rgb_3_tile_86.tif'], ['/home/macula/SMATousi/cluster/docker-images/ollama/MO+IA_test_data_numbered/neg_rgb_4_tile_86.tif'], ['/home/macula/SMATousi/cluster/docker-images/ollama/MO+IA_test_data_numbered/neg_rgb_5_tile_86.tif']]
['86']


In [8]:
images[0].shape

torch.Size([1, 3, 128, 128])

# Labeling Functions

Deep leanrning model:

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

resnet_extractor = ResNetFeatureExtractor()
mlp_classifier = MLPClassifier(input_size=6*2048, hidden_size=512, output_size=1)

model = Gully_Classifier(input_size=6*2048, hidden_size=512, output_size=1).to(device)

state_dict = torch.load('../weak-supervision/trained_models/model_epoch_100.pth')
# state_dict_new = torch.load('./artifacts/new_loss/model_epoch_600.pth')

new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
model.load_state_dict(new_state_dict)

model.eval()



Gully_Classifier(
  (feature_extractor): ResNetFeatureExtractor(
    (feature_extractor): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   

Other labeling functions:

In [10]:
# Linear Detector

linear_LF = LineDetector(threshold1=100, 
                         threshold2=200, 
                         threshold=50, 
                         min_line_length=10, 
                         max_line_gap=5, 
                         repetition_threshold=1,
                         tolerance=5)

In [11]:
# Super pixel detector


super_pixel_detector = SuperPixelClassifier(num_segments=50,
                                            circularity_threshold=0.1,
                                            aspect_ratio_threshold=3.5,
                                            detection_threshold=0.7)


# Applying the LFs

In [29]:
test_total_deep_learning_labels = {}
test_total_line_detection_labels = {}
test_total_super_pixel_labels = {}

for i, batch in enumerate(tqdm(train_loader)):
    
    
    images, tile_number, image_paths = batch
    list_of_images = [image.to(device) for image in images]
    # total_david_labels.append(labels.numpy())
    
    # Applying the deep learning model
    deep_learning_output = model(list_of_images)
    preds = torch.round(deep_learning_output.squeeze()).detach().cpu().numpy()
    
    #Applying the line detection model
    line_detection_output = linear_LF.detect_repetitive_lines(images)
    
    #Applying the super pixel detector
    super_pixel_output = super_pixel_detector.run_classification(images)
    
    # Gathering the results
    test_total_deep_learning_labels[tile_number[0]] = int(preds)
    test_total_line_detection_labels[tile_number[0]] = int(line_detection_output)
    test_total_super_pixel_labels[tile_number[0]] = int(super_pixel_output)

    # break

    

  images_1 = [imageio.imread(img_path).astype('uint8') for img_path in self.data[idx]]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 900/900 [04:35<00:00,  3.27it/s]


In [30]:
import json 

with open(f'test_total_deep_learning_labels.json', "w") as file:
    json.dump(test_total_deep_learning_labels, file)


with open(f'test_total_line_detection_labels.json', "w") as file:
    json.dump(test_total_line_detection_labels, file)


with open(f'test_total_super_pixel_labels.json', "w") as file:
    json.dump(test_total_super_pixel_labels, file)

In [None]:
# import json 

with open(f'train_deep_learning_labels.json', "w") as file:
    json.dump(total_deep_learning_labels, file)

In [52]:
preds

array(0., dtype=float32)

In [49]:
count = 0
for i, batch in enumerate(train_loader):

    images, tile_number = batch

    if len(images) != 1:
        count = count + 1

print(count)

KeyboardInterrupt: 

# Evaluation of the Results

In [16]:
print("For the Deep learning model =====================")
print("Recall: ", recall_score(total_david_labels, total_deep_learning_labels))
print("Precision: ", precision_score(total_david_labels, total_deep_learning_labels))
print("F1 Score: ", f1_score(total_david_labels, total_deep_learning_labels))

print("For the Line detection model model =====================")
print("Recall: ", recall_score(total_david_labels, total_line_detection_labels))
print("Precision: ", precision_score(total_david_labels, total_line_detection_labels))
print("F1 Score: ", f1_score(total_david_labels, total_line_detection_labels))

print("For the super pixel detection model model =====================")
print("Recall: ", recall_score(total_david_labels, total_super_pixel_labels_adjusted))
print("Precision: ", precision_score(total_david_labels, total_super_pixel_labels_adjusted))
print("F1 Score: ", f1_score(total_david_labels, total_super_pixel_labels_adjusted))

Recall:  0.9957081545064378
Precision:  0.9935760171306209
F1 Score:  0.9946409431939979
Recall:  0.6030042918454935
Precision:  0.518450184501845
F1 Score:  0.5575396825396826
Recall:  0.5686695278969958
Precision:  0.5686695278969958
F1 Score:  0.5686695278969958


In [14]:
total_super_pixel_labels_adjusted = np.array(total_super_pixel_labels) * -1

In [15]:
total_super_pixel_labels_adjusted

array([1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1,