# Final submission
- This file includes the raw data processing and all experiment steps, like data processing, feature extraction, and models combination.

- The experiment runs on Google Colab, so all hardware (TPU and GPU) is the standard equipment provided by Google.

- The whole experiment is divided into several parts, according to their logical role and required performance. For example, the fuzzy-logic enhancement of the images before deep learning was completed in 7 separate runs, because the GPU performance is not enough to process all 6334 pictures in one run.

- This is version no. 7. If anything is confusing, please check the earlier versions, which give more details about my project.

# Data Structure
The working directory is on Google Drive; the root path is "/content/drive/MyDrive/CovidDetection".

- CovidDetection
  - siim-covid19-detection: raw data dir, which is downloaded from Kaggle
    - train
    - test
    - sample_submission.csv
    - train_image_level.csv
    - train_study_level.csv
  - dataset: data dir
    - tmp: processed data dir, 256*256, .jpg
    - fctmp: fuzzy-enhanced colour images based on tmp, 256*256, .jpg
    - fgtmp: fuzzy-enhanced grayscale images based on tmp, 256*256, .jpg
    - info: the dir for info summary, which is processed before the model running and easy for using by every version
  - version_n
    - dataset: which is copied from dir, "dataset", and the data is ready for this experiment version.
      - images
        - train
        - val
      - labels
        - train
        - val
      - others: e.g. yolo, the directory for the YOLO algorithm.
  - submission
    - to be continued.

# Submission file 
- Data processing
- Feature extraction
- Model 
  - fuzzy enhancement (optional)
  - YOLO v5/3
  - Choquet Integral
- Metrics
  - IoU
  - mAP
  - mAP@0.5-0.95

# Preparation
- to mount Google Drive
- to make dirs
- to download the competition dataset
- to install required packages
- to log in to Wandb

Ready, Go!

In [1]:
# Mount Google Drive so the notebook can read and write the project folder.
# The mount point (root path) is "/content/drive/".

from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
# Print the Python version of the runtime.
!python -V

Python 3.7.12


In [None]:
!pip install matplotlib
!pip install numpy
!pip install opencv-python
!pip install pillow
!pip install pyyaml
!pip install scipy
!pip install torch
!pip install torchvision
!pip install tqdm
!pip install tensorboard
!pip install wandb
!pip install seaborn
!pip install pandas
!pip install coremltools
!pip install onnx
!pip install onnx-simlifier
!pip install scikit-learn
!pip install tensorflow
!pip install tensorflowjs
!pip install thop
!pip install pydicom
!pip install pylibjpeg

In [5]:
# Create the project folders on Google Drive (safe to re-run: existing
# folders are left untouched).

import os

for folder in (
    '/content/drive/MyDrive/CovidDetection',
    '/content/drive/MyDrive/CovidDetection/dataset',
    '/content/drive/MyDrive/CovidDetection/dataset/tmp',
    '/content/drive/MyDrive/CovidDetection/dataset/fctmp',
    '/content/drive/MyDrive/CovidDetection/dataset/fgtmp',
    '/content/drive/MyDrive/CovidDetection/dataset/info',
    '/content/drive/MyDrive/CovidDetection/submission',
):
    os.makedirs(folder, exist_ok=True)
# A per-version folder is not created here:
#os.makedirs('/content/drive/MyDrive/CovidDetection/version_n', exist_ok=True)

# Report success only after every folder has actually been created.
print("Created folder structure")

Created folder structure


In [7]:
# Download the raw dataset from Kaggle into the project root.

%cd /content/drive/MyDrive/CovidDetection/
!ls
## The download command is shown on the next line; the data has already been downloaded, so it is commented out.
## The two `cp` commands copy the label CSVs from the raw data folder into dataset/info.
#!kaggle competitions download -c siim-covid19-detection
#!cp /content/drive/MyDrive/CovidDetection/siim-covid19-detection/train_image_level.csv /content/drive/MyDrive/CovidDetection/dataset/info/train_image_level.csv
#!cp /content/drive/MyDrive/CovidDetection/siim-covid19-detection/train_study_level.csv /content/drive/MyDrive/CovidDetection/dataset/info/train_study_level.csv

/content/drive/MyDrive/CovidDetection
dataset  submission


In [None]:
# Log in to Weights & Biases (see "login in Wandb" in the Preparation list).
import wandb
wandb.login()

In [None]:
import tensorboard as tb

# Part 1 Data processing & Feature extraction

## Part 1-1 Dicom to JPG
Dicom 

In [10]:
# Switch to the project root and list its contents.
%cd /content/drive/MyDrive/CovidDetection/
!ls

/content/drive/MyDrive/CovidDetection
dataset  siim-covid19-detection  submission


In [None]:
import os

from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import pylibjpeg
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
def read_xray(path, voi_lut=True, fix_monochrome=True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When True, apply the VOI LUT (if available by DICOM device)
            to transform the raw DICOM data to a "human-friendly" view.
        fix_monochrome: When True, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``, which would
            otherwise look inverted.

    Returns:
        ``np.uint8`` array holding the pixel data min-max scaled to [0, 255].
    """
    # dcmread is the current name of the deprecated pydicom.read_file.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0; skip the division instead of producing
    # NaNs from 0/0 (the result is then an all-zero image).
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

In [None]:
def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image of at most ``size`` x ``size``.

    Args:
        array: Pixel array accepted by ``Image.fromarray``.
        size: Target edge length in pixels.
        keep_ratio: When True, shrink in place with ``thumbnail`` (aspect ratio
            preserved); otherwise resize to exactly ``(size, size)``.
        resample: Resampling filter passed to PIL.

    Returns:
        The resized PIL image.
    """
    picture = Image.fromarray(array)
    target = (size, size)

    if not keep_ratio:
        return picture.resize(target, resample)

    picture.thumbnail(target, resample)
    return picture

In [None]:
# Convert every DICOM file of the raw train/test folders into a 256x256 JPG
# and record the original image dimensions for later bounding-box scaling.
image_id = []
dim0 = []
dim1 = []
splits = []

for split in ['train', 'test']:
    save_dir = f'/content/drive/MyDrive/CovidDetection/dataset/tmp/{split}/'

    os.makedirs(save_dir, exist_ok=True)

    for dirname, _, filenames in tqdm(os.walk(f'/content/drive/MyDrive/CovidDetection/siim-covid19-detection/{split}')):
        for file in filenames:
            # Split off the extension instead of replacing the substring
            # 'dcm' anywhere in the name, and skip anything that is not DICOM.
            stem, ext = os.path.splitext(file)
            if ext.lower() != '.dcm':
                continue

            # set keep_ratio=True to have original aspect ratio
            xray = read_xray(os.path.join(dirname, file))
            im = resize(xray, size=256)
            im.save(os.path.join(save_dir, stem + '.jpg'))

            image_id.append(stem)
            dim0.append(xray.shape[0])
            dim1.append(xray.shape[1])
            splits.append(split)

In [None]:
# Store the per-image metadata. The file goes to dataset/info, which is the
# folder created during preparation and the one the next part reads
# "meta.csv" from.
df = pd.DataFrame.from_dict({'image_id': image_id, 'dim0': dim0, 'dim1': dim1, 'split': splits})
df.to_csv('/content/drive/MyDrive/CovidDetection/dataset/info/meta.csv', index=False)

## Part 1-2 
- Raw file:
  - train_image_level.csv
  - train_study_level.csv
- Final file:
  - info_summary.csv
- Other files
  - They are all temp files.

In [12]:
import pandas as pd
import numpy as np
import ast

In [13]:
# Work inside the info folder: the following cells read and write CSVs by relative name.
%cd /content/drive/MyDrive/CovidDetection/dataset/info
!ls

/content/drive/MyDrive/CovidDetection/dataset/info


In [None]:
# Load the image metadata and build one width/height table per split
# (dim1 is stored as width, dim0 as height).
meta = pd.read_csv("meta.csv")
is_train = meta.split == 'train'
is_test = meta.split == 'test'
meta_train = meta.loc[is_train, ['image_id', 'dim1', 'dim0']].rename(
    columns={'dim1': 'width', 'dim0': 'height'})
meta_test = meta.loc[is_test, ['image_id', 'dim1', 'dim0']].rename(
    columns={'dim1': 'width', 'dim0': 'height'})
meta_train.head()

In [None]:
# Load the image-level labels, strip the "_image" suffix from the ids and
# give the columns the names used for the later merges.
train_image_level = pd.read_csv("train_image_level.csv")
train_image_level["id"] = [raw_id.replace("_image", "") for raw_id in train_image_level["id"]]
train_image_level.rename(
    columns={
        'id': "image_id",
        'label': "image_label",
        "StudyInstanceUID": "study_id",
    },
    inplace=True,
)
train_image_level.head()

In [None]:
# Load the study-level labels (fixed: the variable was misspelled
# "rain_study_level", so every later line raised a NameError).
train_study_level = pd.read_csv("train_study_level.csv")
classes_dict = {
    0 : "Negative for Pneumonia",
    1  : "Typical Appearance",
    2  : "Indeterminate Appearance",
    3  : "Atypical Appearance"
}

# The four one-hot label columns, ordered by class index.
label_columns = [classes_dict[idx] for idx in sorted(classes_dict)]

# Position of the largest value in each row = class index of the study.
class_index = train_study_level[label_columns].to_numpy().argmax(axis=1)

train_study_level["pneumonia"] = [classes_dict[idx] for idx in class_index]
train_study_level["pneumonia_class"] = class_index

# The one-hot columns are no longer needed once the class is known.
train_study_level = train_study_level.drop(label_columns, axis=1)
train_study_level["id"] = train_study_level["id"].map(lambda x : x.replace("_study",""))
train_study_level.rename(columns={"id" : "study_id"},inplace=True)
train_study_level.head()

In [None]:
# Combine image-level labels, study-level labels and image sizes into one table.
train_info = pd.merge(train_image_level,train_study_level,on = "study_id") # Merging study_df and image_df

train_info = pd.merge(train_info,meta_train,on = "image_id") # Merging to meta_train for height,width

# Images without boxes get a 1x1 placeholder box. Assign the result back
# instead of calling fillna(inplace=True) on a column selection, which is a
# chained assignment that pandas does not guarantee to write through.
train_info["boxes"] = train_info["boxes"].fillna("[{'x':0,'y':0,'width':1,'height':1}]")
temp = train_info # for going through the data
# The boxes are stored as text; parse them into Python lists of dicts.
train_info["boxes"] = train_info["boxes"].map(ast.literal_eval)


columns = ["image_id","study_id","pneumonia","pneumonia_class","height","width","boxes","image_label"] # for proper order
train_info = train_info[columns]


train_info.to_csv("/content/drive/MyDrive/CovidDetection/dataset/info/train_info.csv",index=False)
train_info.head()

In [None]:
# Persist the intermediate tables next to the other info files.
for table, csv_path in (
    (meta_train, "/content/drive/MyDrive/CovidDetection/dataset/info/meta_train.csv"),
    (meta_test, "/content/drive/MyDrive/CovidDetection/dataset/info/meta_test.csv"),
    (train_image_level, "/content/drive/MyDrive/CovidDetection/dataset/info/train_image_level_temp.csv"),
    (train_study_level, "/content/drive/MyDrive/CovidDetection/dataset/info/train_study_level_temp.csv"),
):
    table.to_csv(csv_path, index=False)

In [None]:
# Folder holding the resized training JPGs. It is defined here because this
# cell runs before the training-configuration cells that set the same value.
TRAIN_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/train/"


# Get image path from image_id
def get_path(image_id, train_dir=None):
    """Return the path of the JPG whose file name ends with ``{image_id}.jpg``.

    Uses the standard-library ``glob`` module; the previous implementation
    called ``tf.io.gfile.glob`` although TensorFlow is never imported.

    Args:
        image_id: Image identifier (file name without extension).
        train_dir: Folder prefix to search; must end with a path separator.
            Defaults to the module-level ``TRAIN_PATH``.

    Returns:
        The first matching path.

    Raises:
        IndexError: If no file matches.
    """
    import glob

    if train_dir is None:
        train_dir = TRAIN_PATH
    path = glob.glob(train_dir + f"*{image_id}.jpg")[0]
    return path

# Numeric id of each image-level label.
image_dict = {"opacity": 1, "none": 0}
df = pd.read_csv("/content/drive/MyDrive/CovidDetection/dataset/info/train_info.csv")

# Keep only the first word of the label string ("opacity" / "none").
df["image_label"] = [text.split(" ")[0] for text in df["image_label"]]
df["image_label_id"] = [image_dict[name] for name in df["image_label"]]
# Resolve the JPG path of every image.
df["filepath"] = [get_path(identifier) for identifier in df["image_id"]]
df.head()

In [None]:
# Imported here because the notebook-wide import cell only appears in Part 2.
from sklearn.model_selection import train_test_split

# Stratified 80/20 split on the image-level label.
# NOTE(review): SEED is not defined in any earlier cell of this notebook —
# define it before running this cell.
train_df,val_df = train_test_split(df,
                                    test_size=0.2,
                                    random_state = SEED,
                                    stratify = df.image_label.values
                                    )

# assign() returns new frames, which avoids pandas' SettingWithCopyWarning
# that the former `.loc[:, "data"] = ...` on the split results could raise.
train_df = train_df.assign(data="train")
val_df = val_df.assign(data="val")
df = pd.concat([train_df, val_df]).reset_index(drop=True)

In [None]:
# Reload the raw image-level CSV to recover the original label strings.
train_image_level = pd.read_csv('/content/drive/MyDrive/CovidDetection/dataset/info/train_image_level.csv')
# image_id is the part of the id column before the first underscore.
train_image_level['image_id'] = [raw_id.split('_')[0] for raw_id in train_image_level['id']]
# image_level is the first word of the label string.
train_image_level['image_level'] = [text.split(' ')[0] for text in train_image_level['label']]
train_image_level.head()

In [None]:
# Attach the raw label strings and save the summary table.
df = df.merge(train_image_level, on='image_id',how="left")
# index=False (as in the other to_csv calls of this notebook) keeps the row
# index out of the file, so reloading it does not add an "Unnamed: 0" column.
df.to_csv("/content/drive/MyDrive/CovidDetection/dataset/info/train_summary.csv", index=False)
df.head()

In [None]:
# Add the target paths of the fuzzy-enhanced images (fgfp -> fgtmp folder,
# fcfp -> fctmp folder) and save the final summary table.
info = pd.read_csv("train_summary.csv")
# Replace the "/tmp/" path component only, so any other occurrence of the
# letters "tmp" in the path is left alone.
info["fgfp"] = [path.replace('/tmp/', '/fgtmp/') for path in info["filepath"]]
info["fcfp"] = [path.replace('/tmp/', '/fctmp/') for path in info["filepath"]]
# index=False keeps the row index out of the file (no "Unnamed: 0" column on reload).
info.to_csv("info_summary.csv", index=False)
info.head()

In [None]:
# Don't forget to rename the folder inside tmp: the raw data provides a "test" folder, but YOLO uses "val".
#!mv /content/drive/MyDrive/CovidDetection/dataset/tmp/test /content/drive/MyDrive/CovidDetection/dataset/tmp/val

# Part 2 Models

In [None]:
# To confirm the work folder
os.makedirs('/content/drive/MyDrive/CovidDetection/submission', exist_ok=True)
%cd /content/drive/MyDrive/CovidDetection/Submission
!ls

In [None]:
# Download the original YOLOv5 code; it is cloned into the current working directory.
!git clone https://github.com/ultralytics/yolov5  # clone repo

In [None]:
import torch

# Report the torch version and the device in use (GPU name, or "CPU").
if torch.cuda.is_available():
    device_name = torch.cuda.get_device_properties(0).name
else:
    device_name = 'CPU'
print(f"Setup complete. Using torch {torch.__version__} ({device_name})")

In [None]:
# Necessary/extra dependencies. 
import os
import gc
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

#customize iPython writefile so we can write variables
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell):
    """Write *cell* to the file named by *line*, filling ``{name}`` placeholders.

    Placeholders are substituted with ``str.format`` from this module's globals.
    """
    with open(line, 'w') as template_file:
        rendered = cell.format(**globals())
        template_file.write(rendered)

In [None]:
# Load the summary table and report the split sizes. The counts come from the
# table's own "data" column, so this cell no longer depends on train_df /
# val_df from an earlier session (the old line also had a syntax error,
# `info.data=`).
info = pd.read_csv("/content/drive/MyDrive/CovidDetection/dataset/info/info_summary.csv")
n_train = int((info["data"] == "train").sum())
n_val = int((info["data"] == "val").sum())
print(f'Size of dataset: {len(info)}, training images: {n_train}. validation images: {n_val}')

## Part 2-1 Exp 1
Baseline 
YOLO v5s

In [None]:
# Create the images/labels x train/val folders of the baseline dataset.
for kind in ('images', 'labels'):
    for subset in ('train', 'val'):
        os.makedirs(f'/content/drive/MyDrive/CovidDetection/submission/dataset/baseline/{kind}/{subset}', exist_ok=True)

In [None]:
# Move the images to relevant split folder.
for i in tqdm(range(len(df))):
    row = df.iloc[i]
    if row.data == 'train':
        copyfile(row.filepath, f'/content/drive/MyDrive/CovidDetection/submission/dataset/baseline/images/train/{row.image_id}.jpg')
    else:
        copyfile(row.filepath, f'/content/drive/MyDrive/CovidDetection/submission/dataset/baseline/images/val/{row.image_id}.jpg')

In [None]:
# Create .yaml file 
import yaml

data_yaml = dict(
    train = '/content/drive/MyDrive/CovidDetection/submission/dataset/baseline/images/train',
    val = '/content/drive/MyDrive/CovidDetection/submission/dataset/baseline/images/val',
    nc = 2,
    names = ['none', 'opacity']
)

# Note that I am creating the file in the yolov5/data/ directory.
with open('/content/drive/MyDrive/CovidDetection/submission/src/yolo.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)
    
%cat /content/drive/MyDrive/CovidDetection/submission/src/yolo.yaml

In [None]:
# Get the raw bounding box by parsing the row value of the label column.

def get_bbox(row):
    """Parse ``row.label`` into a list of ``[v0, v1, v2, v3]`` float boxes.

    The label is a space-separated string made of groups of six tokens; the
    first two tokens of every group are skipped and the remaining four are
    converted to float. A trailing group with fewer than six tokens is dropped.
    """
    tokens = row.label.split(' ')
    bboxes = []
    for start in range(0, len(tokens), 6):
        group = tokens[start:start + 6]
        coords = [float(token) for token in group[2:]]
        if len(group) == 6:
            bboxes.append(coords)

    return bboxes

# Scale the bounding boxes according to the size of the resized image. 
def scale_bbox(row, bboxes):
    """Rescale boxes from the original image size to ``IMG_SIZE`` x ``IMG_SIZE``.

    Args:
        row: Record providing the original ``width`` and ``height``.
        bboxes: Boxes as ``[xmin, ymin, xmax, ymax]``.

    Returns:
        List of ``[xmin, ymin, xmax, ymax]`` with every value scaled, rounded
        to four decimals and then truncated to ``int``.
    """
    scale_x = IMG_SIZE/row.width
    scale_y = IMG_SIZE/row.height
    # x values use the horizontal factor, y values the vertical one.
    factors = (scale_x, scale_y, scale_x, scale_y)

    return [
        [int(np.round(bbox[k]*factors[k], 4)) for k in range(4)]
        for bbox in bboxes
    ]

# Convert the bounding boxes in YOLO format.
def get_yolo_format_bbox(img_w, img_h, bboxes):
    """Convert ``[xmin, ymin, xmax, ymax]`` boxes to normalised YOLO boxes.

    Args:
        img_w: Image width used for normalising x values.
        img_h: Image height used for normalising y values.
        bboxes: Boxes as ``[xmin, ymin, xmax, ymax]``.

    Returns:
        List of ``[x_center, y_center, width, height]``, each divided by the
        matching image dimension. Centres use the rounded half extent.
    """
    converted = []
    for box in bboxes:
        xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
        box_w = xmax - xmin
        box_h = ymax - ymin
        center_x = xmin + int(np.round(box_w/2))
        center_y = ymin + int(np.round(box_h/2))
        converted.append([center_x/img_w, center_y/img_h, box_w/img_w, box_h/img_h])

    return converted

In [None]:
# Prepare the txt label files (one per image that shows an opacity).
for i in tqdm(range(len(info))):
    row = info.loc[i]
    # Get image id
    img_id = row.image_id
    # Get split
    split = row.data
    # Get image-level label
    label = row.image_level

    # Fixed: labels must go into dataset/baseline/labels/<split>, the folder
    # created for this experiment next to dataset/baseline/images/<split>.
    # The old path omitted "baseline" and pointed at a folder that is never created.
    subset = 'train' if split == 'train' else 'val'
    file_name = f'/content/drive/MyDrive/CovidDetection/submission/dataset/baseline/labels/{subset}/{img_id}.txt'

    # Images without opacity get no label file.
    if label != 'opacity':
        continue

    # Get bboxes
    bboxes = get_bbox(row)
    # Scale bounding boxes
    scale_bboxes = scale_bbox(row, bboxes)
    # Format for YOLOv5
    yolo_bboxes = get_yolo_format_bbox(IMG_SIZE, IMG_SIZE, scale_bboxes)

    with open(file_name, 'w') as f:
        for bbox in yolo_bboxes:
            # Class id 1 is 'opacity' in the names list of the dataset yaml.
            f.write(' '.join(str(value) for value in [1] + bbox))
            f.write('\n')

In [None]:
# Configuration of experiment 1 (YOLOv5s baseline).
TRAIN_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/train/"
WEIGHT_PATH="/content/drive/MyDrive/CovidDetection/submission/yolov5/yolov5s.pt"
IMG_SIZE = 256
BATCH_SIZE = 16
EPOCHS = 10
# Fixed: the test JPGs of this project are written to dataset/tmp/test under
# the CovidDetection root, not to the older "siim" folder.
TEST_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/test"
# NOTE(review): this still points into the older "siim/version3" tree; confirm
# where the training run of this experiment stores its best.pt and update it.
BEST_MODEL_PATH  ="/content/drive/MyDrive/siim/version3/yolov5/version3/exp/weights/best.pt"
YAML_PATH="/content/drive/MyDrive/CovidDetection/submission/src/yolo.yaml"

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data {YAML_PATH} \
                 --weights {WIGHT_PATH} \
                 --save_period 1\
                 --project submission

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/detect.py --weights {BEST_MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img 256 \
                  --conf 0.281 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf

## Part 2-2 Exp 2
YOLOv3

In [None]:
# Clone the YOLOv3 code into the submission folder.
%cd /content/drive/MyDrive/CovidDetection/submission
!git clone https://github.com/ultralytics/yolov3

In [None]:
# Configuration of experiment 2 (YOLOv3).
TRAIN_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/train/"
WEIGHT_PATH="/content/drive/MyDrive/CovidDetection/submission/yolov3/yolov3.pt"
IMG_SIZE = 256
BATCH_SIZE = 16
EPOCHS = 10
# Fixed: the test JPGs of this project are written to dataset/tmp/test under
# the CovidDetection root, not to the older "siim" folder.
TEST_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/test"
# NOTE(review): this path names a yolov5 run in the older "siim/version3"
# tree although this experiment trains YOLOv3; confirm where this run stores
# its best.pt and update it.
BEST_MODEL_PATH  ="/content/drive/MyDrive/siim/version3/yolov5/version3/exp/weights/best.pt"
YAML_PATH="/content/drive/MyDrive/CovidDetection/submission/src/yolo.yaml"

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov3/train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data {YAML_PATH} \
                 --weights {WIGHT_PATH} \
                 --save_period 1\
                 --project submission

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov3/detect.py --weights {BEST_MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img 256 \
                  --conf 0.281 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf

## Part 2-3 Exp 3
YOLOv5x

In [None]:
# Configuration of experiment 3 (YOLOv5x).
TRAIN_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/train/"
WEIGHT_PATH="/content/drive/MyDrive/CovidDetection/submission/yolov5/yolov5x.pt"
IMG_SIZE = 256
BATCH_SIZE = 16
EPOCHS = 10
# Fixed: the test JPGs of this project are written to dataset/tmp/test under
# the CovidDetection root, not to the older "siim" folder.
TEST_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/test"
# NOTE(review): this still points into the older "siim/version3" tree; confirm
# where the training run of this experiment stores its best.pt and update it.
BEST_MODEL_PATH  ="/content/drive/MyDrive/siim/version3/yolov5/version3/exp/weights/best.pt"
YAML_PATH="/content/drive/MyDrive/CovidDetection/submission/src/yolo.yaml"

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data {YAML_PATH} \
                 --weights {WIGHT_PATH} \
                 --save_period 1\
                 --project submission

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/detect.py --weights {BEST_MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img 256 \
                  --conf 0.281 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf

## Part 2-4 Exp 4
Fuzzy Logic+Yolov5s

### Part 2-4-1 Fuzzy logic: feature extraction

In [None]:
import cv2
import math
import tqdm.notebook as tq
import numpy as np
import pandas as pd
import operator as op
from functools import reduce
from scipy.interpolate import interp1d
from google.colab.patches import cv2_imshow

In [None]:
# Global settings of the fuzzy enhancement.
epsilon = 0.00001  # small constant added to denominators by the helper functions
n=2  # passed as `n` to ImageEnh / ColoredImageEnh
m=2  # passed as `m` to ImageEnh / ColoredImageEnh
gamma=4  # exponent applied to the window weights in `membership`
img_name = '0a0bb7af0cab'
# NOTE(review): the two paths below point at the older "siim" folder and are
# not used by the enhancement cells that follow; `dir` also shadows the Python
# builtin of the same name. Confirm whether they are still needed.
test_folder = '/content/drive/MyDrive/siim/'
dir="/content/drive/MyDrive/siim/"

In [None]:
def add(epsilon , x1, x2):
    """Return ``(x1 + x2) / (1 + x1*x2 + epsilon)``."""
    numerator = x1 + x2
    denominator = 1 + (x1 * x2) + epsilon
    return numerator / denominator

def subtract(epsilon ,x1, x2):
    """Return ``(x1 - x2) / (1 - x1*x2 + epsilon)``."""
    numerator = x1 - x2
    denominator = 1 - (x1 * x2) + epsilon
    return numerator / denominator

def mult(epsilon, lamda, x):
    """Return ``((1+x)^lamda - (1-x)^lamda) / ((1+x)^lamda + (1-x)^lamda + epsilon)``."""
    upper = (1 + x) ** lamda
    lower = (1 - x) ** lamda
    return (upper - lower) / ((upper + lower) + epsilon)

def fai(epsilon ,x):
    """Return ``0.5 * log((1 + x) / (1 - x + epsilon))``."""
    ratio = (1 + x) / (1 - x + epsilon)
    return 0.5 * np.log(ratio)

def norm(epsilon ,x):
    """Return the absolute value of ``fai(epsilon, x)``."""
    transformed = fai(epsilon, x)
    return np.abs(transformed)

def comb(n, r):
    """Return the binomial coefficient "n choose r" as a float."""
    # C(n, r) == C(n, n - r); use the smaller of the two.
    r = min(r, n - r)
    numer = 1
    for factor in range(n, n - r, -1):
        numer = numer * factor
    denom = 1
    for factor in range(1, r + 1):
        denom = denom * factor
    return numer / denom

def colorAdd(q1,q2):
    """Apply ``add`` (with the global epsilon) to the first three components of q1 and q2."""
    return [add(epsilon, q1[k] , q2[k]) for k in range(3)]


def colorSub(q1,q2):
    """Apply ``subtract`` (with the global epsilon) to the first three components of q1 and q2."""
    return [subtract(epsilon, q1[k] , q2[k]) for k in range(3)]

def colorMult(lamda,q):
    """Apply ``mult`` with factor lamda (and the global epsilon) to the first three components of q."""
    return [mult(epsilon, lamda, q[k]) for k in range(3)]

def colorNorm(q):
    """Return the square root of the summed squares of ``fai`` over the first three components of q."""
    total = 0
    for k in range(3):
        total += fai(epsilon, q[k])**2
    return np.sqrt(total)

In [None]:
class ImageEnh:
    """Fuzzy-window enhancement of a single-channel (grayscale) image.

    The image is covered by ``n x m`` fuzzy windows. Every pixel belongs to
    each window with a membership degree; each window gets a
    membership-weighted mean and variance; the enhanced pixel is the sum, over
    all windows, of the pixel's window-normalised value weighted by its
    membership. Pixel arithmetic uses the module-level helpers ``add``,
    ``subtract``, ``mult`` and ``norm`` (with the global ``epsilon``) on
    values scaled into [-1, 1].

    Every intermediate result is cached in an array pre-filled with -1000,
    which serves as the "not computed yet" marker.
    """

    def __init__(self, image, n, m, gamma):
        """Store the parameters, allocate the caches and rescale the image.

        Args:
            image: 2-D array with values in [0, 255].
            n: Number of windows along axis 0 of the image.
            m: Number of windows along axis 1 of the image.
            gamma: Exponent applied to the window weights in ``membership``.
        """
        self.image = image
        self.n = n
        self.m = m
        self.gamma = gamma
        # The builtin float replaces the np.float alias, which newer NumPy
        # releases no longer provide; both mean float64.
        self.pixelMemberships = np.full((n, m, image.shape[0], image.shape[1]), -1000, dtype=float)
        self.windowsMean = np.full((n, m), -1000, dtype=float)
        self.windowsCard = np.full((n, m), -1000, dtype=float)
        self.pijMat = np.full((n, m, image.shape[0], image.shape[1]), -1000, dtype=float)
        self.windowsVariance = np.full((n, m), -1000, dtype=float)
        self.image = self.convertImgDown(self.image)

    def convertImgDown(self, image):
        """Map pixel values linearly from [0, 255] to [-1, 1]."""
        mapping = interp1d([0, 255], [-1, 1])
        image = mapping(image)
        return image

    def convertImgUp(self, img):
        """Map values linearly from [-M, M] to [0, 255], M being the largest absolute value in *img*."""
        num1 = np.abs(np.min(img))
        num2 = np.abs(np.max(img))
        mapping = interp1d([-1*max(num1,num2), max(num1,num2)], [0, 255])
        img = mapping(img)
        return img

    def qxi(self, i, x):
        """Return the weight of row coordinate *x* for window row *i*.

        Computes ``C(n, i) * x^i * (x1 - x)^(n - i) / x1^n`` with ``x1`` the
        image extent along axis 0.
        """
        x0 = 0
        x1 = self.image.shape[0]
        nCi = comb(self.n, i)
        nom = ((x - x0) ** i) * ((x1 - x) ** (self.n - i))
        denom = (x1 - x0) ** self.n
        ans = nCi * nom / denom
        return ans

    def qyj(self, j, y):
        """Return the weight of column coordinate *y* for window column *j*.

        Same formula as ``qxi`` with ``m`` and the image extent along axis 1.
        """
        y0 = 0
        y1 = self.image.shape[1]
        nCi = comb(self.m, j)
        nom = (np.power((y - y0),j)) * (np.power((y1 - y) , (self.m - j)))
        denom = (y1 - y0) ** self.m
        ans = nCi * nom / denom
        return ans

    def pij(self, i, j, x, y):
        """Return (and cache) ``qxi(i, x) * qyj(j, y)``."""
        if self.pijMat[i][j][x][y] == -1000:
            ans = self.qxi(i, x) * self.qyj(j, y)
            self.pijMat[i][j][x][y] = ans
        return self.pijMat[i][j][x][y]

    def membership(self, i, j, x, y):
        """Return (and cache) the membership of pixel (x, y) in window (i, j).

        The value is ``pij^gamma`` divided by the sum of ``pij^gamma`` over
        all windows (plus epsilon). A diagnostic line is printed when the
        result is NaN or outside [0, 1].
        """
        if self.pixelMemberships[i][j][x][y] == -1000:
            nom = self.pij(i, j, x, y) ** self.gamma
            denom = 0
            for idx1 in range(self.n):
                for idx2 in range(self.m):
                    denom += np.power(self.pij(idx1, idx2, x, y), self.gamma)
            ans = nom / (denom + epsilon)
            if ans > 1 or ans < 0 or math.isnan(ans):
                print('Error in membership : ', denom)
            self.pixelMemberships[i][j][x][y] = ans
        return self.pixelMemberships[i][j][x][y]

    def windowCard(self, i, j):
        """Return (and cache) the cardinality of window (i, j): the sum of all pixel memberships."""
        if self.windowsCard[i][j] == -1000:
            card = 0.0
            for x in range(self.image.shape[0]):
                for y in range(self.image.shape[1]):
                    card += self.membership(i, j, x, y)
            self.windowsCard[i][j] = card
        return self.windowsCard[i][j]

    def windowMean(self, i, j):
        """Return (and cache) the membership-weighted mean of window (i, j).

        Pixels are accumulated with ``add`` after scaling each one with
        ``mult`` by ``membership / cardinality``.
        """
        if self.windowsMean[i][j] == -1000:
            card = self.windowCard(i, j)
            mean = 0.0
            for x in range(self.image.shape[0]):
                for y in range(self.image.shape[1]):
                    mean = add(epsilon, mean, mult(epsilon, self.membership(i, j, x, y) / card, self.image[x][y]))
            self.windowsMean[i][j] = mean
        return self.windowsMean[i][j]

    def windowVar(self, i, j):
        """Return (and cache) the membership-weighted variance of window (i, j)."""
        if self.windowsVariance[i][j] == -1000:
            var = 0.0
            card = self.windowCard(i, j)
            mean = self.windowMean(i, j)
            for x in range(self.image.shape[0]):
                for y in range(self.image.shape[1]):
                    memship = self.membership(i, j, x, y)
                    nom = memship * (norm(epsilon, subtract(epsilon, self.image[x][y], mean)) ** 2)
                    denom = card
                    var += nom / denom
            self.windowsVariance[i][j] = var
        return self.windowsVariance[i][j]

    def enhanceImage(self):
        """Compute the enhanced image and return it mapped to [0, 255].

        Returns:
            2-D float array of the same shape as the input image.
        """
        image_copy = np.zeros((self.image.shape[0], self.image.shape[1]))
        sigma = np.sqrt(1/3)
        for i in range(self.n):
            for j in range(self.m):
                mean = self.windowMean(i, j)
                std = np.sqrt(self.windowVar(i, j))
                # Constant for the whole window, so computed once per window
                # instead of once per pixel.
                left = sigma / std
                for x in range(self.image.shape[0]):
                    for y in range(self.image.shape[1]):
                        psi = mult(epsilon, left, subtract(epsilon,self.image[x][y], mean))
                        memship = self.membership(i, j, x, y)
                        image_copy[x][y] += mult(epsilon, memship, psi)
        image_copy = self.convertImgUp(image_copy)
        return image_copy

In [None]:
class ColoredImageEnh:
    """Fuzzy-window enhancement of a three-channel image.

    Window memberships, means and variances are computed on the image's
    luminosity (see ``imageLuminosity``); the per-window normalisation is then
    applied to each of the three channels. Pixel arithmetic uses the
    module-level helpers ``add``, ``subtract``, ``mult`` and ``norm`` (with
    the global ``epsilon``) on values scaled into [-1, 1].

    Every intermediate result is cached in an array pre-filled with -1000,
    which serves as the "not computed yet" marker.
    """

    def __init__(self, image, n, m, gamma):
        """
        Initializing the model and its variables
        :param image: image to be enhanced (rows x columns x 3, values in [0, 255])
        :param n: number of windows along axis 0 of the image
        :param m: number of windows along axis 1 of the image
        :param gamma: fuzzification coefficient
        """
        self.image = image
        self.n = n
        self.m = m
        self.gamma = gamma

        # The builtin float replaces the np.float alias, which newer NumPy
        # releases no longer provide; both mean float64.
        # Membership matrix of pixels to each window
        self.pixelMemberships = np.full((n, m, image.shape[0], image.shape[1]), -1000, dtype=float)
        # Mean of each window
        self.windowsMean = np.full((n, m), -1000, dtype=float)
        # Cardinality of each window
        self.windowsCard = np.full((n, m), -1000, dtype=float)
        self.pijMat = np.full((n, m, image.shape[0], image.shape[1]), -1000, dtype=float)
        # Variance of each window
        self.windowsVariance = np.full((n, m), -1000, dtype=float)

        self.image = self.convertImgDown(self.image)

        # Luminosity matrix of colored image
        self.lum = np.full((image.shape[0] , image.shape[1]) , -1000 , dtype=float)

    def convertImgDown(self, image):
        """
        Mapping pixel values from interval [0 , 255] to [-1 , 1]
        :param image: image needed to be converted
        :return: image after mapping it to interval [-1 , 1]
        """
        mapping = interp1d([0, 255], [-1, 1])
        image = mapping(image)
        return image

    def convertImgUp(self, img):
        """
        Mapping pixel values from interval [min(img) , max(img)] to interval [0 , 255]
        :param img: image needed to be converted
        :return: image after mapping
        """
        mapping = interp1d([np.min(img), np.max(img)], [0, 255])
        img = mapping(img)
        return img

    def imageLuminosity(self,i,j):
        """
        Calculating the Luminosity of colored image at index i , j.
        The first call that finds an unset entry fills the luminosity of
        every pixel: the three channels are combined with ``add`` and scaled
        with ``mult`` by 1/3.
        :param i: row index
        :param j: column index
        :return: The luminosity value at index [i j]
        """
        if self.lum[i][j] == -1000:
            for x in range(self.image.shape[0]):
                for y in range(self.image.shape[1]):
                    temp = add(epsilon, self.image[x][y][2] , self.image[x][y][1])
                    temp = add(epsilon, temp , self.image[x][y][0])
                    self.lum[x][y] = mult(epsilon, 1/3 , temp)
        return self.lum[i][j]


    def qxi(self, i, x):
        """
        Calculating formula: C(n, i) * x^i * (x1 - x)^(n - i) / x1^n,
        where x1 is the image extent along axis 0
        :param i: window row index
        :param x: pixel row index
        :return: weight of x for window row i
        """
        x0 = 0
        x1 = self.image.shape[0]
        nCi = comb(self.n, i)
        nom = ((x - x0) ** i) * ((x1 - x) ** (self.n - i))
        denom = (x1 - x0) ** self.n
        ans = nCi * nom / denom
        return ans

    def qyj(self, j, y):
        """
        Same formula as qxi, with m and the image extent along axis 1
        :param j: window column index
        :param y: pixel column index
        :return: weight of y for window column j
        """
        y0 = 0
        y1 = self.image.shape[1]
        nCi = comb(self.m, j)
        nom = (np.power((y - y0), j)) * (np.power((y1 - y), (self.m - j)))
        denom = (y1 - y0) ** self.m
        ans = nCi * nom / denom
        return ans

    def pij(self, i, j, x, y):
        """
        Calculating (and caching) qxi(i, x) * qyj(j, y)
        :return: combined weight of pixel [x][y] for window i,j
        """
        if self.pijMat[i][j][x][y] == -1000:
            ans = self.qxi(i, x) * self.qyj(j, y)
            self.pijMat[i][j][x][y] = ans
        return self.pijMat[i][j][x][y]

    def membership(self, i, j, x, y):
        """
        Calculating the membership of pixel [x][y] to a window i,j
        :param i: window row index
        :param j: window column index
        :param x: pixel row index
        :param y: pixel column index
        :return: Membership value
        """
        if self.pixelMemberships[i][j][x][y] == -1000:
            nom = self.pij(i, j, x, y) ** self.gamma
            denom = 0
            for idx1 in range(self.n):
                for idx2 in range(self.m):
                    denom += np.power(self.pij(idx1, idx2, x, y), self.gamma)
            ans = nom / (denom + epsilon)
            self.pixelMemberships[i][j][x][y] = ans
        return self.pixelMemberships[i][j][x][y]

    def windowCard(self, i, j):
        """
        Calculating the cardinality of window i,j (sum of all pixel memberships)
        :param i: window row index
        :param j: window column index
        :return: window card value
        """
        if self.windowsCard[i][j] == -1000:
            card = 0.0
            for x in range(self.image.shape[0]):
                for y in range(self.image.shape[1]):
                    card += self.membership(i, j, x, y)
            self.windowsCard[i][j] = card
        return self.windowsCard[i][j]

    def windowMean(self, i, j):
        """
        Calculating the membership-weighted luminosity mean of window i,j
        :param i: window row index
        :param j: window column index
        :return: Mean value
        """
        if self.windowsMean[i][j] == -1000:
            card = self.windowCard(i, j)
            mean = 0.0
            for x in range(self.image.shape[0]):
                for y in range(self.image.shape[1]):
                    mean = add(epsilon, mean, mult(epsilon, self.membership(i, j, x, y) / card, self.imageLuminosity(x,y)))
            self.windowsMean[i][j] = mean
        return self.windowsMean[i][j]

    def windowVar(self, i, j):
        """
        Calculating the membership-weighted luminosity variance of a window
        :param i: window row index
        :param j: window column index
        :return: Variance value (imageEnhance takes its square root)
        """
        if self.windowsVariance[i][j] == -1000:
            var = 0.0
            card = self.windowCard(i, j)
            mean = self.windowMean(i, j)
            for x in range(self.image.shape[0]):
                for y in range(self.image.shape[1]):
                    memship = self.membership(i, j, x, y)
                    nom = memship * (norm(epsilon, subtract(epsilon, self.imageLuminosity(x,y), mean)) ** 2)
                    denom = card
                    var += nom / denom
            self.windowsVariance[i][j] = var
        return self.windowsVariance[i][j]

    def imageEnhance(self):
        """
        Iterating over the channels , pixel and windows and calculate the new image after enhancing
        :return: Enhanced image after converting it to interval[0,255]
        """
        final_img = np.zeros((self.image.shape[0] , self.image.shape[1] , 3) , dtype=float)
        sigma = np.sqrt(1/3)
        for chn in range(3):
            for i in range(self.n):
                for j in range(self.m):
                    var = np.sqrt(self.windowVar(i,j))
                    mean = self.windowMean(i,j)
                    for x in range(self.image.shape[0]):
                        for y in range(self.image.shape[1]):
                            memship = self.membership(i, j, x, y)
                            final_img[x][y][chn] += mult(epsilon, (memship*sigma/var) , subtract(epsilon, self.image[x][y][chn] , mean))

        final_img = self.convertImgUp(final_img)

        return final_img

In [None]:
def colored_enhancing(filepath, gray_tarpath):
    """
    Apply the fuzzy enhancement to a colour image and save the result.

    Uses the notebook-level settings ``n``, ``m`` and ``gamma``.
    :param filepath: path of the image to read
    :param gray_tarpath: path the enhanced image is written to. NOTE: despite
        its name this is the target of the *colour* enhanced image; the name
        is kept so existing callers keep working.
    :raises OSError: if the image cannot be read or the result cannot be written
    """
    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError("cv2.imread could not read image: {}".format(filepath))

    imgEnh = ColoredImageEnh(img, n, m, gamma)
    final_image = np.array(imgEnh.imageEnhance(), dtype=np.uint8)

    # cv2.imwrite returns False on failure; do not lose an image silently.
    if not cv2.imwrite(gray_tarpath, final_image):
        raise OSError("cv2.imwrite could not write image: {}".format(gray_tarpath))

def gray_enhancing(filepath, colored_tarpath):
    """
    Convert an image to grayscale, apply the fuzzy enhancement and save it.

    Uses the notebook-level settings ``n``, ``m`` and ``gamma``.
    :param filepath: path of the image to read
    :param colored_tarpath: path the enhanced image is written to. NOTE:
        despite its name this is the target of the *gray* enhanced image; the
        name is kept so existing callers keep working.
    :raises OSError: if the image cannot be read or the result cannot be written
    """
    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError("cv2.imread could not read image: {}".format(filepath))

    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgEnh = ImageEnh(img, n, m, gamma)
    final_image = np.array(imgEnh.enhanceImage(), dtype=np.uint8)

    # cv2.imwrite returns False on failure; do not lose an image silently.
    if not cv2.imwrite(colored_tarpath, final_image):
        raise OSError("cv2.imwrite could not write image: {}".format(colored_tarpath))

In [None]:
# Make sure the output folders for the fuzzy-enhanced images exist
# (train and test sub-folders of fgtmp and fctmp); existing folders are kept.
for _out_dir in (
    '/content/drive/MyDrive/CovidDetection/dataset/fgtmp/train',
    '/content/drive/MyDrive/CovidDetection/dataset/fgtmp/test',
    '/content/drive/MyDrive/CovidDetection/dataset/fctmp/train',
    '/content/drive/MyDrive/CovidDetection/dataset/fctmp/test',
):
    os.makedirs(_out_dir, exist_ok=True)

In [None]:
# Display the dimensions of the `info` table; its row count (info.shape[0])
# is the upper bound of the enhancement loop in the next cell.
info.shape

In [None]:
# Run log: the enhancement was executed over several sessions; number of
# images reported as processed in each one:
# first time 893 success
# second time 1160 success
# third time 650 success
# fourth time 761 success
# fifth time 382 success
# sixth time 91 success
# seventh time 996 success
# eighth time 987 success
# ninth time 416 success
# The start index 5918 is presumably the resume offset left by the earlier
# sessions - TODO confirm before re-running from scratch.
for i in tq.tqdm(range(5918,info.shape[0])):
    row=info.iloc[i]
    # source image of this row
    filepath=row.filepath
    # target path of the gray enhanced image
    gray_tarpath=row.fgfp
    #print(gray_tarpath)
    # target path of the colored enhanced image
    colored_tarpath=row.fcfp
    #print(colored_tarpath)
    # Both helpers take (source path, target path) positionally.
    gray_enhancing(filepath, gray_tarpath)
    colored_enhancing(filepath, colored_tarpath)

### Part 2-4-2 YOLOv5s
- Fuzzy colored images
- Fuzzy gray images

In [None]:
# Settings shared by the YOLOv5 train / detect commands in the next cells.
# Training images (not referenced by the commands in the following cells).
TRAIN_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/train/"
# Initial weights passed to train.py.
# NOTE(review): this is the yolov5x checkpoint although the section title says YOLOv5s - confirm.
WEIGHT_PATH="/content/drive/MyDrive/CovidDetection/submission/yolov5/yolov5x.pt"
# Image size passed to train.py via --img.
IMG_SIZE = 256
BATCH_SIZE = 16
EPOCHS = 10
# NOTE(review): TEST_PATH and BEST_MODEL_PATH live under ".../MyDrive/siim/" while every
# other path uses ".../MyDrive/CovidDetection/" - presumably left over from an earlier
# version; confirm they point at the intended data and weights.
TEST_PATH = "/content/drive/MyDrive/siim/tmp/test"
BEST_MODEL_PATH  ="/content/drive/MyDrive/siim/version3/yolov5/version3/exp/weights/best.pt"
# Dataset description file passed to train.py via --data.
YAML_PATH="/content/drive/MyDrive/CovidDetection/submission/src/yolo.yaml"

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data {YAML_PATH} \
                 --weights {WIGHT_PATH} \
                 --save_period 1\
                 --project submission

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/detect.py --weights {BEST_MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img 256 \
                  --conf 0.281 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf

In [None]:
# Settings for the second YOLOv5 run of this section.
# NOTE(review): every value is identical to the settings cell of the first run, so both
# runs (fuzzy colored / fuzzy gray per the section title) read the same paths - confirm
# whether the dataset YAML is swapped between the runs.
# Training images (not referenced by the commands in the following cells).
TRAIN_PATH = "/content/drive/MyDrive/CovidDetection/dataset/tmp/train/"
# Initial weights passed to train.py (yolov5x checkpoint).
WEIGHT_PATH="/content/drive/MyDrive/CovidDetection/submission/yolov5/yolov5x.pt"
# Image size passed to train.py via --img.
IMG_SIZE = 256
BATCH_SIZE = 16
EPOCHS = 10
# NOTE(review): these two paths live under ".../MyDrive/siim/" unlike the others - confirm.
TEST_PATH = "/content/drive/MyDrive/siim/tmp/test"
BEST_MODEL_PATH  ="/content/drive/MyDrive/siim/version3/yolov5/version3/exp/weights/best.pt"
# Dataset description file passed to train.py via --data.
YAML_PATH="/content/drive/MyDrive/CovidDetection/submission/src/yolo.yaml"

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data {YAML_PATH} \
                 --weights {WIGHT_PATH} \
                 --save_period 1\
                 --project submission

In [None]:
!python /content/drive/MyDrive/CovidDetection/submission/yolov5/detect.py --weights {BEST_MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img 256 \
                  --conf 0.281 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf

## Part 2-5 Choquet Integral

In [None]:
import torch
import numpy as np

### Part 2-5-1 CHI NN
This part crashes the Google Colab runtime for an unknown reason.
The program works well on my personal laptop; unfortunately, the laptop has no GPU, so I cannot run the final deep-learning step there.

In [None]:
def sources_and_subsets_nodes(N):
  """
  Enumerate the nodes 1 .. 2**N - 1 of the binary-encoded subset lattice.

  For every node (in increasing order) two lists are produced:
  - the sources contained in the node, i.e. the positions of the set bits of
    the node number, highest position first;
  - for each of those sources, the 0-based list index of the node obtained by
    removing that source: ``(node - 1) - 2**source``. For a single-source
    node this is -1.

  :param N: number of sources
  :return: tuple ``(sourcesInNode, subset)`` of two lists with 2**N - 1 entries
  """
  sourcesInNode = []
  subset = []

  for node in range(1, 2**N):
    # set bits of the node number, scanned from the most significant bit down
    members = [bit for bit in reversed(range(N)) if (node >> bit) & 1]
    sourcesInNode.append(members)

    position = node - 1  # 0-based index of this node in the returned lists
    subset.append([position - 2**bit for bit in members])

  print("sources_and_subsets_nodes")
  return sourcesInNode, subset

def subset_to_indices(indices):
  """Print a trace line and return the elements of ``indices`` as a new list."""
  print("subset_to_indeces")
  return list(indices)

In [None]:
class Choquet_integral(torch.nn.Module):
  """
  Choquet-integral layer with ``N_in`` inputs and ``N_out`` outputs.

  Every output owns one fuzzy measure (FM) in binary encoding: row ``k`` of
  the FM belongs to the subset whose bit pattern is ``k + 1``. The layer
  learns ``2**N_in - 2`` non-negative variables per output; the measure of
  the full set is fixed to 1.
  """

  def __init__(self, N_in, N_out):
    """
    :param N_in: number of inputs (sources) to aggregate
    :param N_out: number of outputs, i.e. number of fuzzy measures
    """
    super().__init__()
    self.N_in = N_in
    self.N_out = N_out
    self.nVars = 2**self.N_in - 2

    # The FM variables are initialized with the mean 1 / N_in
    self.vars = torch.nn.Parameter(torch.full((self.nVars, self.N_out), 1. / self.N_in))

    # following function works on plain Python lists; convert to tensors
    self.sourcesInNode, self.subset = sources_and_subsets_nodes(self.N_in)
    self.sourcesInNode = [torch.tensor(x) for x in self.sourcesInNode]
    self.subset = [torch.tensor(x) for x in self.subset]
    print("self.subset/n",self.subset)

  def forward(self,inputs):
    """
    Compute the Choquet integral of every row of ``inputs``.

    :param inputs: 2-D tensor of shape (samples, N_in)
    :return: tensor of shape (samples, N_out)
    """
    self.FM = self.chi_nn_vars(self.vars)
    sortInputs, sortInd = torch.sort(inputs, 1, True)
    M, _ = inputs.size()

    # Differences between consecutive sorted values (last one against 0).
    # new_zeros keeps the dtype and device of the inputs.
    sortInputs = torch.cat((sortInputs, sortInputs.new_zeros((M, 1))), 1)
    sortInputs = sortInputs[:,:-1] - sortInputs[:,1:]

    # Row of the FM that belongs to the set of the k largest inputs.
    out = torch.cumsum(torch.pow(2, sortInd), 1) - 1

    # Place every difference in the column of its FM row, for all samples at
    # once. The buffer follows the FM so it also works when the model is on GPU.
    data = self.FM.new_zeros((M, self.nVars + 1))
    data.scatter_(1, out, sortInputs.to(data.dtype))

    # One product for the whole batch; also defined for an empty batch.
    ChI = torch.matmul(data, self.FM)

    print("forward")
    return ChI

  # Converts NN-vars to FM vars
  def chi_nn_vars(self, chi_vars):
    """
    Turn the unconstrained variables into a monotone fuzzy measure.

    A single-source node takes ``|var|``; any other node takes the maximum
    measure of its direct subsets plus ``|var|``. A row of ones is appended
    for the full set and everything is capped at 1.

    :param chi_vars: tensor of shape (nVars, N_out)
    :return: tensor of shape (nVars + 1, N_out)
    """
    chi_vars = torch.abs(chi_vars)
    FM = chi_vars[None, 0,:]

    for i in range(1,self.nVars):
      indices = self.subset[i]
      if indices.numel() == 1:
        row = chi_vars[i,:]
      else:
        maxVal,_ = torch.max(FM[indices,:],0)
        row = torch.add(maxVal,chi_vars[i,:])
      # Every node must append exactly one row, otherwise the subset indices
      # of later nodes point at the wrong (or a missing) row.
      FM = torch.cat((FM,row[None,:]),0)

    FM = torch.cat((FM, FM.new_ones((1,self.N_out))),0)
    FM = torch.min(FM, FM.new_ones(1))

    print("chi_nn_vars")
    return FM

In [None]:
# number of training samples
M = 10
# number of inputs
N_in = 3
# number of outputs aka number of Choquet integral neurons
N_out = 2
# Create a synthetic dataset by sampling uniformly from [-1, 1)
X_train = np.random.rand(M,N_in)*2-1
# Let's specify the FMs (there will be N_out FMs).
# Binary encoding is used instead of lexicographic encoding because it is easier to code.
# For example, an FM for three inputs in lexicographic encoding is g = {g_1, g_2, g_3, g_{12}, g_{13}, g_{23}, g_{123}},
# whereas its binary encoding is g = {g_1, g_2, g_{12}, g_3, g_{13}, g_{23}, g_{123}}.
# For simplicity, here we use OWA.
print(X_train)
OWA = np.array([[0.7, 0.2, 0.1], # this is soft-max
                    [0.1,0.2,0.7]])  # soft-min
# The FMs of the above OWAs in binary encoding
# FM = [[0.7, 0.7, 0.9, 0.7, 0.9, 0.9, 1.0],
#       [0.1, 0.1, 0.3, 0.1, 0.3, 0.3, 1.0]]
print('Actual/groundtruth FMs in binary encoding:')
print('FM1 = ', np.array([0.7, 0.7, 0.9, 0.7, 0.9, 0.9, 1.0]))
print('FM2 = ', np.array([0.1, 0.1, 0.3, 0.1, 0.3, 0.3, 1.0]))

# Generate the labels (ground truth) from the OWAs. The labels are two-dimensional:
# inputs are sorted in ascending order, so the OWA weights are flipped to match.
label_train = np.matmul(np.sort(X_train), np.fliplr(OWA).T)

# Now we want to recover the FMs from the training data and ground truth.
# First, build a Choquet integral neuron with N_in inputs and N_out outputs
net = Choquet_integral(N_in,N_out)

# set the optimization algorithm and the learning parameters
learning_rate = 0.3

# Loss function and optimizer. net.parameters() yields the learnable
# FM variables of the Choquet integral layer.
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

num_epochs = 300

# convert from numpy to torch tensor
X_train = torch.tensor(X_train,dtype=torch.float) #.to("cuda")
label_train = torch.tensor(label_train,dtype=torch.float) #.to("cuda")

# Put the network built above into training mode (the name `model` is not
# defined in this cell; the network is `net`).
net.train()
# optimize
for t in range(num_epochs):
  # Forward pass: compute predicted y by passing x to the network.
  # NOTE: the Colab runtime was reported to crash at this call; moving the
  # tensors with .to("cuda") did not help.
  y_pred = net(X_train)

  # Compute the loss
  loss = criterion(y_pred, label_train)
  # Zero gradients, perform a backward pass, and update the weights.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

# Finally, the learned FMs
FM_learned = (net.chi_nn_vars(net.vars).cpu()).detach().numpy()
print('\n\nLearned FMs:')
print('FM1 = ', FM_learned[:,0])
print('FM2 = ',FM_learned[:,1])

### Part 2-5-2 CHI

In [None]:
def _differentiation_1_distance(X):
    '''
    Return the first element of X followed by the differences between
    consecutive elements, printing both parts as a trace.

    :param X: sequence supporting slicing and element-wise subtraction
    '''
    print("dis:")
    first = X[0]
    steps = X[1:] - X[0:-1]
    print(first, steps)
    return np.append(first, steps)


def generate_cardinality(N, p = 2):
    '''
    Build the cardinality measure of a N-sized vector: the values
    (N/N)**p, ((N-1)/N)**p, ..., (1/N)**p, each of which is also printed.

    :param N: size of the vector
    :param p: exponent applied to every ratio
    :return: list with N values, largest first
    '''
    print("FM:")
    weights = []
    for size in np.arange(N, 0, -1):
        weight = (size / N) ** p
        print(weight)
        weights.append(weight)
    return weights


def generate_cardinality_matrix(N, matrix_shape, p = 2):
    '''
    Cardinality measure of a N-sized vector laid out as an array of shape
    matrix_shape: slice k along the first axis is filled with ((N-k)/N)**p.
    Use this when the list from generate_cardinality() does not broadcast.
    N and matrix_shape must be coherent (matrix_shape[0] == N)
    '''
    filled = np.zeros(matrix_shape)
    for row, size in enumerate(np.arange(N, 0, -1)):
        filled[row, ...] = (size / N) ** p
    return filled


def choquet_integral_symmetric(X, measure=None, axis=0, keepdims=True):
    '''
    Aggregate a numpy array along one axis with the Choquet integral of a
    symmetric measure.

    The values are sorted in ascending order along the axis, replaced by the
    first value followed by the consecutive differences, and combined with
    the measure through a dot product.

    :param X: Data to aggregate.
    :param measure: Vector with the numeric values of the (symmetric) measure;
        defaults to generate_cardinality(X.shape[axis]).
    :param axis: Axis along which to aggregate.
    :param keepdims: If True the aggregated axis is kept with length 1.
    '''
    weights = generate_cardinality(X.shape[axis]) if measure is None else measure

    ordered = np.sort(X, axis=axis)
    increments = np.apply_along_axis(_differentiation_1_distance, axis, ordered)

    def _weighted_sum(column):
        return np.dot(column, weights)

    aggregated = np.apply_along_axis(_weighted_sum, axis, increments)

    if not keepdims:
        return aggregated
    return np.expand_dims(aggregated, axis=axis)



def sugeno_fuzzy_integral(X, measure=None, axis = 0, keepdims=True):
    '''
    Aggregate data with the Sugeno fuzzy integral by delegating to
    sugeno_fuzzy_integral_generalized with np.minimum and np.amax.

    NOTE(review): sugeno_fuzzy_integral_generalized is not defined next to
    this function - confirm it is available before calling.

    :param X: Data to aggregate.
    :param measure: Vector with the numeric values of the measure; defaults
        to generate_cardinality(X.shape[axis]).
    :param axis: Axis along which to aggregate.
    :param keepdims: Forwarded unchanged to the generalized implementation.
    '''
    weights = measure if measure is not None else generate_cardinality(X.shape[axis])
    return sugeno_fuzzy_integral_generalized(
        X, weights, axis, np.minimum, np.amax, keepdims)

In [None]:
# Quick check of both aggregations on a 3-element vector with the default
# cardinality measure (axis 0, keepdims=True).
choquet_integral_symmetric(np.array([0.2,0.9,0.8]))
# NOTE(review): this call needs sugeno_fuzzy_integral_generalized, which is not
# defined in the cell above - confirm it exists before running.
sugeno_fuzzy_integral(np.array([0.2,0.9,0.8]))