**Retrieve the data from Kaggle.**

In [1]:
! pip install -q kaggle

In [2]:
from google.colab import files
import numpy as np

In [4]:
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"salahmahmoud1911","key":"e2cd208322511ed2cb1017959e8952ce"}'}

In [5]:
! mkdir ~/.kaggle

In [6]:
! cp kaggle.json ~/.kaggle/

In [7]:
! kaggle competitions download -c ieee-mansb-victoris-2-final-competition

Downloading ieee-mansb-victoris-2-final-competition.zip to /content
100% 956M/959M [00:11<00:00, 188MB/s]
100% 959M/959M [00:11<00:00, 88.1MB/s]


In [None]:
! unzip /content/ieee-mansb-victoris-2-final-competition.zip

**We need to prepare the data.**

In [71]:
#Convert 'train_Images_Source_2' into folders similar to 'train_Images_Source_1'

import os
import shutil

image_directory = "/content/train_Images_Source_2"

# Get the list of image files in the directory
image_files = os.listdir(image_directory)

# Iterate over each image file
for image_file in image_files:
    # Extract the class name from the image file name
    class_name = image_file.split("_")[0]

    # Create the class folder if it doesn't exist
    class_folder = os.path.join(image_directory, class_name)
    if not os.path.exists(class_folder):
        os.makedirs(class_folder)

    # Move the image file to the class folder
    image_path = os.path.join(image_directory, image_file)
    shutil.move(image_path, class_folder)

print("Folders created and images moved successfully.")

Folders created and images moved successfully.


In [10]:
#Organizing the training data into one folder.
for folder_name in ['Ascariasis','Babesia','Capillaria p','Enterobius v','Epidermophyton floccosum','Fasciolopsis buski',
                     'Hookworm egg','Hymenolepis diminuta','Hymenolepis nana','Leishmania','Opisthorchis viverrine' ,'Paragonimus spp','T. rubrum','Taenia spp','Trichuris trichiura']:

    source_dir1 = f'/content/train_Images_Source_1/{folder_name}'
    source_dir2 = f'/content/train_Images_Source_2/{folder_name}'

    destination_dir = f'merged_train_data/{folder_name}'


    # Copy source directory to destination directory
    shutil.copytree(source_dir1, destination_dir, dirs_exist_ok=True)
    shutil.copytree(source_dir2, destination_dir, dirs_exist_ok=True)  # dirs_exist_ok=True --> removes the error that arises.

In [None]:
pip install split-folders


In [16]:
import splitfolders

In [17]:
#Split data to (Train,val,test)

splitfolders = splitfolders.ratio('/content/merged_train_data', output="cell_images2", seed=42, ratio=(.7, .2, .1),group_prefix=None) # default values

Copying files: 11910 files [00:09, 1316.26 files/s]


In [22]:
#Arrange the images in the test data.
import os

# Define the directory containing your image files
directory = '/content/test_data'

# Get a list of all files in the directory
files = os.listdir(directory)

# Filter only the image files (e.g., *.jpg)
image_files = [file for file in files if file.lower().endswith(('.jpg'))]

# Sort the image files based on the numeric values in the file names
sorted_image_files = sorted(image_files, key=lambda x: int(''.join(filter(str.isdigit, x))))

# Calculate the number of digits needed for the highest index
num_digits = len(str(len(sorted_image_files) - 1))

# Rename the files with leading zeros to ensure proper sorting
for i, file in enumerate(sorted_image_files):
    extension = os.path.splitext(file)[1]
    new_name = f"{i:0{num_digits}}{extension}"
    os.rename(os.path.join(directory, file), os.path.join(directory, new_name))

print("Image files sorted successfully.")


Image files sorted successfully.


**Model**

In [None]:
!pip install tensorflow

In [None]:
import tensorflow as tf

In [None]:
pip install ultralytics

In [None]:
pip install pytorch_lightning

In [21]:
import os

from ultralytics import YOLO

# Load a model
model = YOLO("yolov8m-cls.pt")  # load a pretained model

results = model.train(
    data='/content/cell_images2',
    epochs=15,
    imgsz=224, # Specify 'adam' optimize  # Specify a custom learning rate
    augment=True
)

Ultralytics YOLOv8.0.181 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=yolov8m-cls.pt, data=/content/cell_images2, epochs=20, patience=50, batch=16, imgsz=224, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras

**Try the Model model**

In [None]:
pip install --upgrade torch ultralytics


In [None]:
import csv
from ultralytics import YOLO
import pandas as pd

In [None]:
# Load a model
model = YOLO('/content/runs/classify/train2/weights/best.pt')  # Load a custom model

# Predict with the model
results = model.predict('/content/test_data')  # Predict on an image

In [47]:
result_list = []

for r in results:
    names_dict = r.names
    probs = r.probs.data.tolist()
    result_list.append(np.argmax(probs))

# Create a DataFrame with columns for each class
df = pd.DataFrame(columns=[f'class_{i}' for i in range(15)])

# Fill the DataFrame with zeros
for i in range(15):
    df[f'class_{i}'] = np.where(np.array(result_list) == i, 1, 0)


In [64]:
df.head(2)

Unnamed: 0,class_0,class_1,class_2,class_3,class_4,class_5,class_6,class_7,class_8,class_9,class_10,class_11,class_12,class_13,class_14
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [67]:
df_sub = pd.read_csv(r"/content/Sample_submission.csv")

In [68]:
df_sub.head(2)

Unnamed: 0,image_name,class_0,class_1,class_2,class_3,class_4,class_5,class_6,class_7,class_8,class_9,class_10,class_11,class_12,class_13,class_14
0,0.jpg,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1.jpg,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [69]:
# Get the common columns between df and df_sub
common_columns = df.columns.intersection(df_sub.columns)

# Update the common columns in df_sub starting from the second column with values from df
df_sub[df_sub.columns[1:]] = df[common_columns]

In [72]:
df_sub.head(2)

Unnamed: 0,image_name,class_0,class_1,class_2,class_3,class_4,class_5,class_6,class_7,class_8,class_9,class_10,class_11,class_12,class_13,class_14
0,0.jpg,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,1.jpg,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [73]:
df_sub.to_csv('output.csv', index=False)
