In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Install YOLOv8
!pip install ultralytics

In [None]:
import torch

# Pick the CUDA device when one is usable, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using device: {device}")


In [None]:
# Kaggle API credentials.
#
# The API key must never be hard-coded in a notebook: anything stored in a cell
# is published with every shared or committed version. The key that used to be
# embedded here should be treated as compromised and regenerated from the
# Kaggle account settings.
#
# KAGGLE_KEY is taken from the environment when it is already set (for example
# loaded from a secrets store before this cell runs); otherwise the user is
# prompted for it. In a non-interactive run without the variable, the prompt
# fails here rather than after training has finished.
import os
from getpass import getpass

os.environ.setdefault('KAGGLE_USERNAME', 'meghnabiswal')
if not os.environ.get('KAGGLE_KEY'):
    # getpass keeps the typed key out of the cell output.
    os.environ['KAGGLE_KEY'] = getpass('KAGGLE_KEY: ')

In [None]:
import os
import random

# Paths to image and label directories
image_dir = '/kaggle/input/dlp-object-detection-week-10/final_dlp_data/final_dlp_data/train/images'
label_dir = '/kaggle/input/dlp-object-detection-week-10/final_dlp_data/final_dlp_data/train/labels'

# os.listdir returns entries in arbitrary order, so the listing is sorted before
# the seeded shuffle; otherwise the same seed can give a different split on
# another run or machine.
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith(('.jpeg', '.png')))
if not image_files:
    # Fail here with a clear message instead of later, inside training.
    raise FileNotFoundError(f"No .jpeg/.png images found in {image_dir}")

# Shuffle and split dataset into train (90%) and validation (10%)
random.seed(42)
random.shuffle(image_files)
train_size = int(0.9 * len(image_files))
train_dataset = image_files[:train_size]
val_dataset = image_files[train_size:]

# Print dataset sizes
print(f"Total dataset size: {len(image_files)}")
print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")


In [None]:
# Working-directory layout: separate images/labels folders for the train and
# validation splits (paths are relative to the current working directory).
train_images_path = "dataset/train/images"
train_labels_path = "dataset/train/labels"
val_images_path = "dataset/val/images"
val_labels_path = "dataset/val/labels"

# Create all four folders; exist_ok=True makes the cell safe to re-run.
for split_dir in (train_images_path, train_labels_path, val_images_path, val_labels_path):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
import shutil
def copy_files(image_list, src_image_dir, src_label_dir, dest_image_dir, dest_label_dir):
    """Copy each listed image and its same-named ``.txt`` label file.

    Args:
        image_list: Image file names (not paths) to copy.
        src_image_dir: Directory containing the images.
        src_label_dir: Directory containing the label files.
        dest_image_dir: Existing directory that receives the images.
        dest_label_dir: Existing directory that receives the label files.

    Raises:
        FileNotFoundError: Propagated from ``shutil.copy2`` when an image or
            its label file does not exist.
    """
    for image_name in image_list:
        # Copy image
        shutil.copy2(
            os.path.join(src_image_dir, image_name),
            os.path.join(dest_image_dir, image_name),
        )

        # Copy corresponding label. Only the real extension is swapped, so the
        # mapping works for every image type (not just '.jpeg') and never
        # rewrites a '.jpeg' that appears in the middle of a file name.
        label_name = os.path.splitext(image_name)[0] + '.txt'
        shutil.copy2(
            os.path.join(src_label_dir, label_name),
            os.path.join(dest_label_dir, label_name),
        )

# Populate the train folders first, then the validation folders, from the
# shuffled split.
for split_files, split_images_path, split_labels_path in (
    (train_dataset, train_images_path, train_labels_path),
    (val_dataset, val_images_path, val_labels_path),
):
    copy_files(split_files, image_dir, label_dir, split_images_path, split_labels_path)


In [None]:
import yaml

# Class names; the class count below is derived from this list so the two
# cannot drift apart.
class_list = ["aegypti","albopictus","anopheles","culex","culiseta","japonicus/koreicus"]

# Dataset configuration. Both image folders are given as absolute paths.
data = dict(
    train='/kaggle/working/dataset/train/images',
    val='/kaggle/working/dataset/val/images',
    nc=len(class_list),
    names=class_list,
)

# Write the configuration as block-style YAML.
with open('/kaggle/working/dataset.yaml', 'w') as yaml_file:
    yaml.dump(data, yaml_file, default_flow_style=False)

print("YAML file created at /kaggle/working/dataset.yaml")


In [None]:
from ultralytics import YOLO

# Start from the yolov8m checkpoint and train on the dataset config.
model = YOLO("yolov8m.pt")
results = model.train(
    # Same absolute path the YAML was written to, so training does not depend
    # on the notebook's current working directory.
    data="/kaggle/working/dataset.yaml",
    epochs=30,
    imgsz=1024,
    # GPU 0 when CUDA is available; fall back to the CPU instead of failing on
    # a session without an accelerator.
    device=0 if torch.cuda.is_available() else "cpu",
    batch=16
)


In [None]:
# Run validation with the trained model and keep the metrics object.
val_results = model.val()

# Print the `map` value of the box metrics.
box_metrics = val_results.box
print(box_metrics.map)

In [None]:
# Print the `map50` value of the box metrics from the validation run.
print(val_results.box.map50)

In [None]:
# Run inference on the test images. save_txt/save_conf ask for text label
# files that include the confidence of each box; conf and iou are the
# confidence and IoU thresholds passed to predict.
test_images_dir = "/kaggle/input/dlp-object-detection-week-10/final_dlp_data/final_dlp_data/test/images"
predict_options = dict(save_txt=True, save_conf=True, conf=0.30, iou=0.50)
test_results = model.predict(test_images_dir, **predict_options)

In [None]:
# The prediction run directory gets a new name on every run, so a hard-coded
# "runs/detect/train3" only matches one particular kernel history. Ask the
# predictor where it saved its output instead.
predictions_dir = os.path.abspath(os.path.join(str(model.predictor.save_dir), "labels"))

files_list = sorted(os.listdir(predictions_dir))
print(len(files_list))

# Peek at one prediction file. The index is clamped so this also works when
# fewer than 50 label files were written.
if files_list:
    sample_file = files_list[min(49, len(files_list) - 1)]
    with open(os.path.join(predictions_dir, sample_file), "r") as f:
        for line in f:
            print(line)

In [None]:
import csv

# Define the output CSV file path
output_csv = "/kaggle/working/submission.csv"
test_path="/kaggle/input/dlp-object-detection-week-10/final_dlp_data/final_dlp_data/test/images"

# Class-id -> class-name lookup. `data` is the config dict that was dumped to
# dataset.yaml, so the names come from the same list the model was configured
# with. (This name was previously used without ever being defined.)
class_names = data["names"]

# Values written when an image has no usable prediction:
# (class id, confidence, xcenter, ycenter, bbx_width, bbx_height).
NO_DETECTION = (5, 0.5, 0.5, 0.5, 0.2, 0.2)


def _best_detection(label_path):
    """Return the most confident box in a prediction file, or None.

    Each line is expected as ``class x_center y_center width height confidence``.
    Lines with fewer than six fields are skipped. On equal confidence the later
    line wins. The result is ``(class_id, conf, xcenter, ycenter, width, height)``.
    """
    best = None
    with open(label_path, "r") as f:
        for line in f:
            parts = line.split()
            if len(parts) < 6:
                continue
            conf = float(parts[5])
            if best is None or conf >= best[1]:
                best = (
                    int(parts[0]),
                    conf,
                    float(parts[1]),
                    float(parts[2]),
                    float(parts[3]),
                    float(parts[4]),
                )
    return best


# List the prediction folder once instead of once per test image.
predicted_files = set(os.listdir(predictions_dir))

with open(output_csv, mode="w", newline="") as file:
    writer = csv.writer(file)
    # Write header
    writer.writerow(["id", "ImageID", "LabelName", "Conf", "xcenter", "ycenter", "bbx_width", "bbx_height"])

    # One row per test image, in sorted file-name order.
    for sno, img_file in enumerate(sorted(os.listdir(test_path))):
        # Swap only the real extension so non-.jpeg images map to the right label file.
        txt_file = os.path.splitext(img_file)[0] + ".txt"

        detection = None
        if txt_file in predicted_files:
            detection = _best_detection(os.path.join(predictions_dir, txt_file))

        # A missing or empty prediction file gets the placeholder row instead of
        # reusing the values left over from the previous image.
        label_name, conf, xcenter, ycenter, bbx_width, bbx_height = detection or NO_DETECTION

        writer.writerow([
            sno,
            img_file,
            class_names[label_name],
            conf,
            xcenter,
            ycenter,
            bbx_width,
            bbx_height
        ])

print(f"Submission file saved to {output_csv}")


In [None]:
# Number of data rows in the submission CSV (the header line is not counted).
len(pd.read_csv("/kaggle/working/submission.csv"))

In [None]:
import pandas as pd

test_csv_path = "/kaggle/working/submission.csv"  
submission_csv_path = "/kaggle/input/dlp-object-detection-week-10/sample_submission.csv"  

# Load both CSV files
test_df = pd.read_csv(test_csv_path)
submission_df = pd.read_csv(submission_csv_path)

# Left-merge on 'ImageID' so the rows follow the sample submission's order.
# indicator=True adds a '_merge' column marking rows that found no match.
merged_df = submission_df[['ImageID']].merge(test_df, on='ImageID', how='left', indicator=True)

# A left merge fills unmatched ImageIDs with NaN in every prediction column;
# stop here rather than writing (and uploading) a file with empty rows.
unmatched_ids = merged_df.loc[merged_df['_merge'] == 'left_only', 'ImageID']
if not unmatched_ids.empty:
    raise ValueError(
        f"{len(unmatched_ids)} ImageID(s) from the sample submission have no prediction row, "
        f"e.g. {unmatched_ids.head().tolist()}"
    )

# Reorder columns to have 'id' as the first column (this also drops '_merge')
column_order = ['id', 'ImageID', 'LabelName', 'Conf', 'xcenter', 'ycenter', 'bbx_width', 'bbx_height']
reordered_test_df = merged_df[column_order].copy()

# Reset the 'id' column to start from 0
reordered_test_df['id'] = range(len(reordered_test_df))

# The reordered table overwrites the submission file it was read from.
test_csv_path1 = "/kaggle/working/submission.csv"

reordered_test_df.to_csv(test_csv_path1, index=False)

print(f"file saved in {test_csv_path1}")
print(reordered_test_df.head())

# Auto Submission to Kaggle
from kaggle.api.kaggle_api_extended import KaggleApi

# Create the API client and authenticate it before submitting.
api = KaggleApi()
api.authenticate()

# Upload the reordered submission file to the competition.
submit_options = dict(
    message="Auto submission Done Meghna",
    competition="dlp-object-detection-week-10",
)
api.competition_submit(test_csv_path1, **submit_options)

print("Submission complete Meghna!")