In [None]:
# Print the CUDA compiler version available in this runtime
!nvcc -V

In [None]:
# Fetch the YOLOv5 sources into ./yolov5 (later cells `%cd` into this folder)
!git clone https://github.com/ultralytics/yolov5.git  # clone repo
# Alternative installs kept for reference (currently disabled):
# !pip install -qr yolov5/requirements.txt  # install dependencies
#!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html

# Pin torch/torchvision to the CUDA 10.1 builds.
# NOTE(review): the clone above is unpinned while torch is pinned to 1.6.0 and
# requirements.txt is not installed - confirm the checked-out YOLOv5 revision
# still runs with these versions.
!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# Import necessary libraries
import os
import shutil
import json
import ast
import numpy as np

# Import tqdm for progress bars
from tqdm import tqdm

# Import pandas for data handling
import pandas as pd

# Import scikit-learn for data splitting
from sklearn import model_selection

# Import IPython.display for displaying images
from IPython.display import Image

# Import matplotlib.pyplot for plotting
import matplotlib.pyplot as plt

# Import seaborn for data visualization
import seaborn as sns

# Import fastai.vision for image processing
import fastai.vision as vision


In [None]:
# Root folder of the Kaggle oil-storage-tanks dataset; the image patches
# folder and labels.json used below are located under it
DATA_PATH = '/kaggle/input/oil-storage-tanks/Oil Tanks/'

# Show the top-level contents of the dataset folder as a quick sanity check
os.listdir(DATA_PATH)

Bounding Box Coordinates

In [None]:
def conv_bbox(box_dict):
    """Reduce a list of corner points to an axis-aligned bounding box.

    Parameters
    ----------
    box_dict : list of dict
        Corner points of one annotated shape; each item provides an ``'x'``
        and a ``'y'`` entry.

    Returns
    -------
    tuple
        ``(y_min, x_min, y_max, x_max)`` as NumPy scalars, i.e. the box in
        top / left / bottom / right order.
    """
    # Duplicated coordinates carry no extra information for min/max
    x_coords = np.array(list({point['x'] for point in box_dict}))
    y_coords = np.array(list({point['y'] for point in box_dict}))

    top, bottom = y_coords.min(), y_coords.max()
    left, right = x_coords.min(), x_coords.max()

    return top, left, bottom, right


In [None]:
# Where the raw image patches live, and the working folders they are copied to
source = os.path.join(DATA_PATH, 'image_patches')
destination_1 = 'train'
destination_2 = 'test'

# Make sure both working folders exist before any file is copied into them
for folder in (destination_1, destination_2):
    os.makedirs(folder, exist_ok=True)

# Numeric class ID assigned to each label name found in the annotations
label_to_num = {
    name: class_id
    for class_id, name in enumerate(['Tank', 'Tank Cluster', 'Floating Head Tank'])
}


In [None]:
# Initialize a list to store annotations
annotations = []

# Load the JSON labels; the context manager closes the file handle right away
# instead of leaving it open for the lifetime of the kernel
with open(os.path.join(DATA_PATH, 'labels.json')) as labels_file:
    json_labels = json.load(labels_file)

for entry in tqdm(json_labels):
    file = entry['file_name']

    # Images whose name starts with '01' are held out as the test set
    if file.startswith('01'):
        shutil.copy(os.path.join(source, file), destination_2)
    elif entry['label'] != 'Skip':
        # Every other image that carries labels goes to the train folder
        shutil.copy(os.path.join(source, file), destination_1)

        # splitext keeps the full stem even if a file name contains extra dots
        image_name = os.path.splitext(file)[0]

        # One annotation row per bounding box
        for label, boxes in entry['label'].items():
            for box in boxes:
                y_min, x_min, y_max, x_max = conv_bbox(box['geometry'])
                width = x_max - x_min
                height = y_max - y_min

                # Store plain Python numbers: the bbox list is later written to
                # CSV as text and parsed back with ast.literal_eval, which
                # cannot read NumPy scalar reprs such as "np.int64(5)"
                bbox = [np.asarray(v).item() for v in (x_min, y_min, width, height)]
                annotations.append((image_name, label_to_num[label], label, bbox))

# Create a DataFrame to store annotations (one row per bounding box)
annotations = pd.DataFrame(annotations, columns=['image_name', 'class', 'class_name', 'bbox'])

# Print summary information
print('Number of train images: ', len(os.listdir(destination_1)))
print('Number of test images: ', len(os.listdir(destination_2)))
print('Number of Annotated Tanks: ', len(annotations))
print(annotations[70:100])

In [None]:
# Split the dataset into train and validation sets.
# The split is made per image, not per box: `annotations` holds one row per
# bounding box, so a row-wise split can place boxes of the same image in both
# sets. The image would then be copied to both folders, each time with only
# part of its labels. Grouping by image_name keeps every image in one set.
# Trade-off: the class proportions are no longer enforced by stratification,
# so check the count plots of both sets.
splitter = model_selection.GroupShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
train_rows, valid_rows = next(
    splitter.split(annotations, groups=annotations['image_name'])
)
df_train = annotations.iloc[train_rows]
df_valid = annotations.iloc[valid_rows]

# Guard against leakage: no image may appear in both sets
assert set(df_train['image_name']).isdisjoint(df_valid['image_name'])

df_train.shape, df_valid.shape

In [None]:
# Compare the class distribution of the two splits side by side
sns.set({'figure.figsize':(30,10)})
fig, axes = plt.subplots(1, 2)

for ax, (title, split_df) in zip(axes, [('Train set', df_train), ('validation set', df_valid)]):
    # Pass the values as `x=`: recent seaborn releases accept only `data` as a
    # positional argument, so a positional list is no longer read as x values.
    # Sorting keeps the bar order identical in both panels.
    sns.countplot(x=sorted(split_df['class_name']), ax=ax)
    ax.set_title(title)

plt.show()

In [None]:
def convert(data, data_type):
    """Collapse per-box annotations into one row per image and save them as CSV.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per bounding box with the columns ``image_name``, ``bbox``
        and ``class``.
    data_type : str
        Name of the split; the result is written to ``<data_type>.csv`` in
        the current working directory.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns ``image_name``, ``bboxes`` (list
        of boxes) and ``classes`` (list of class IDs, same order as the boxes).
    """
    grouped = data.groupby('image_name')
    df = grouped['bbox'].apply(list).reset_index(name='bboxes')
    # Both aggregations come from the same groupby, so the rows line up by position
    df['classes'] = grouped['class'].apply(list).reset_index(drop=True)

    df.to_csv(data_type + '.csv', index=False)
    print(data_type)
    print(df.shape)
    print(df.head())

    # Hand the grouped frame back so callers can keep working with it
    return df

# Write train.csv / validation.csv with one row per image.
# The results are bound to new names so that df_train / df_valid keep holding
# the per-box splits and this cell can be re-run without errors.
train_grouped = convert(df_train, 'train')
valid_grouped = convert(df_valid, 'validation')

In [None]:
# Change directory to 'yolov5' where the YOLOv5 repository is cloned
# (the following cells create the dataset folders relative to this location)
%cd yolov5

# List the contents of the current directory
!ls

In [None]:
# Create a directory named 'tank_data' within 'yolov5'
!mkdir tank_data

# Change directory to 'tank_data'
%cd tank_data

In [None]:
# Create subdirectories for images and labels
!mkdir images
!mkdir labels

# Change directory to 'images'
%cd images

# Create subdirectories for training and validation images
!mkdir train
!mkdir validation

# Change directory to 'labels'
%cd ..
%cd labels

# Create subdirectories for training and validation labels
!mkdir train
!mkdir validation

# Move back to the root directory
%cd ..
%cd ..
%cd ..

In [None]:
# Walk the tank_data tree and print every folder followed by its subfolders,
# to verify that the expected directory layout exists
for folder, subfolders, _ in os.walk('/kaggle/working/yolov5/tank_data'):
    print(folder)
    print(subfolders)

In [None]:
INPUT_PATH = '/kaggle/working/'
OUTPUT_PATH = '/kaggle/working/yolov5/tank_data'
# Side length in pixels used to normalise box coordinates.
# NOTE(review): assumes square 512x512 patches - confirm against the images.
IMG_SIZE = 512


def process_data(data, data_type='train', img_size=IMG_SIZE, input_path=None, output_path=None):
    """Write YOLO label files and copy the matching images for one split.

    For every row of ``data`` a text file ``labels/<data_type>/<image_name>.txt``
    is written below ``output_path`` with one line per box:
    ``class x_center y_center width height``, all coordinates divided by
    ``img_size``. The image ``train/<image_name>.jpg`` below ``input_path`` is
    copied to ``images/<data_type>/<image_name>.jpg`` below ``output_path``.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per image with the columns ``image_name``, ``bboxes`` (list of
        ``[x_min, y_min, width, height]`` boxes) and ``classes`` (list of
        class IDs in the same order).
    data_type : str, default 'train'
        Name of the split sub-folder to write into.
    img_size : int or float, default IMG_SIZE
        Value the pixel coordinates are divided by.
    input_path : str, optional
        Folder containing ``train/``; defaults to ``INPUT_PATH``.
    output_path : str, optional
        Dataset root with ``labels/`` and ``images/``; defaults to ``OUTPUT_PATH``.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If a target folder is missing or a source image cannot be found.
    """
    # Resolve the defaults at call time so later changes to the constants apply
    if input_path is None:
        input_path = INPUT_PATH
    if output_path is None:
        output_path = OUTPUT_PATH

    for _, row in tqdm(data.iterrows(), total=len(data)):
        image_name = row['image_name']

        yolo_rows = []
        for (x, y, w, h), class_id in zip(row['bboxes'], row['classes']):
            # Boxes are stored as top-left corner + size; YOLO wants the centre
            yolo_rows.append([
                class_id,
                (x + w / 2) / img_size,
                (y + h / 2) / img_size,
                w / img_size,
                h / img_size,
            ])

        # reshape(-1, 5) keeps the array two-dimensional even without boxes,
        # so an image with no boxes yields an empty label file
        yolo_data = np.array(yolo_rows, dtype=float).reshape(-1, 5)
        np.savetxt(
            os.path.join(output_path, 'labels', data_type, f"{image_name}.txt"),
            yolo_data,
            fmt=["%d", "%f", "%f", "%f", "%f"]
        )
        shutil.copyfile(
            os.path.join(input_path, 'train', f"{image_name}.jpg"),
            os.path.join(output_path, 'images', data_type, f"{image_name}.jpg")
        )

def _load_grouped(csv_path):
    """Read a per-image CSV and turn its list columns back into Python lists.

    The ``bboxes`` and ``classes`` columns are stored as text in the CSV, so
    each cell is parsed with ``ast.literal_eval``.
    """
    frame = pd.read_csv(csv_path)
    for column in ('bboxes', 'classes'):
        frame[column] = frame[column].apply(ast.literal_eval)
    return frame


# Reload both splits from disk, then write their label files and images
df_train = _load_grouped('/kaggle/working/train.csv')
df_valid = _load_grouped('/kaggle/working/validation.csv')

process_data(df_train, data_type='train')
process_data(df_valid, data_type='validation')

In [None]:
# Walk the dataset tree again, this time also listing the files in each folder
for folder, subfolders, filenames in os.walk('yolov5/tank_data'):
    print(folder)
    print(subfolders)
    print(filenames)

In [None]:
# Print the first label file of the train split to check its contents
labels_dir = 'yolov5/tank_data/labels/train/'
first_label = os.listdir(labels_dir)[0]

# The context manager closes the file as soon as the block ends
with open(labels_dir + first_label) as f:
    print(f.name)
    for l in f:
        print(l)

In [None]:
# Enter the YOLOv5 checkout: tank.yaml is written here, and its dataset paths
# as well as the train.py call further down are relative to this folder
%cd yolov5

In [None]:
%%writefile tank.yaml

train: tank_data/images/train
val: tank_data/images/validation
nc: 3
names: ['Tank','Tank Cluster','Floating Head Tank']

In [None]:
# List the current directory (it should now include tank.yaml)
!ls

In [None]:
# List the models folder, which holds the yolov5l.yaml config used for training
%cd models
!ls

In [None]:
# Return to the parent folder, from where train.py is launched
%cd ..
!ls

In [None]:
# Train on the tank dataset described by tank.yaml: image size 512, batch
# size 16, 200 epochs, the yolov5l model config, run name "oiltank".
# NOTE(review): no --weights flag is given, so train.py uses its own default -
# confirm whether pretrained weights or training from scratch is intended.
!python train.py --img 512 --batch 16 --epochs 200 --data tank.yaml --cfg models/yolov5l.yaml --name oiltank

In [None]:
path = '/kaggle/working/'


def plot_BBox(img_name, ax):
    """Draw a test image and, if it is labelled, its ground-truth boxes.

    Parameters
    ----------
    img_name : str
        File name of an image inside the ``test`` folder below ``path``,
        formatted as ``<no>_<row>_<col>.<ext>``.
    ax : matplotlib axes
        Axes the image (and boxes) are drawn on.

    Notes
    -----
    The entry in ``json_labels`` is looked up by position as
    ``(no - 1) * 100 + row * 10 + col``.
    NOTE(review): this presumes labels.json lists 100 patches per image in
    row-major order - confirm against the file.
    Boxes come from ``conv_bbox`` in (y_min, x_min, y_max, x_max) order and
    are passed to ``vision.ImageBBox.create`` unchanged.
    """
    sns.set({'figure.figsize':(20,10)})

    # Show the plain image first; this call also sets the panel title
    image = vision.open_image(os.path.join(path + 'test', img_name))
    image.show(ax=ax, title='Ground Truth ' + img_name)

    # Locate this patch's record from the numbers encoded in its file name
    no, row, col = (int(part) for part in img_name.split('.')[0].split('_'))
    entry = json_labels[(no - 1) * 100 + row * 10 + col]['label']

    # Patches marked 'Skip' carry no annotations: leave the plain image as is
    if entry == 'Skip':
        return

    bboxes = []
    classes = []
    for label, boxes in entry.items():
        for box in boxes:
            bboxes.append(conv_bbox(box['geometry']))
            classes.append(label)

    # Every box gets its own label index pointing at its class name
    labels = list(range(len(classes)))

    # Overlay the boxes and class names on the image
    BBox = vision.ImageBBox.create(*image.size, bboxes, labels, classes)
    image.show(y=BBox, ax=ax)


In [None]:
# Folder with all image patches of the dataset
PATCH_DIR = '/kaggle/input/oil-storage-tanks/Oil Tanks/image_patches/'

# 30 rows of two panels; the figure is 20 inches wide and 10 inches tall per row
sns.set({'figure.figsize':(20,30*10)})
fig, ax = plt.subplots(30, 2)

# Entries 40..69 of the sorted patch listing, one per row
shown_patches = sorted(os.listdir(PATCH_DIR))[40:70]

for i, img_f in enumerate(shown_patches):
    left_panel, right_panel = ax[i]

    # Left panel: the patch as stored in the dataset.
    # NOTE(review): the title says 'Predicted', but no model output is drawn
    # in this cell - confirm what this panel is meant to show.
    image = vision.open_image(PATCH_DIR + img_f)
    image.show(ax=left_panel, title='Predicted ' + img_f)

    # Right panel: the same patch with its ground-truth boxes from plot_BBox
    plot_BBox(img_f, right_panel)

# Render the whole grid
plt.show()
