In [1]:
#importing libraries
import os
import xml.etree.ElementTree as et
from glob import glob
import pandas as pd

In [2]:
#defining paths
BASE_PATH= r'C:\Users\96657\Downloads\Oxford Pets.v2-by-species.voc'
TRAIN_PATH= r'C:\Users\96657\Downloads\Oxford Pets.v2-by-species.voc\dataset\train'
TEST_PATH= r'C:\Users\96657\Downloads\Oxford Pets.v2-by-species.voc\dataset\test'
YAML_PATH= r'C:\Users\96657\Downloads\Oxford Pets.v2-by-species.voc\data.yaml'

In [3]:
#extracting object information from xml files

In [4]:
def extract_obj_info(filename):
 tree= et.parse(filename)
 root= tree.getroot()
    
 #initializing a new list for each file
 xml_data=[]
    
 #extracting image title 
 image_title=root.find('filename').text
    
 #extracting image size
 width,height=int(root.find('size').find('width').text),int(root.find('size').find('height').text)
    
 #extracting object label and bounding box parameters
 #creating for loop to extract all (multiple) objects from a single xml file
 objects=root.findall('object')
 for o in objects:
     name=o.find('name').text
     xmin=int(o.find('bndbox').find('xmin').text)
     xmax=int(o.find('bndbox').find('xmax').text)
     ymin=int(o.find('bndbox').find('ymin').text)
     ymax=int(o.find('bndbox').find('ymax').text)
     xml_data.append([image_title,width,height,name,xmin,xmax,ymin,ymax])
 return xml_data     
 

In [5]:
#loading xml files from training and testing set
train_xml_list=glob(os.path.join(TRAIN_PATH, '*.xml'))
test_xml_list=glob(os.path.join(TEST_PATH, '*.xml'))


In [6]:
#titles of first 3 xml files of train set
train_xml_list[:3]

['C:\\Users\\96657\\Downloads\\Oxford Pets.v2-by-species.voc\\dataset\\train\\Abyssinian_100_jpg.rf.ac857e7c2457ab89dd6edacb21e6fb7c.xml',
 'C:\\Users\\96657\\Downloads\\Oxford Pets.v2-by-species.voc\\dataset\\train\\Abyssinian_102_jpg.rf.e89f4c1898ba0c2bf4ff8b24f47a5f4c.xml',
 'C:\\Users\\96657\\Downloads\\Oxford Pets.v2-by-species.voc\\dataset\\train\\Abyssinian_103_jpg.rf.d6f56d3f97d62e11ab1c43036e98161f.xml']

In [7]:
#passing the list of xml files to the object info function
train_data= []
test_data= []

#training data
for train_list in map( extract_obj_info, train_xml_list):
   #using extend not append as append will add the entire train_list to the train_data as one element (nested list). 
   #but i want that all elements of train_list be mapped to the train_data
   train_data.extend(train_list)
    
#testing data
for test_list in map( extract_obj_info, test_xml_list):
   test_data.extend(test_list)
    

In [8]:
print(type(train_data))
print(len(train_data))


<class 'list'>
2527


In [9]:
print(train_data[:3])


[['Abyssinian_100_jpg.rf.ac857e7c2457ab89dd6edacb21e6fb7c.jpg', 394, 500, 'cat', 151, 335, 71, 267], ['Abyssinian_102_jpg.rf.e89f4c1898ba0c2bf4ff8b24f47a5f4c.jpg', 500, 465, 'cat', 23, 325, 27, 320], ['Abyssinian_103_jpg.rf.d6f56d3f97d62e11ab1c43036e98161f.jpg', 500, 351, 'cat', 241, 362, 68, 196]]


In [10]:
test_data[:3]

[['Abyssinian_127_jpg.rf.4425d22917ead085ad038931b4806c8f.jpg',
  266,
  400,
  'cat',
  96,
  179,
  246,
  328],
 ['Abyssinian_131_jpg.rf.e8acfb60e4d01529586b9d81930b35a2.jpg',
  311,
  320,
  'cat',
  17,
  206,
  25,
  180],
 ['Abyssinian_149_jpg.rf.84b8351968b42b38478e0b6c35d58d07.jpg',
  500,
  375,
  'cat',
  155,
  347,
  39,
  251]]

In [10]:
#converting into dataframe format
train_df=pd.DataFrame(train_data, columns= ['filename', 'width', 'height', 'name', 'xmin', 'xmax', 'ymin', 'ymax'])
test_df=pd.DataFrame(test_data, columns= ['filename', 'width', 'height', 'name', 'xmin', 'xmax', 'ymin', 'ymax'])

In [11]:
#visualising train_df
train_df.shape

(2527, 8)

In [12]:
# displaying first five rows
train_df.head(3)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,Abyssinian_100_jpg.rf.ac857e7c2457ab89dd6edacb...,394,500,cat,151,335,71,267
1,Abyssinian_102_jpg.rf.e89f4c1898ba0c2bf4ff8b24...,500,465,cat,23,325,27,320
2,Abyssinian_103_jpg.rf.d6f56d3f97d62e11ab1c4303...,500,351,cat,241,362,68,196


In [13]:
train_df['name'].unique()


array(['cat', 'dog'], dtype=object)

In [14]:
len(train_df['filename'].unique())

2523

In [15]:
#checking the count of instances for every label
train_df['name'].value_counts()

name
dog    1731
cat     796
Name: count, dtype: int64

In [16]:
#visualizing test_df
test_df.shape

(358, 8)

In [18]:
test_df.head(3)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,Abyssinian_127_jpg.rf.4425d22917ead085ad038931...,266,400,cat,96,179,246,328
1,Abyssinian_131_jpg.rf.e8acfb60e4d01529586b9d81...,311,320,cat,17,206,25,180
2,Abyssinian_149_jpg.rf.84b8351968b42b38478e0b6c...,500,375,cat,155,347,39,251


In [19]:
len(test_df['filename'].unique())

358

In [20]:
#calculating normalized bounding box coordinates and dimensions and label encoding
#adding new columns to testing data
for df in (train_df, test_df):
  df['centre x']= ((df['xmin']+ df['xmax'])/2)/df['width']
  df['centre y']= ((df['ymin']+ df['ymax'])/2)/df['height']
  df['w']= ((df['xmax']- df['xmin'])/2)/df['width']
  df['h']= ((df['ymax']- df['ymin'])/2)/df['height']
  #label encoding
#data is in categorical format (cat or dog) but we have to convert it in numerical format
#replace cat with 0 and dog with 1
  labels={'cat':0, 'dog':1}
  df['id']=df['name'].map(labels)

In [21]:
train_df.head(3)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centre x,centre y,w,h,id
0,Abyssinian_100_jpg.rf.ac857e7c2457ab89dd6edacb...,394,500,cat,151,335,71,267,0.616751,0.338,0.233503,0.196,0
1,Abyssinian_102_jpg.rf.e89f4c1898ba0c2bf4ff8b24...,500,465,cat,23,325,27,320,0.348,0.373118,0.302,0.315054,0
2,Abyssinian_103_jpg.rf.d6f56d3f97d62e11ab1c4303...,500,351,cat,241,362,68,196,0.603,0.376068,0.121,0.182336,0


In [22]:
test_df.head(3)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centre x,centre y,w,h,id
0,Abyssinian_127_jpg.rf.4425d22917ead085ad038931...,266,400,cat,96,179,246,328,0.516917,0.7175,0.156015,0.1025,0
1,Abyssinian_131_jpg.rf.e8acfb60e4d01529586b9d81...,311,320,cat,17,206,25,180,0.358521,0.320312,0.303859,0.242188,0
2,Abyssinian_149_jpg.rf.84b8351968b42b38478e0b6c...,500,375,cat,155,347,39,251,0.502,0.386667,0.192,0.282667,0


In [23]:
#creating text files
#for every image a corresponding text file will be generated holding the values of centre x, y , w,h, id
def save_labels(df, folder_path):
    for filename, group in df.groupby('filename'):    #groupby [prevents multiple text files forming for a single image
      txt_filename= os.path.join(folder_path, os.path.splitext(filename)[0] + '.txt') 
# every image has image title and extension. image title is at 0th index and extension at 1th. here, we extracted only the image title (at oth index).
# txt files will be generated with same filenames
#now storing parameters in the txt file
      group[['id', 'centre x', 'centre y', 'w', 'h']].to_csv(txt_filename, sep=' ', index=False, header=False ) #usually csv files have comma separated values, but we want space separated. also we dont want index or header.
    


In [24]:
save_labels(train_df, TRAIN_PATH)
save_labels(test_df, TEST_PATH)




In [25]:
#creating YAML file



In [26]:
#training YOLO model

In [27]:
!pip install ultralytics



In [28]:
os.chdir(BASE_PATH)

In [29]:
ls


 Volume in drive C is Windows-SSD
 Volume Serial Number is C669-8162

 Directory of C:\Users\96657\Downloads\Oxford Pets.v2-by-species.voc

10/31/2024  01:01 AM    <DIR>          .
10/30/2024  08:35 PM    <DIR>          ..
10/31/2024  12:00 AM    <DIR>          .ipynb_checkpoints
10/31/2024  01:15 AM               175 data.yaml
10/31/2024  01:16 AM    <DIR>          dataset
10/30/2024  08:35 PM               763 README.dataset.txt
10/30/2024  08:35 PM               380 README.roboflow.txt
10/31/2024  12:51 AM    <DIR>          runs
               3 File(s)          1,318 bytes
               5 Dir(s)  36,914,839,552 bytes free


from ultralytics import YOLO

In [30]:
# Step 1: Import the necessary library
from ultralytics import YOLO

# Step 2: Load the model configuration
model = YOLO('yolov8s.yaml')




In [None]:
 #training the model
model.train(data='data.yaml', epochs=30, batch=8, name='Model')

Ultralytics 8.3.25  Python-3.12.6 torch-2.5.1+cpu CPU (Intel Core(TM) i5-8265U 1.60GHz)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8s.yaml, data=data.yaml, epochs=30, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=Model5, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_b

[34m[1mtrain: [0mScanning C:\Users\96657\Downloads\Oxford Pets.v2-by-species.voc\dataset\train.cache... 2523 images, 0 background[0m
[34m[1mval: [0mScanning C:\Users\96657\Downloads\Oxford Pets.v2-by-species.voc\dataset\test.cache... 358 images, 0 backgrounds, 0[0m


Plotting labels to runs\detect\Model5\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\Model5[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30         0G      3.099      4.556      3.755          4        640: 100%|██████████| 316/316 [3:59:10<00:00,
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [02:34


                   all        358        358      0.509    0.00847    0.00453    0.00129

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30         0G      2.816      3.693       2.99         14        640:  71%|███████   | 223/316 [2:34:44<1:07:1