In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read() call in the download loop below.
CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'war-tech-v2-0-by-gontech:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F3810166%2F6604065%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240307%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240307T144641Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Db3bccd31011d9920b59ad8d08e65b145c3ed10b7a34435a66a1f5b858174de5e31fb9a28f19801b7bad2af6fa5dc1e8e48f8cb3cca4bc2f31eaf8ccdc4b531135e09b52d553b824d44b93ca274e58872b572c720887e46783daf60b6226c9b8c563e701f9fe4f86f32d87fe91dd5b0fd733e67f1a47cb127cc2f7096c257e752be427ef0d472288ff15f7ff4e20a2cacce2ae4fa9270aba828760e268876e86e32c9244157352805b1669cc2d8f67e911cf885c768b37f81a710dbedc00417fcf7fdbd3037bed88f4c418e530b7f4a53323555e614a9c9a898d39692bb3b4c124ad78a38577bc258a8058fd0bde2f81f7cda9e2efb0e74460bdbe6df35770bb4'

# Directories that this cell (re)creates and that the downloaded data is extracted under.
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible code - confirm before removing.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<url-encoded URL>" entry of DATA_SOURCE_MAPPING and
# unpack it (zip or tar) into KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first colon only, so a stray ':' in the URL part cannot break unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header may be absent; without a known total the progress bar stays empty
            # instead of raising on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Make sure every byte is on the temp file and rewind before handing it to a reader.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: the original `as tarfile` rebound the
                # tarfile module, which would break the next tar source in the loop.
                with tarfile.open(fileobj=tfile) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


In [None]:
import numpy as np
import pandas as pd
import os

In [None]:
# os.listdir returns entries in arbitrary order; sorting gives the later
# train/val/test split a stable input across runs and machines.
file_names = sorted(os.listdir("/kaggle/input/war-tech-v2-0-by-gontech/war_TCHBYGON/obshaya_papk"))

In [None]:
# makedirs(exist_ok=True) creates "datasets" implicitly and keeps the cell re-runnable;
# plain os.mkdir raised FileExistsError on the second execution.
for split_dir in ("datasets/train", "datasets/val", "datasets/test"):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
# One images/ and one labels/ folder per split. exist_ok=True makes the cell safe to
# re-run (os.mkdir raised FileExistsError when the folders were already there).
for split_name in ("train", "test", "val"):
    for content_kind in ("images", "labels"):
        os.makedirs(os.path.join("datasets", split_name, content_kind), exist_ok=True)

In [None]:
from sklearn.model_selection import train_test_split

# A fixed random_state makes the split reproducible: without it every run moves
# images between train/val/test.
SPLIT_SEED = 42
# Hold out 10% of the files for testing, then 15% of the remainder for validation.
train, test = train_test_split(file_names, test_size=0.1, random_state=SPLIT_SEED)
# `Val` keeps its original capitalisation because later cells refer to it by that name.
train, Val = train_test_split(train, test_size=0.15, random_state=SPLIT_SEED)

In [None]:
import shutil

In [None]:
# Source folder holding all images; the test and validation copy cells reuse `orpath`.
orpath = "/kaggle/input/war-tech-v2-0-by-gontech/war_TCHBYGON/obshaya_papk"
# Copy every training image into the training images folder.
for image_name in train:
    source_file = os.path.join(orpath, image_name)
    target_file = os.path.join("datasets/train/images", image_name)
    shutil.copyfile(source_file, target_file)

In [None]:
# Copy every test image into the test images folder.
for image_name in test:
    source_file = os.path.join(orpath, image_name)
    target_file = os.path.join("datasets/test/images", image_name)
    shutil.copyfile(source_file, target_file)

In [None]:
# Copy every validation image into the validation images folder.
for image_name in Val:
    source_file = os.path.join(orpath, image_name)
    target_file = os.path.join("datasets/val/images", image_name)
    shutil.copyfile(source_file, target_file)

In [None]:
# Load the annotation export; later cells read its image, xmin, xmax, ymin, ymax and label columns.
y = pd.read_csv("/kaggle/input/war-tech-v2-0-by-gontech/war_TCHBYGON/war_tech_gont-export.csv")

In [None]:
# Peek at the last image names; in the recorded output they are URL-encoded (spaces appear as %20).
y["image"].tail()

3421    tank%20(97).jpg
3422    tank%20(97).jpg
3423    tank%20(98).jpg
3424    tank%20(99).jpg
3425    tank%20(99).jpg
Name: image, dtype: object

In [None]:
from sklearn import preprocessing

# LabelEncoder assigns an integer id to each distinct value it is fitted on.
# The fitted encoder is kept so the name -> id mapping can be printed afterwards.
label_encoder = preprocessing.LabelEncoder()

# Overwrite column 'label' in place with its integer encoding.
# NOTE(review): this cell is not idempotent - running it twice re-fits the encoder on the
# already-encoded integers, so the original class names are lost from `classes_`.
y['label']= label_encoder.fit_transform(y['label'])

In [None]:
# Pair each class name known to the encoder with the integer id it maps to, then show the table.
encoded_class_names = label_encoder.classes_
encoded_class_ids = label_encoder.transform(encoded_class_names)
le_name_mapping = {name: class_id for name, class_id in zip(encoded_class_names, encoded_class_ids)}
print(le_name_mapping)


{'artilleriya': 0, 'bmp': 1, 'bpla': 2, 'bronemashina': 3, 'btr': 4, 'pehota': 5, 'rszo': 6, 'tank': 7}


In [None]:
import cv2

def data2form(filename, xmin, xmax, ymin, ymax, label,
              image_dir="/kaggle/input/war-tech-v2-0-by-gontech/war_TCHBYGON/obshaya_papk"):
    """Format one bounding box as a YOLO label line.

    The line is ``"<label> <x_center> <y_center> <box_width> <box_height>"`` with
    the four geometry values divided by the image size, which is read from the
    image file itself.

    Args:
        filename: Image file name inside ``image_dir``.
        xmin, xmax, ymin, ymax: Box edges, assumed to be pixel coordinates in that
            image (TODO confirm against the annotation export).
        label: Class id written as the first field.
        image_dir: Folder containing the images; defaults to the dataset folder
            used throughout the notebook.

    Returns:
        The label line without a trailing newline, or ``None`` when the image
        cannot be read.
    """
    img = cv2.imread(f"{image_dir}/{filename}")
    if img is None:
        # imread signals an unreadable/missing file by returning None, not by raising.
        return None

    # shape is (rows, cols[, channels]) -> height first, then width.
    height, width = img.shape[:2]
    xc = ((xmin + xmax) / 2) / width
    yc = ((ymin + ymax) / 2) / height
    # The last two fields are the box size relative to the image, not the image size.
    box_width = (xmax - xmin) / width
    box_height = (ymax - ymin) / height
    return f"{label} {xc} {yc} {box_width} {box_height}"

In [None]:
import tqdm

# Sets make the per-row "which split holds this image?" lookup O(1) instead of a list scan.
train_names, val_names = set(train), set(Val)

# Collect all boxes per label file first, so each file is written exactly once in "w"
# mode: re-running the cell no longer appends duplicate boxes, and every box gets its
# own line.
label_lines = {}
# Iterating plain row dicts replaces `for row in y.loc`, which only terminated through a
# KeyError swallowed by a bare except, and avoids assigning into a row copy.
for row in tqdm.tqdm(y.to_dict("records")):
    # File names in the export are URL-encoded; the files on disk contain real spaces.
    image_name = row["image"].replace('%20', ' ')
    data = data2form(image_name, row["xmin"], row["xmax"], row["ymin"], row["ymax"], row["label"])
    if not data:
        # No label line could be produced (e.g. the image is missing); skip the row.
        continue
    if image_name in train_names:
        folder = "datasets/train"
    elif image_name in val_names:
        folder = "datasets/val"
    else:
        folder = "datasets/test"
    # splitext drops the extension whatever its length (the old [:-4] assumed 3 letters).
    label_path = f'{folder}/labels/{os.path.splitext(image_name)[0]}.txt'
    label_lines.setdefault(label_path, []).append(data)

for label_path, lines in label_lines.items():
    with open(label_path, 'w') as file:
        file.write("\n".join(lines) + "\n")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  row["image"]=row["image"].replace('%20', ' ')
2131it [00:05, 443.58it/s][ WARN:0@5.240] global loadsave.cpp:248 findDecoder imread_('/kaggle/input/war-tech-v2-0-by-gontech/war_TCHBYGON/obshaya_papk/pehota (155).jpg'): can't open/read file: check file path/integrity
[ WARN:0@5.241] global loadsave.cpp:248 findDecoder imread_('/kaggle/input/war-tech-v2-0-by-gontech/war_TCHBYGON/obshaya_papk/pehota (155).jpg'): can't open/read file: check file path/integrity
2485it [00:05, 423.55it/s][ WARN:0@6.118] global loadsave.cpp:248 findDecoder imread_('/kaggle/input/war-tech-v2-0-by-gontech/war_TCHBYGON/obshaya_papk/pehota (320).jpg'): can't open/read file: check file path/integrity
2528it [00:06, 421.87it/s][ WARN:0@6.262] global loadsave.cpp:248 findDecoder imread_('/kaggle/input/war-tech-v2

In [None]:
# Dataset description consumed by the training cell, as YAML text.
# The class ids follow the label-encoder mapping printed earlier in the notebook.
# Each entry under `names` needs a space after the colon: "0:artilleriya" is read by
# YAML as plain text, which turns `names` into one long string instead of an id -> name map.
config_str = """
path: /
train: /kaggle/working/datasets/train
val: /kaggle/working/datasets/val
test: /kaggle/working/datasets/test
names:
  0: artilleriya
  1: bmp
  2: bpla
  3: bronemashina
  4: btr
  5: pehota
  6: rszo
  7: tank
"""


In [None]:
import yaml

# Parse the config text, re-serialise it in block style and store it next to the notebook.
config_data = yaml.safe_load(config_str)
file_path = 'dataset.yaml'
with open(file_path, 'w') as yaml_file:
    yaml_file.write(yaml.dump(config_data, default_flow_style=False))
print(f"YAML file saved to {file_path}")

YAML file saved to dataset.yaml


In [None]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.1.24-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.4/40.4 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Downloading ultralytics-8.1.24-py3-none-any.whl (719 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m719.5/719.5 kB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.1.24
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Point the Ultralytics datasets_dir setting at the working datasets folder and switch the
# wandb setting off (the printed settings below the cell confirm both values).
!yolo settings datasets_dir='/kaggle/working/datasets' wandb=False

  pid, fd = os.forkpty()


💡 Learn about settings at https://docs.ultralytics.com/quickstart/#ultralytics-settings
Printing '[1m[30m/root/.config/Ultralytics/settings.yaml[0m'

settings_version: 0.0.4
datasets_dir: /kaggle/working/datasets
weights_dir: weights
runs_dir: runs
uuid: 1bfc3e992d24318da58ddee183be5bf9388a31f26bab1738e986ec4d297417ff
sync: true
api_key: ''
openai_api_key: ''
clearml: true
comet: true
dvc: true
hub: true
mlflow: true
neptune: true
raytune: true
tensorboard: true
wandb: false



In [None]:
from ultralytics import YOLO

# Build the YOLOv8n architecture from its YAML and transfer the pretrained weights into it.
# The two earlier `model = YOLO(...)` assignments were alternatives that were immediately
# overwritten, so only this final construction is kept.
model = YOLO('yolov8n.yaml').load('yolov8n.pt')

# NOTE(review): the recorded output shows an interactive W&B API-key prompt even though the
# wandb setting is false; if that still happens, setting the WANDB_MODE=disabled environment
# variable before this cell is the usual way to keep Run All non-interactive - confirm.
# Train the model
results = model.train(data='dataset.yaml', epochs=100, imgsz=640)

Transferred 355/355 items from pretrained weights
Ultralytics YOLOv8.1.24 🚀 Python-3.10.13 torch-2.1.2+cpu CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.yaml, data=dataset.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_cro

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
import os
def list_files(startpath):
    """Print the directory tree under ``startpath``, indented four spaces per level.

    Each directory is printed as ``name/`` followed by its files one level deeper.
    Entries are visited in sorted order so the output is stable between runs.

    Args:
        startpath: Root directory to list. A trailing path separator is accepted.
    """
    # Normalise first so "dir/" and "dir" behave the same (basename of "dir/" is empty).
    startpath = os.path.normpath(startpath)
    for root, dirs, files in os.walk(startpath):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        # Depth comes from the path relative to the root; the old root.replace(startpath, '')
        # removed every occurrence of the text and could miscount nested levels.
        relative = os.path.relpath(root, startpath)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1
        indent = ' ' * 4 * (level)
        print('{}{}/'.format(indent, os.path.basename(root)))
        subindent = ' ' * 4 * (level + 1)
        for f in sorted(files):
            print('{}{}'.format(subindent, f))
# The notebook builds its tree under "datasets"; "data" is never created, and os.walk
# silently yields nothing for a missing directory, so the old call printed no output.
list_files("datasets")