In [1]:
import random
from tqdm.auto import tqdm
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from types import SimpleNamespace
import torch
from torch import nn
from torchvision import transforms as M
import torchvision.transforms.functional as F
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes, make_grid, save_image
from torchvision.ops import box_area, box_convert, box_iou

!pip install -q torchmetrics
!pip install -q lightning

from torchmetrics.detection import MeanAveragePrecision
import lightning as L

import wandb

[0m

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [2]:
# Authenticate this session with Weights & Biases before any run is created.
wandb.login()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [3]:
# Notebook-wide constants: W&B project/entity settings plus the batch size
# and worker count.
config = SimpleNamespace(
    PROJECT_NAME='WHEAT DETECTION WITH PYTORCH LIGHTNING AND WANDB',
    ENTITY='ABISOYE',
    SAVE_CODE=False,
    BATCH_SIZE=32,
    NUM_WORKERS=os.cpu_count(),
)

In [4]:
# Start the W&B run that records this exploratory analysis; project, entity
# and code-saving behaviour come from `config`.
run = wandb.init(
    project=config.PROJECT_NAME,
    entity=config.ENTITY,
    job_type='EDA',
    name='Exploratory Data Analysis',
    save_code=config.SAVE_CODE,
)

[34m[1mwandb[0m: Currently logged in as: [33mabisoye[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
# Root of the competition input plus the image directories and CSV files
# located directly beneath it.
data_path = Path('/kaggle/input/global-wheat-detection')
train_path = Path(data_path, 'train')
test_path = Path(data_path, 'test')
train_csv_path = Path(data_path, 'train.csv')
ss_path = Path(data_path, 'sample_submission.csv')

In [6]:
# Load the training annotations and the sample submission file.
train_df = pd.read_csv(train_csv_path)
ss = pd.read_csv(ss_path)

In [7]:
# Wrap both DataFrames as W&B tables.
train_table = wandb.Table(dataframe=train_df)
ss_table = wandb.Table(dataframe=ss)

# Log each table to the current run under its own key.
run.log({'train df' : train_table})
run.log({'sample submission df': ss_table})

In [8]:
# Dataset-type artifact; later cells add the data directory and the EDA table to it.
artifact = wandb.Artifact(name = 'Exploratory-Data-Analysis', type='dataset')

In [9]:
# Add the whole competition input directory to the artifact under the name 'data_path'.
artifact.add_dir(data_path,'data_path')
# Alternative kept for reference: add each sub-directory / file separately.
# artifact.add_dir(train_path,'train_path')
# artifact.add_dir(test_path,'test_path')
# artifact.add_file(train_csv_path,'train_csv')
# artifact.add_file(ss_path,'sample_submission')

[34m[1mwandb[0m: Adding directory to artifact (/kaggle/input/global-wheat-detection)... Done. 12.1s


In [10]:
# Count the entries in the train and test directories.
len(os.listdir(train_path)), len(os.listdir(test_path))

(3422, 10)

In [11]:
def group_boxes(group):
    """Parse the ``bbox`` strings of ``group`` into a 2-D float array.

    Each ``bbox`` value is split on commas into separate columns. The first
    character of the first field and the last character of the fourth field
    (the enclosing brackets in strings such as
    ``"[834.0, 222.0, 56.0, 36.0]"``) are dropped before all fields are cast
    to ``float``.

    Parameters
    ----------
    group : pandas.DataFrame
        Frame with a string column named ``bbox``.

    Returns
    -------
    numpy.ndarray
        One row per row of ``group``, one column per comma-separated field.
    """
    fields = group['bbox'].str.split(',', expand=True)
    # Strip the opening bracket from the first field and the closing bracket
    # from the fourth.
    fields[0] = fields[0].str[1:]
    fields[3] = fields[3].str[:-1]
    return fields.to_numpy().astype(float)

In [12]:
# Preview the annotations: one row per bounding box, with `bbox` stored as a bracketed string.
train_df.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [13]:
# Number of annotation rows contributed by each `source` value.
train_df.source.value_counts()

ethz_1       51489
arvalis_1    45716
rres_1       20236
arvalis_3    16665
usask_1       5807
arvalis_2     4179
inrae_1       3701
Name: source, dtype: int64

In [14]:
# Collapse the per-box rows into one row per image: `boxes` holds the array
# that group_boxes returns for that image's rows.
train_df_bbox_per_img_id = (
    train_df
    .groupby('image_id')
    .apply(group_boxes)
    .rename('boxes')
    .reset_index()
)

In [15]:
# Preview the per-image frame: one row per image_id with its array of boxes.
train_df_bbox_per_img_id.head()

Unnamed: 0,image_id,boxes
0,00333207f,"[[0.0, 654.0, 37.0, 111.0], [0.0, 817.0, 135.0..."
1,005b0d8bb,"[[765.0, 879.0, 116.0, 79.0], [84.0, 539.0, 15..."
2,006a994f7,"[[437.0, 988.0, 98.0, 36.0], [309.0, 527.0, 11..."
3,00764ad5d,"[[89.0, 256.0, 113.0, 107.0], [216.0, 282.0, 1..."
4,00b5fefed,"[[709.0, 97.0, 204.0, 105.0], [775.0, 250.0, 1..."


In [16]:
# Single-class label map (class id -> name); passed to W&B as `class_labels` in create_box_image.
class_id_to_label = {0 : 'wheat'}
class_id_to_label

{0: 'wheat'}

In [17]:
def convert_bbox_to_wandb_bbox_format(box):
    """Turn one corner-format box into the dict layout used for W&B box overlays.

    Parameters
    ----------
    box : indexable of scalars exposing ``.item()``
        Read as ``[min_x, min_y, max_x, max_y]``; the caller in this notebook
        passes rows of an ``xyxy`` tensor.

    Returns
    -------
    dict
        ``position`` (``minX``/``maxX``/``minY``/``maxY`` as Python scalars),
        a fixed ``class_id`` of 0, ``domain`` set to ``'pixel'`` and the
        caption ``'wheat'``.
    """
    min_x, min_y, max_x, max_y = (box[idx].item() for idx in range(4))
    position = dict(minX=min_x, maxX=max_x, minY=min_y, maxY=max_y)
    return {
        'position': position,
        'class_id': 0,
        'domain': 'pixel',
        'box_caption': 'wheat',
    }

In [18]:
def create_box_image(image, boxes):
    """Wrap ``image`` in a ``wandb.Image`` carrying its ground-truth boxes.

    Every element of ``boxes`` is converted with
    ``convert_bbox_to_wandb_bbox_format``; the resulting list is attached as a
    single box group together with the ``class_id_to_label`` mapping.

    Returns the constructed ``wandb.Image``.
    """
    box_data = list(map(convert_bbox_to_wandb_bbox_format, boxes))
    # The group key is what W&B stores for this overlay; keep it unchanged.
    overlay = {
        "ground-truth bounding_boex": {
            "box_data": box_data,
            "class_labels": class_id_to_label,
        }
    }
    return wandb.Image(image, boxes=overlay)

In [19]:

def create_table(train_path = train_path, df = train_df_bbox_per_img_id, N = 200):
    """Build a W&B table of the first ``N`` images with their boxes overlaid.

    Parameters
    ----------
    train_path : path-like
        Directory containing the ``<image_id>.jpg`` files.
    df : pandas.DataFrame
        Frame with an ``image_id`` column and a ``boxes`` column. Each
        ``boxes`` entry is converted from ``xywh`` to ``xyxy`` before being
        attached to the image.
    N : int
        Number of leading rows of ``df`` to include.

    Returns
    -------
    wandb.Table
        Table with the columns ``image_id``, ``image`` and ``source``.

    Notes
    -----
    The ``source`` value is read from the module-level ``train_df``, not from
    ``df``; the value of the first ``train_df`` row for each image is used.
    """
    subset = df[:N]

    # Build the image_id -> source lookup once, instead of filtering all of
    # train_df with a boolean mask on every iteration. Keeping the first row
    # per image_id matches taking element [0] of the filtered column.
    source_by_id = train_df.drop_duplicates('image_id').set_index('image_id')['source']

    table = wandb.Table(columns=['image_id', 'image', 'source'])
    rows = zip(subset['image_id'], subset['boxes'])
    for image_id, image_boxes in tqdm(rows, total=len(subset)):
        source = source_by_id.loc[image_id]

        full_image_path = os.path.join(train_path, image_id + '.jpg')
        image = F.to_pil_image(read_image(full_image_path))

        # Boxes are stored as [x, y, w, h]; the W&B helper reads corner coordinates.
        boxes = torch.as_tensor(image_boxes.tolist())
        boxes_xyxy = box_convert(boxes, in_fmt='xywh', out_fmt='xyxy')

        box_image = create_box_image(image, boxes_xyxy)
        table.add_data(image_id, box_image, source)

    return table
        
        
# Build the preview table with the default arguments (first 200 images).
table = create_table()

  0%|          | 0/200 [00:00<?, ?it/s]

In [20]:
# Attach the image table to the artifact under the name 'EDA_table'.
artifact.add(table, 'EDA_table')

ArtifactManifestEntry(path='EDA_table.table.json', digest='aXfN0YP2olZM3fNG2CS+lA==', ref=None, birth_artifact_id=None, size=1204059, extra={}, local_path='/root/.local/share/wandb/artifacts/staging/tmp0z8htufp')

In [21]:
# Log the artifact to the run, then close the run.
run.log_artifact(artifact)
run.finish()