In [1]:
# 1) Prepare the dataset:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Set the path to the dataset folder
data_path = "C:/Users/Administrator/Desktop/640eb91e5f67b_Problem_Statement__Invoice/inv_train"

# Get the list of image filenames
image_files = [os.path.join(data_path, file) for file in os.listdir(data_path) if file.endswith('.jpeg')]

# Split the dataset into training and testing sets
train_files, test_files = train_test_split(image_files, test_size=0.2, random_state=42)

# Save the list of filenames to a csv file
pd.DataFrame({'filename': train_files}).to_csv('train.csv', index=False)
pd.DataFrame({'filename': test_files}).to_csv('test.csv', index=False)

In [2]:
# 2) Annoting the data
import os
import pandas as pd
import xml.etree.ElementTree as ET

# Set the path to the dataset folder
data_path = "C:/Users/Administrator/Desktop/640eb91e5f67b_Problem_Statement__Invoice/inv_train"

# Parse the xml files and extract the image filenames, bounding box coordinates, and labels
rows = []
for file in os.listdir(data_path):
    if file.endswith('.xml'):
        tree = ET.parse(os.path.join(data_path, file))
        root = tree.getroot()
        filename = root.find('filename').text
        for obj in root.iter('object'):
            label = obj.find('name').text
            xmin = int(obj.find('bndbox').find('xmin').text)
            ymin = int(obj.find('bndbox').find('ymin').text)
            xmax = int(obj.find('bndbox').find('xmax').text)
            ymax = int(obj.find('bndbox').find('ymax').text)
            rows.append([os.path.join(data_path, filename), xmin, ymin, xmax, ymax, label])

# Save the annotations to a csv file
df = pd.DataFrame(rows, columns=['filename', 'xmin', 'ymin', 'xmax', 'ymax', 'label'])
df.to_csv('train_labels.csv', index=False)

In [None]:
!python C:/Users/Administrator/Downloads/yolov5-master/yolov5-master/train.py --img 640 --batch 16 --epochs 100 --data data.yaml --cfg models/yolov5s.yaml --weights yolov5s.pt --name invoice_extraction --cache

In [23]:
import os
import random
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Set device
device = torch.device("cpu")

# Define dataset class
class InvoiceDataset(Dataset):
    def __init__(self, data_path, label_path, transforms=None):
        self.data_path = data_path
        self.transforms = transforms
        self.labels = pd.read_csv(label_path)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        # Load image and label
        image_path = os.path.join(self.data_path, self.labels.iloc[index, 0])
        image = Image.open(image_path).convert("RGB")
        label = self.labels.iloc[index, 1:]

        # Apply transforms
        if self.transforms is not None:
            image = self.transforms(image)

        return image, label


# Split dataset into train and test sets
data_path = "C:/Users/Administrator/Desktop/640eb91e5f67b_Problem_Statement__Invoice/inv_train"
label_path = "invoice_labels.csv"
df_labels = pd.read_csv(label_path)
train_labels, test_labels = train_test_split(df_labels, test_size=0.2, random_state=42)
train_labels.to_csv("train_labels.csv", index=False)
test_labels.to_csv("test_labels.csv", index=False)

# Define transforms for image augmentation
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=(-10, 10)),
    transforms.ToTensor(),
])

# Create dataset objects
train_dataset = InvoiceDataset(data_path, "train_labels.csv", transform)
test_dataset = InvoiceDataset(data_path, "test_labels.csv", transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Define YOLOv5 model
model = torch.hub.load("ultralytics/yolov5", "custom", path_or_model="/yolov5s.pt", force_reload=True)
model.to(device)

# Train the model
model.train()
epochs = 5
for epoch in range(epochs):
    for batch_idx, (images, targets) in enumerate(train_loader):
        # Send images and targets to device
        images = images.to(device)
        targets = targets.to(device)

        # Forward pass
        outputs = model(images)

        # Compute loss
        loss = outputs.loss
        print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item()}")

        # Backward pass and optimize
        loss.backward()
        model.optimizer.step()
        model.optimizer.zero_grad()

# Save the trained model
torch.save(model.state_dict(), "invoice_extraction_model.pt")

# Inference on the test dataset
model.eval()
model.load_state_dict(torch.load("invoice_extraction_model.pt"))
results = []
for batch_idx, (images, targets) in enumerate(test_loader):
    # Send images to device
    images = images.to(device)

    # Make predictions
    with torch.no_grad():
        outputs = model(images)

    # Post-process predictions
    boxes = outputs.xy


Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\Administrator/.cache\torch\hub\master.zip


TypeError: custom() got an unexpected keyword argument 'path_or_model'