In [2]:
import os

def remove_images_without_labels(images_folder, labels_file):
    # Read the labels file and extract image names
    with open(labels_file, 'r') as file:
        labeled_images = set(line.split(';')[0] for line in file.readlines())

    # List all images in the images folder
    all_images = set(os.listdir(images_folder))

    # Identify images without labels
    images_without_labels = all_images - labeled_images

    # Remove images without labels
    for img in images_without_labels:
        img_path = os.path.join(images_folder, img)
        if os.path.isfile(img_path):
            os.remove(img_path)
            print(f"Removed: {img_path}")
    print("Completed removing unlabeled images.")

# Example usage
images_folder = "./TSRD_Train"  # Replace with your image folder path
labels_file = "./TSRD_Train_Annotation/TsignRecgTrain4170Annotation.txt"       # Replace with your .txt file path
remove_images_without_labels(images_folder, labels_file)


Removed: ./TSRD_Train\024_1_0007_1_j.png
Removed: ./TSRD_Train\022_1_0007_1_j.png
Removed: ./TSRD_Train\026_1_0068_1_j.png
Removed: ./TSRD_Train\055_1_0030_1_j.png
Completed removing unlabeled images.


In [1]:
import os
import pandas as pd
from datasets import Dataset, DatasetDict, Features, Value, ClassLabel
from PIL import Image
import json

from transformers import DetrImageProcessor
from torchvision.transforms import ToTensor
import numpy as np

In [14]:
import pandas as pd
import os

# Function to load and parse annotations
def load_annotations(image_folder, annotation_file):
    # Read annotation file with semicolon delimiter

    df = pd.read_csv(annotation_file, header=None, delimiter=";")
    df = df.iloc[:, :-1]

    # Print the first few rows to debug
    print(df.head())
    
    # Assign column names based on your annotation format
    df.columns = ['filename', 'y', 'x', 'x_min', 'y_min', 'x_max', 'y_max', 'category']
    
    # Prepare records for the dataset
    records = []
    for _, row in df.iterrows():
        record = {
            'image_path': f"{image_folder}/{row['filename']}",
            'bbox': [row['x_min'], row['y_min'], row['x_max'], row['y_max']],  # Bounding box [x_min, y_min, x_max, y_max]
            'category': row['category']
        }
        records.append(record)
    
    return records

# Example usage
image_folder = './TSRD_Train'  # Replace with your image folder path
annotation_file = './TSRD_Train_Annotation/TsignRecgTrain4170Annotation.txt'  # Replace with your annotation file path

# Load annotations
data = load_annotations(image_folder, annotation_file)

# Convert to Hugging Face dataset
from datasets import Dataset
dataset = Dataset.from_pandas(pd.DataFrame(data))

# Display dataset
print(dataset)


                0    1    2   3   4    5    6   7
0    022_0001.png  210  197  24  24  189  186  22
1    022_0002.png   93   85  12  12   79   77  22
2    022_0003.png  200  196  28  20  190  182  22
3    022_0004.png  179  173  30  27  159  156  22
4  022_0001_j.png  166  159  38  32  125  123  22
Dataset({
    features: ['image_path', 'bbox', 'category'],
    num_rows: 2047
})


In [15]:
# Convert the data to a Hugging Face dataset
dataset = Dataset.from_pandas(pd.DataFrame(data))

# Optionally split into train and validation sets
train_test_split = dataset.train_test_split(test_size=0.2)

# Show dataset details
print(train_test_split)

DatasetDict({
    train: Dataset({
        features: ['image_path', 'bbox', 'category'],
        num_rows: 1637
    })
    test: Dataset({
        features: ['image_path', 'bbox', 'category'],
        num_rows: 410
    })
})


In [16]:
print(train_test_split['train']["image_path"][0])

./TSRD_Train/022_00013_00017_png_jpg.rf.114ef7256b69789e2d952428244509d0.jpg


In [17]:
#read image
image = Image.open(train_test_split['train']["image_path"][0])

In [None]:

def preprocess_data(example):
    # Load the image
    image = Image.open(example['image_path'])
    
    # Convert the image to RGB
    image = image.convert("RGB")
    
    # Convert the image to a numpy array
    encoding = processor(images=image, annotations=[{
        'bbox': example['bbox'],
        'category_id': example['category']
    }], return_tensors="pt")
    
    # Add the image and annotations back into the example
    example['pixel_values'] = encoding['pixel_values']
    example['labels'] = encoding['labels']
    example['bbox'] = encoding['bbox']
    
    return example

# Example usage
image_folder = './TSRD_Train'  # Replace with your image folder path
annotation_file = './TSRD_Train_Annotation/TsignRecgTrain4170Annotation.txt'  # Replace with your annotation file path

data = load_annotations(image_folder, annotation_file)

# Convert the data to a Hugging Face dataset
dataset = Dataset.from_pandas(pd.DataFrame(data))

# Apply the preprocessing
dataset = dataset.map(preprocess_data, batched=False)

# Split the dataset into train and test sets
train_test_split = dataset.train_test_split(test_size=0.2)

# Apply the preprocessing to the train and test sets
train_dataset = train_test_split['train'].map(preprocess_data, batched=False)
val_dataset = train_test_split['test'].map(preprocess_data, batched=False)

                0    1    2   3   4    5    6   7
0    022_0001.png  210  197  24  24  189  186  22
1    022_0002.png   93   85  12  12   79   77  22
2    022_0003.png  200  196  28  20  190  182  22
3    022_0004.png  179  173  30  27  159  156  22
4  022_0001_j.png  166  159  38  32  125  123  22


Map:   0%|          | 0/2047 [00:00<?, ? examples/s]

Map:   0%|          | 0/1637 [00:00<?, ? examples/s]

Map:   0%|          | 0/410 [00:00<?, ? examples/s]

In [19]:
from transformers import DetrForObjectDetection
from transformers import Trainer, TrainingArguments

# Load a pre-trained DETR model
model = DetrForObjectDetection.from_pretrained('facebook/detr-resnet-50')

# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    evaluation_strategy="epoch",
    logging_dir='./logs',
)

# Define the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Train the model
trainer.train()


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ValueError: No columns in the dataset match the model's forward method signature. The following columns have been ignored: [image_path, bbox, annotations, image, category]. Please check the dataset and model. You may need to set `remove_unused_columns=False` in `TrainingArguments`.