# Training Sample - COPY for each subsequent training

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the dataset; the CSV must provide 'image_path' and 'label' columns.
df = pd.read_csv('your_dataset.csv')

# Build the label <-> integer id lookups. Ids are assigned in order of first
# appearance in the 'label' column, so the mapping depends on the CSV row order.
unique_labels = df['label'].unique()
label2id = dict(zip(unique_labels, range(len(unique_labels))))
id2label = dict(zip(label2id.values(), label2id.keys()))

# Attach the integer class id to every row.
df['label_id'] = df['label'].map(label2id)


In [None]:
# Hold out 20% of the rows for evaluation. Stratifying on 'label_id' keeps the
# class proportions the same in both splits; the fixed seed makes the split
# reproducible.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label_id'],
)

In [None]:
from torch.utils.data import Dataset
from PIL import Image
from transformers import AutoImageProcessor

class CustomImageDataset(Dataset):
    """Map-style dataset yielding processed images with integer labels.

    Args:
        dataframe: Table with an ``image_path`` column (path of the image
            file to open) and a ``label_id`` column (integer class id).
        image_processor: Callable invoked as
            ``image_processor(images=..., return_tensors="pt")`` that returns
            a mapping of tensors carrying a leading batch dimension of 1.
    """

    def __init__(self, dataframe, image_processor):
        # Reset the index so that positions 0..len-1 are valid ``.loc`` labels
        # even when the frame is a shuffled subset of a larger one.
        self.dataframe = dataframe.reset_index(drop=True)
        self.image_processor = image_processor

    def __len__(self):
        """Return the number of samples (rows) in the dataset."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load, convert and preprocess the sample at position ``idx``.

        Returns:
            dict: every tensor produced by the image processor with its batch
            dimension removed, plus ``'labels'`` holding the class id as a
            plain ``int``.
        """
        image_path = self.dataframe.loc[idx, 'image_path']
        label = self.dataframe.loc[idx, 'label_id']

        # Close the file deterministically instead of waiting for garbage
        # collection; ``convert`` returns an independent, fully loaded copy.
        with Image.open(image_path) as img:
            image = img.convert('RGB')

        inputs = self.image_processor(images=image, return_tensors="pt")

        # Drop only the batch axis: a bare ``squeeze()`` would also collapse
        # any other dimension that happens to have size 1.
        item = {key: val.squeeze(0) for key, val in inputs.items()}

        # Keep the label out of the tensor post-processing above so it does not
        # need a ``squeeze`` method; collators turn ints into long tensors.
        item['labels'] = int(label)
        return item

In [None]:
from transformers import AutoImageProcessor, AutoModelForImageClassification

# Load the image processor and the pretrained model, replacing the
# classification head with one sized for this dataset's labels.
image_processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = AutoModelForImageClassification.from_pretrained(
    "microsoft/resnet-50",
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
    # The checkpoint ships with a 1000-class ImageNet head. Without this flag,
    # loading it into a head with a different number of labels fails with a
    # size-mismatch error; with it, the head is freshly initialised instead.
    ignore_mismatched_sizes=True,
)

In [None]:
from transformers import TrainingArguments, Trainer
from torch.utils.data import DataLoader

import inspect

# Create datasets
train_dataset = CustomImageDataset(train_df, image_processor)
test_dataset = CustomImageDataset(test_df, image_processor)

# `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
# releases and the old keyword is rejected there. Pick whichever name the
# installed version accepts so this cell runs on both old and new versions.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in _training_arg_names
    else "evaluation_strategy"
)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./resnet_finetuned",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint on the same schedule; load_best_model_at_end
    # requires the two strategies to match.
    save_strategy="epoch",
    num_train_epochs=10,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    **{_eval_strategy_key: "epoch"},
)

In [None]:
import numpy as np
import evaluate

# Accuracy metric from the `evaluate` library, loaded once and reused by
# compute_metrics on every evaluation pass.
accuracy = evaluate.load("accuracy")


def compute_metrics(p):
    """Compute accuracy for one evaluation pass.

    Args:
        p: Object exposing ``predictions`` (per-class scores, one row per
            sample) and ``label_ids`` (the reference class ids).

    Returns:
        The result of ``accuracy.compute`` for the arg-max class of each row.
    """
    predicted_ids = np.argmax(p.predictions, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=p.label_ids)

# Wire the model, hyper-parameters, data and metric function into a Trainer.
# NOTE(review): the held-out test split doubles as the evaluation set, so with
# load_best_model_at_end=True the checkpoint is selected on test data.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Run fine-tuning.
trainer.train()

In [None]:
# Evaluate the model
# Called without arguments, so the Trainer evaluates on the eval_dataset it was
# constructed with (test_dataset); the returned metrics are printed as-is.
metrics = trainer.evaluate()
print(metrics)