# Full vs Flat Tire

## Library

In [20]:
import os
import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from transformers import ViTFeatureExtractor, ViTForImageClassification
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

In [21]:
# Step 1
class CustomDataset(Dataset):
    """
    Custom Dataset for handling preprocessed data (features and labels).
    """
    def __init__(self, df=None, image_size=None, feature_extractor=None, is_train=True):
        self.df = df
        self.is_train = is_train
        self.image_size = image_size
        self.feature_extractor = feature_extractor

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_features = row['features']

        if self.is_train:
            label = row['label']
            # print(f"Returning features and label: {label}")
            return image_features, label
        else:
            image_id = row['id']
            # print(f"Returning features and id: {image_id}")
            return image_features, image_id


In [22]:
# Step 2
def load_and_preprocess_data(image_dir, image_size, feature_extractor):
    # Create a DataFrame to store image paths and labels
    data = {'id': [], 'image_path': [], 'label': []}

    # Map folder names to labels
    label_map = {'full.class': 0, 'flat.class': 1, 'no-tire.class': 2}

    # Traverse image directory
    for label_name, label_id in label_map.items():
        folder_path = os.path.join(image_dir, label_name)
        if not os.path.exists(folder_path):
            continue

        for img_file in os.listdir(folder_path):
            if img_file.lower().endswith(('jpg', 'jpeg', 'png')):
                data['id'].append(img_file.split('.')[0])  # Use the file name (without extension) as the ID
                data['image_path'].append(os.path.join(folder_path, img_file))
                data['label'].append(label_id)

    df = pd.DataFrame(data)

    # Apply feature extraction
    def extract_features(image_path):
        # print(f"Processing image: {image_path}")  # Log the file being processed
        image = Image.open(image_path).convert("RGB")
        image = image.resize(image_size)
        inputs = feature_extractor(images=image, return_tensors='pt')
        return inputs['pixel_values'].squeeze(0)

    df['features'] = df['image_path'].apply(extract_features)

    num_classes = len(label_map)
    return df, num_classes


In [23]:
# Step 3
def build_base_vit_model(num_classes, model_dir):

    model = ViTForImageClassification.from_pretrained(
        model_dir,
        num_labels=num_classes,
        ignore_mismatched_sizes=True
    )
    return model


In [24]:
# Step 4
def train_model(model, train_loader, val_loader, num_epochs):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    scaler = torch.cuda.amp.GradScaler()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device, dtype=torch.long)

            with torch.cuda.amp.autocast():
                outputs = model(images).logits
                loss = criterion(outputs, labels)

            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

In [25]:
# Step 5
def evaluate_model(model, val_loader):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    y_true, y_pred = [], []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images).logits

            preds = torch.argmax(outputs, dim=1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())

    # Compute metrics
    acc = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')

    print(f'Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}')

In [26]:
# Step 6
def save_model(model, save_path):
    torch.save(model.state_dict(), save_path)
    print(f"Model saved to {save_path}")

def load_model(model, load_path='vit_model.pth'):
    model.load_state_dict(torch.load(load_path))
    model.eval()
    print(f"Model loaded from {load_path}")
    return model

In [27]:
# Step 7
def save_predictions(model, data_loader, output_file='predictions.csv'):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()

    image_ids, pred_labels = [], []

    with torch.no_grad():
        for images, image_ids_batch in data_loader:
            images = images.to(device)

            # Predict labels
            outputs = model(images).logits
            preds = torch.argmax(outputs, dim=1).cpu().numpy()

            image_ids.extend(image_ids_batch)
            pred_labels.extend(preds)

    # Create DataFrame for predictions
    results_df = pd.DataFrame({
        'id': image_ids,
        'jenis': pred_labels
    })

    # Sort results by image ID and save to CSV
    results_df['id'] = results_df['id'].astype(int)
    results_df = results_df.sort_values(by='id').reset_index(drop=True)
    results_df.to_csv(output_file, index=False)
    print(f"Predictions saved to {output_file}")

In [28]:
# Step 1: Load and Preprocess Data
image_dir = 'F:/DatasetFP/tire-dataset'  # Path to dataset folders
image_size = (224, 224)  # Resize all images to 224x224
model_dir = 'google/vit-base-patch16-224'
num_epochs = 10
save_path = 'vit_trained_wheel.pth'

feature_extractor = ViTFeatureExtractor.from_pretrained(model_dir)
df, num_classes = load_and_preprocess_data(image_dir, image_size, feature_extractor)

# Step 2: Split Dataset into Train, Validation, and Test
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)  # Reserve 30% for val/test
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)  # Split the 30% into val/test equally

# Step 3: Create Dataset Objects (No Transforms Applied)
train_dataset = CustomDataset(train_df, image_size, feature_extractor)
val_dataset = CustomDataset(val_df, image_size, feature_extractor)
test_dataset = CustomDataset(test_df, image_size, feature_extractor)

# Step 4: Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Step 5: Train the Model
base_model = build_base_vit_model(num_classes, model_dir)
train_model(base_model, train_loader, val_loader, num_epochs)
evaluate_model(base_model, val_loader)

# Save the model
save_model(base_model, save_path)

# Step 6: Test the Model
load_path = 'vit_trained_wheel.pth'
model = load_model(base_model, load_path)
evaluate_model(model, test_loader)  # Evaluate the model on the held-out test set

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch [1/10], Loss: 0.5283
Epoch [2/10], Loss: 0.1097
Epoch [3/10], Loss: 0.0251
Epoch [4/10], Loss: 0.0217
Epoch [5/10], Loss: 0.0020
Epoch [6/10], Loss: 0.0006
Epoch [7/10], Loss: 0.0003
Epoch [8/10], Loss: 0.0003
Epoch [9/10], Loss: 0.0002
Epoch [10/10], Loss: 0.0002
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000
Model saved to vit_trained_wheel.pth


  model.load_state_dict(torch.load(load_path))


Model loaded from vit_trained_wheel.pth
Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1: 1.0000


In [29]:
# # Train
# image_dir = 'F:/DatasetFP/tire-dataset'  # Path to dataset folders
# image_size = (224, 224)  # Resize all images to 224x224
# model_dir = 'google/vit-base-patch16-224'
# num_epochs = 10
# save_path = 'vit_trained_wheel.pth'

# feature_extractor = ViTFeatureExtractor.from_pretrained(model_dir)
# df, num_classes = load_and_preprocess_data(image_dir, image_size, feature_extractor)

# # Create Dataset
# train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
# train_dataset = CustomDataset(train_df, image_size, feature_extractor)
# val_dataset = CustomDataset(val_df, image_size, feature_extractor)

# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# model = build_base_vit_model(num_classes, model_dir)
# train_model(model, train_loader, val_loader, num_epochs)
# evaluate_model(model, val_loader)

# save_model(model, save_path)

# # Test
# load_path = 'vit_trained_wheel.pth'
# model = load_model(model, load_path)

In [30]:
# # Create a DataFrame from the dataset
# def dataset_to_dataframe(dataset):
#     data = []
#     for idx in range(len(dataset)):
#         if dataset.is_train:
#             # Get features and labels for training data
#             image_features, label = dataset[idx]
#             data.append({
#                 'features': image_features.numpy(),  # Convert tensor to numpy for visualization
#                 'label': label
#             })
#         else:
#             # Get features and IDs for inference data
#             image_features, image_id = dataset[idx]
#             data.append({
#                 'features': image_features.numpy(),
#                 'id': image_id
#             })

#     # Convert to DataFrame
#     return pd.DataFrame(data)

# # Generate the DataFrame for train_dataset
# train_df = dataset_to_dataframe(train_dataset)

# # Display the DataFrame
# train_df  # Show the first few rows
