<a href="https://colab.research.google.com/github/AdityasArsenal/Yoga_Trainer/blob/main/FineTuned_mobilenetv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q datasets transformers

In [None]:
import os
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub; later cells push a dataset and a
# model there.
notebook_login()

In [None]:
%%capture
# Install git-lfs and tell git to store credentials; %%capture hides the output.
!sudo apt -qq install git-lfs
!git config --global credential.helper store

In [None]:
# Mount Google Drive at /content/drive; the dataset archive is read from
# MyDrive in the following cells.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!ls -al "/content/drive/MyDrive/YogaPoses.zip/"

In [None]:
!unzip "/content/drive/MyDrive/YogaPoses.zip" -d /tmp/yogaimg

In [None]:
from datasets import load_dataset

# Read the extracted images with the "imagefolder" builder and keep the
# "train" split it returns.
ds = load_dataset("imagefolder", data_dir="/tmp/yogaimg/YogaPoses")["train"]

In [None]:
# Hold out 15% of the images for evaluation. The fixed seed makes the split
# identical on every run, so the dataset pushed to the Hub and any reported
# accuracy can be reproduced.
data = ds.train_test_split(test_size=0.15, seed=42)
data

In [None]:
# Upload the train/test splits to the Hub dataset repository.
data.push_to_hub("AdityasArsenal/YogaDataSet")

In [None]:
# Reload the dataset from the Hub; from here on `data` refers to the Hub copy.
data = load_dataset("AdityasArsenal/YogaDataSet")

In [None]:
# Look at a single training example.
# NOTE(review): index 402 assumes the train split has at least 403 rows — confirm.
ex = data['train'][402]
ex

In [None]:
# Display the image stored in the example's "image" field.
image = ex['image']
image

In [None]:
# The feature object describing the "label" column (used below for int2str).
labels = data['train'].features['label']
labels

In [None]:
# Translate the example's integer label into its class name.
labels.int2str(ex['label'])

In [None]:
!pip install datasets transformers evaluate

In [None]:
from evaluate import load
# NOTE(review): `metric` is not referenced by any later cell in this notebook;
# compute_metrics relies on sklearn's accuracy_score instead.
metric = load("accuracy")

In [None]:
# Display the dataset object loaded from the Hub.
data

In [None]:
# Class names in label-id order, plus lookup tables in both directions.
labels = data["train"].features["label"].names
label2id = {name: idx for idx, name in enumerate(labels)}
id2label = dict(enumerate(labels))

In [None]:
from transformers import AutoImageProcessor

model_name_or_path = 'google/mobilenet_v2_1.0_224'
# Let the Auto class select the processor that belongs to this checkpoint.
# ViTFeatureExtractor is a ViT-specific (and deprecated) class, not the one
# the MobileNetV2 checkpoint was published with.
feature_extractor = AutoImageProcessor.from_pretrained(model_name_or_path)

In [None]:
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)

# Normalise with the statistics stored in the checkpoint's own preprocessor
# config, so the training tensors use the same pixel scaling that
# `feature_extractor` applies to an image at inference time.
normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)

# Training transformations (random augmentation)
train_transforms = Compose(
    [
        RandomResizedCrop(224),  # random crop, rescaled to 224x224
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ]
)

# Validation transformations (deterministic)
val_transforms = Compose(
    [
        Resize(224),  # shorter side -> 224, aspect ratio kept
        CenterCrop(224),  # central 224x224 window
        ToTensor(),
        normalize,
    ]
)

def preprocess_train(example_batch):
    """Add a "pixel_values" entry built by running train_transforms on each image.

    Every image in example_batch["image"] is converted to RGB first. The batch
    is modified in place and returned.
    """
    augmented = []
    for image in example_batch["image"]:
        augmented.append(train_transforms(image.convert("RGB")))
    example_batch["pixel_values"] = augmented
    return example_batch

def preprocess_val(example_batch):
    """Add a "pixel_values" entry built by running val_transforms on each image.

    Every image in example_batch["image"] is converted to RGB first. The batch
    is modified in place and returned.
    """
    rgb_images = (image.convert("RGB") for image in example_batch["image"])
    example_batch["pixel_values"] = [val_transforms(rgb) for rgb in rgb_images]
    return example_batch


In [None]:
# Name the two splits: the dataset's "test" split is used as the validation
# set passed to the Trainer.
train_ds = data['train']
val_ds = data['test']

In [None]:
# Apply the transforms lazily, each time rows are read, instead of via .map():
#  - .map() writes its result to Arrow, so the tensors come back as nested
#    Python lists and torch.stack() in collate_fn rejects them;
#  - .map() runs once, which would freeze the random crop/flip so every epoch
#    sees the same "augmented" image.
# Starting from data[...] keeps the cell idempotent when it is re-run.
train_ds = data['train'].with_transform(preprocess_train)
val_ds = data['test'].with_transform(preprocess_val)

In [None]:
from transformers import AutoModelForImageClassification
from torch import nn

# Model name for MobileNetV2
model_name_or_path = 'google/mobilenet_v2_1.0_224'

# Class names in the dataset's own label-id order. Reading them from the
# dataset (instead of retyping them) guarantees that logit i corresponds to
# label id i in the training data.
labels = data["train"].features["label"].names

# num_labels differs from the size of the checkpoint's classification head;
# ignore_mismatched_sizes=True lets from_pretrained skip those weights and
# create a freshly initialised classifier with len(labels) outputs.
model = AutoModelForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(labels),  # Number of classes in the dataset
    id2label={str(i): label for i, label in enumerate(labels)},  # Mapping indices to class names
    label2id={label: str(i) for i, label in enumerate(labels)},  # Mapping class names to indices
    ignore_mismatched_sizes=True,
)

# The classifier is deliberately left as from_pretrained built it. Wrapping it
# in an nn.Sequential would rename its parameters (classifier.1.weight / .bias),
# so a checkpoint saved or pushed from this model would no longer load its
# trained head back into the stock architecture.

# Now the model is ready to be fine-tuned for your dataset


In [None]:
import torch
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir='finetuned-for-YogaPoses',  # Output directory for the fine-tuned model
    per_device_train_batch_size=16,  # You can adjust the batch size depending on GPU memory
    eval_strategy="steps",  # Use eval_strategy instead of evaluation_strategy
    num_train_epochs=4,  # Number of training epochs
    fp16=torch.cuda.is_available(),  # Mixed precision needs a GPU; fall back to fp32 on CPU-only runtimes
    save_steps=100,  # Save model every 100 steps
    eval_steps=100,  # Evaluate every 100 steps (kept equal to save_steps for load_best_model_at_end)
    logging_steps=10,  # Log metrics every 10 steps
    learning_rate=2e-4,  # Learning rate for training
    save_total_limit=2,  # Limit the number of checkpoints kept on disk
    remove_unused_columns=False,  # Keep the "image" column; the preprocessing functions read it
    push_to_hub=True,  # Push the fine-tuned model to Hugging Face Hub
    report_to='tensorboard',  # Use TensorBoard for logging
    load_best_model_at_end=True,  # Load the best model at the end of training
    hub_strategy="end",  # Push the model to the hub at the end of training
    metric_for_best_model='accuracy',  # Key returned by compute_metrics used to pick the best checkpoint
)


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# compute_metrics receives an object exposing two NumPy arrays:
# `predictions` (the model's logits) and `label_ids` (the ground-truth labels).
def compute_metrics(eval_pred):
    """Return {'accuracy': ...} for the arg-max class of each row of logits."""
    logits, references = eval_pred.predictions, eval_pred.label_ids
    # The highest-scoring class along axis 1 is the predicted label.
    predicted_ids = np.argmax(logits, axis=1)
    return {'accuracy': accuracy_score(references, predicted_ids)}


In [None]:
import torch

def collate_fn(batch):
    """Collate a list of examples into the tensors the model consumes.

    Args:
        batch: list of dicts, each with a 'pixel_values' entry (a tensor, or a
            nested list of numbers of the same shape) and an integer 'label'.

    Returns:
        dict with 'pixel_values' (examples stacked along a new first
        dimension) and 'labels' (1-D tensor of the label ids).
    """
    # as_tensor leaves tensors untouched and converts nested lists, which is
    # what a dataset yields when pixel_values were materialised with .map().
    pixel_values = torch.stack(
        [torch.as_tensor(example['pixel_values']) for example in batch]
    )
    labels = torch.tensor([example['label'] for example in batch])
    return {'pixel_values': pixel_values, 'labels': labels}

In [None]:
import inspect

from transformers import AutoImageProcessor, Trainer

# Load the image processor that belongs to the checkpoint
# (AutoImageProcessor replaces the deprecated AutoFeatureExtractor).
feature_extractor = AutoImageProcessor.from_pretrained('google/mobilenet_v2_1.0_224')

# Hand the processor to the Trainer so its config is saved (and pushed) next
# to the model weights. The keyword is `processing_class` in newer
# transformers releases and `tokenizer` in older ones; use whichever this
# installation's Trainer accepts.
_trainer_params = inspect.signature(Trainer.__init__).parameters
_processor_kwarg = "processing_class" if "processing_class" in _trainer_params else "tokenizer"

trainer = Trainer(
    model=model,  # Your MobileNetV2 model
    args=training_args,  # Your training arguments
    train_dataset=train_ds,  # Training dataset
    eval_dataset=val_ds,  # Validation dataset
    compute_metrics=compute_metrics,  # Compute accuracy
    data_collator=collate_fn,  # Data collator for batching
    **{_processor_kwarg: feature_extractor},
)


In [None]:
# Fine-tune, then persist the model, the training metrics and the trainer state.
train_results = trainer.train()
train_metrics = train_results.metrics

trainer.save_model()
trainer.log_metrics("train", train_metrics)
trainer.save_metrics("train", train_metrics)
trainer.save_state()

In [None]:
# Evaluate on the eval_dataset given to the Trainer, then log and save the
# resulting metrics under the "eval" split name.
metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

In [None]:
# Metadata for the generated model card. "dataset" must name the data this
# model was actually fine-tuned on (the Hub dataset loaded earlier in the
# notebook), not a dataset from another tutorial.
kwargs = {
    "finetuned_from": model.config._name_or_path,
    "tasks": "image-classification",
    "dataset": 'AdityasArsenal/YogaDataSet',
    "tags": ['image-classification'],
}

# Push model + card when pushing is enabled; otherwise only write the card locally.
if training_args.push_to_hub:
    trainer.push_to_hub('🍻 cheers', **kwargs)
else:
    trainer.create_model_card(**kwargs)

In [None]:
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image
import torch
import requests
import io


In [None]:
# Provide the path to your uploaded image
image_path = "/content/pp.jpg"

# Open the image and force 3-channel RGB, as the training preprocessing does,
# so grayscale, palette or RGBA files reach the model in the same layout.
image = Image.open(image_path).convert("RGB")


In [None]:
# Display the image that will be classified.
image

In [None]:
from transformers import AutoImageProcessor

# Load the image processor for MobileNetV2
# (AutoImageProcessor replaces the deprecated AutoFeatureExtractor).
feature_extractor = AutoImageProcessor.from_pretrained('google/mobilenet_v2_1.0_224')

# Preprocess the image into a batch of PyTorch tensors
inputs = feature_extractor(images=image, return_tensors="pt")


In [None]:
# Make predictions with the trained model
model.eval()  # Set the model to evaluation mode

# The processor returns CPU tensors, while the model sits on the GPU after
# GPU training; move the inputs to the model's device to avoid a device
# mismatch error in the forward pass.
inputs_on_device = {name: tensor.to(model.device) for name, tensor in inputs.items()}

with torch.no_grad():  # Disable gradient calculation
    outputs = model(**inputs_on_device)

# Get the predicted class (max logit)
predicted_class = torch.argmax(outputs.logits, dim=-1).item()

# id2label may be keyed by int or by str depending on how the config was
# built or reloaded; accept either.
id2label = model.config.id2label
predicted_label = id2label.get(predicted_class, id2label.get(str(predicted_class)))

# Print the predicted class
print(f"Predicted class index: {predicted_class}")
print(f"Predicted class label: {predicted_label}")
