In [3]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
!pip install wandb -qU
from google.colab import drive
drive.mount('/content/drive')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m300.2/300.2 kB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hMounted at /content/drive
cuda


In [13]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [2]:
manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [4]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models
import shutil
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim

In [5]:
root_dir = '/content/cifar_train_data'
os.makedirs(root_dir, exist_ok=True)
# change the range to number of classes in dataset
for i in range(10):
    os.makedirs(os.path.join(root_dir, str(i)), exist_ok=True)

In [6]:
trainset = torchvision.datasets.CIFAR10(root=root_dir, train=True, download=True, transform=manual_transforms)
trainloader = DataLoader(trainset, batch_size=1, shuffle=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar_train_data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 32028383.25it/s]


Extracting /content/cifar_train_data/cifar-10-python.tar.gz to /content/cifar_train_data


In [7]:
class_names = trainset.classes
class_names

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [8]:
def save_images_to_folders(dataset, root_dir):
    for idx, (image, label) in enumerate(tqdm(dataset)):
        label_dir = os.path.join(root_dir, str(label))
        img_path = os.path.join(label_dir, f'{idx}.png')
        torchvision.utils.save_image(image, img_path)

In [9]:
save_images_to_folders(trainset, root_dir)

100%|██████████| 50000/50000 [08:24<00:00, 99.07it/s] 


In [31]:
#remove other data in the folder except 10 class folders

# shutil.rmtree('/content/cifar_train_data/cifar-10-batches-py')


In [33]:
import os
import pandas as pd


data = []

for label in os.listdir(root_dir):
    label_dir = os.path.join(root_dir, label)
    if os.path.isdir(label_dir):
        for file_name in os.listdir(label_dir):
            file_path = os.path.join(label_dir, file_name)
            if os.path.isfile(file_path):
                data.append([f"{label}/{file_name}", label])


df = pd.DataFrame(data, columns=['filepath', 'label'])


df.to_csv('annotations_cifar10.csv', index=False)

In [34]:
config = {
    "learning_rate": 0.01,
    "batch_size": 64,
    "epochs": 10,
}

In [14]:
project_name = 'Transfer Learning Experiments'
run_name = 'cifar10'
github_url_base = 'https://github.com/Aadharsh1/ML-Deep-Learning/blob/main/Weights_Biases'
file_name = f'{run_name}.ipynb'
notebook_link = f'{github_url_base}/{project_name}/{file_name}'

run = wandb.init(
    project=project_name,
    name=run_name,
    notes=notebook_link,
    config=config,
    job_type="train")

annotations_path = '/content/annotations_cifar10.csv'
labels_at = wandb.Artifact("annotations_cifar10", type="dataset")
labels_at.add_reference(f'file://{annotations_path}')
run.log_artifact(labels_at)

[34m[1mwandb[0m: Currently logged in as: [33maadharshr-2022[0m ([33maadharshr-2022-Singapore Management University[0m). Use [1m`wandb login --relogin`[0m to force relogin


<Artifact annotations_cifar10>

In [35]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
import pandas as pd
from PIL import Image

class Dataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path).convert("RGB")
        label = int(self.img_labels.iloc[idx, 1])
        if self.transform:
            image = self.transform(image)
        return image, label

In [36]:
dataset = Dataset(annotations_file='/content/annotations_cifar10.csv', img_dir=root_dir, transform=transforms.ToTensor())
train_loader = DataLoader(dataset, batch_size=config["batch_size"], shuffle=True)


In [37]:
model = models.efficientnet_b0()
model = model.to(device)


model.classifier[1] = nn.Linear(model.classifier[1].in_features, out_features=10)


In [38]:
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(params=model.parameters(), lr=config['learning_rate'])

def accuracy_function(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

In [39]:
from tqdm.auto import tqdm
def train_function(model, device, train_dataloader, loss_function, optimizer):
  model.train()
  model = model.to(device)
  training_loss = 0
  trng_acc = 0
  for X_train, y_train in tqdm(train_dataloader):
    X_train, y_train = X_train.to(device), y_train.to(device)
    y_train_logits = model(X_train)
    loss = loss_function(y_train_logits, y_train)
    training_loss += loss
    trng_acc += accuracy_function(y_train, torch.argmax(y_train_logits, dim=1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  # find average loss per batch
  return (training_loss/len(train_dataloader), trng_acc/len(train_dataloader))


def validate_function(model, device, validation_dataloader, loss_function, accuracy_function):
  model.eval()
  model = model.to(device)
  validation_loss = 0
  validation_acc = 0

  with torch.inference_mode():
    for X_val, y_val in tqdm(validation_dataloader):
      X_val , y_val = X_val.to(device), y_val.to(device)
      val_logits = model(X_val)
      val_pred = torch.argmax(val_logits, dim=1)
      validation_acc += accuracy_function(y_val, val_pred)
      validation_loss += loss_function(val_logits, y_val)
  return validation_loss/len(validation_dataloader), validation_acc/len(validation_dataloader)

In [40]:
epochs = config['epochs']
for epoch in range(epochs):
  train_loss, trng_acc = train_function(model, device, train_loader, loss_fn, optimizer)
  # val_loss, val_acc = validate_function(model, device, test_loader, loss_fn, accuracy_function)
  wandb.log({
        'training_loss': train_loss,
        'training_accuracy': trng_acc,
        # 'validation_loss': val_loss,
        # 'validation_accuracy': val_acc
    })
  print(f'Epoch {epoch}: Training Loss: {train_loss:.4f}, Training Accuracy: {trng_acc:.4f} ')

  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 0: Training Loss: 1.8253, Training Accuracy: 31.5837 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 1: Training Loss: 1.4620, Training Accuracy: 46.6412 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 2: Training Loss: 1.2639, Training Accuracy: 54.5257 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 3: Training Loss: 1.1220, Training Accuracy: 59.8605 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 4: Training Loss: 1.0167, Training Accuracy: 63.8867 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 5: Training Loss: 0.9262, Training Accuracy: 67.4572 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 6: Training Loss: 0.8564, Training Accuracy: 69.9249 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 7: Training Loss: 0.7883, Training Accuracy: 72.1787 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 8: Training Loss: 0.7310, Training Accuracy: 74.3566 


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 9: Training Loss: 0.6860, Training Accuracy: 76.0390 


In [41]:
project_name = wandb.run.project
run_name = wandb.run.name
# change notebook name according to name of current notebook
notebook_path = '/content/drive/MyDrive/Colab Notebooks/tl_1.ipynb'

In [42]:
url_path = '/content/drive/MyDrive/github_url.txt'
with open(url_path, 'r') as file:
    github_url = file.read().strip()

In [43]:
import os
import subprocess
import shutil
directory = project_name
file_name = f"{run_name}.ipynb"
file_path = os.path.join('/content/ML-Deep-Learning/Weights_Biases', directory, file_name)

In [44]:
repo_path = '/content/ML-Deep-Learning'
if not os.path.exists(repo_path):
    subprocess.run(["git", "clone", github_url, repo_path])

os.chdir(repo_path)

In [45]:
project_directory = os.path.join('Weights_Biases', directory)
if not os.path.exists(project_directory):
    os.makedirs(project_directory)
    print(f"Directory '{project_directory}' created.")
else:
    print(f"Directory '{project_directory}' already exists.")

Directory 'Weights_Biases/Transfer Learning Experiments' created.


In [None]:
shutil.copyfile(notebook_path, file_path)
!git config --global user.email "aadharshrajagopal1@gmail.com"
!git config --global user.name "Aadharsh1"
!git remote set-url origin {github_url}

In [None]:
!cd /content/ML-Deep-Learning/Weights_Biases

In [None]:
!git add .

In [None]:
!git commit -m "Add notebook"

In [None]:
!git push origin main

In [None]:
wandb.finish()