In [None]:
import numpy as np 
import pandas as pd 
from torch.utils.data import DataLoader, RandomSampler, Dataset
from torchvision.transforms import transforms
from torchvision import datasets
import cv2
import os
from random import sample
from os.path import join

In [None]:
!pip install semilearn==0.3.8a
!nvidia-smi

In [None]:
!pip install wandb
!pip install aim

In [None]:
import semilearn
from semilearn import get_dataset, get_data_loader, get_net_builder, get_algorithm, get_config, Trainer
from semilearn import BasicDataset, split_ssl_data

In [None]:
# Experiment configuration for FixMatch on Semi-iNat. `get_config` turns this dict
# into an attribute-style config object; attributes that are not listed here
# (e.g. `include_lb_to_ulb`, read further down) come from the library defaults.
config = {
    'algorithm': 'fixmatch',
    'net': 'vit_tiny_patch2_32',
    'use_pretrain': True, 
    'pretrain_path': 'https://github.com/microsoft/Semi-supervised-learning/releases/download/v.0.0.0/vit_tiny_patch2_32_mlp_im_1k_32.pth',

    # optimization configs
    'epoch': 5,  # set to 100
    'num_train_iter': 5000,  # set to 102400
    'num_eval_iter': 500,   # set to 1024
    'num_log_iter': 50,    # set to 256
    'optim': 'AdamW',
    'lr': 5e-4,
    'layer_decay': 0.5,
    'batch_size': 128,
    'eval_batch_size': 128,


    # dataset configs
    'dataset': 'semi-nat',
    'num_labels': 9721,
    # `num_classes` is the attribute the rest of the notebook reads as
    # `config.num_classes`. It must be set explicitly: when it is missing the
    # library default is used (10 in the USB examples - confirm), which does not
    # match the 810 labeled classes below.
    'num_classes': 810,
    'num_classes_in': 810,    # classes that appear in the labeled split
    'num_classes_out': 1629,  # extra classes that only appear in the unlabeled split
    # NOTE(review): every image in this notebook is resized to 32x32 and the
    # backbone name ends in `_32`; confirm whether `img_size` should be 32.
    'img_size': 224,
    'crop_ratio': 0.875,
    'data_dir': './data',
  

    # algorithm specific configs
    'hard_label': True,
    'uratio': 2,  # unlabeled batch = batch_size * uratio (see the unlabeled loader)
    'ulb_loss_ratio': 1.0,

    # device configs
    'gpu': 0,
    'world_size': 1,
    'distributed': False,
    "num_workers": 2,
}
config = get_config(config)

In [None]:
# Look up the backbone constructor named in the config, then build the SSL
# algorithm around it. No TensorBoard writer or logger is attached.
net_builder = get_net_builder(config.net, from_name=False)
algorithm = get_algorithm(config, net_builder, tb_log=None, logger=None)

In [None]:
# replace with your own code
#
# Synthetic smoke-test data: 1000 random 32x32x3 uint8 images with labels in
# [0, 10). It only exercises the split / BasicDataset plumbing; the real data is
# loaded in the sections below.

data = np.random.randint(0, 255, size=3072 * 1000).reshape((-1, 32, 32, 3))
data = np.uint8(data)
target = np.random.randint(0, 10, size=1000)
# Positional 10, 10: presumably (num_classes, number of labeled samples) - verify
# against the installed split_ssl_data signature.
lb_data, lb_target, ulb_data, ulb_target = split_ssl_data(config, data, target, 10,
                                                          10, include_lb_to_ulb=config.include_lb_to_ulb)

train_transform_ = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.RandomCrop(32, padding=int(32 * 0.125), padding_mode='reflect'),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])

# NOTE: in this demo cell the "strong" pipeline is identical to the weak one.
train_strong_transform_ = transforms.Compose([transforms.RandomHorizontalFlip(),
                                             transforms.RandomCrop(32, padding=int(32 * 0.125), padding_mode='reflect'),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])

lb_dataset_ = BasicDataset(config.algorithm, lb_data, lb_target, config.num_classes, train_transform_, is_ulb=False)
# The unlabeled dataset wraps the *unlabeled* split returned by split_ssl_data
# (ulb_data / ulb_target), not the labeled arrays.
ulb_dataset_ = BasicDataset(config.algorithm, ulb_data, ulb_target, config.num_classes, train_transform_, is_ulb=True, strong_transform=train_strong_transform_)

In [None]:
# Display the first item of the synthetic labeled dataset as a quick sanity check.
lb_dataset_[0]

## **Read Labeled Data**

In [None]:
# NOTE(review): the loaders in this notebook are sized from config.batch_size;
# this variable is kept only for cells that may still read it.
batch_size = 128

# Weak augmentation: resize to the 32x32 input size, small rotation, occasional
# horizontal flip, then scale to [-1, 1].
train_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.RandomRotation(10),
        transforms.RandomHorizontalFlip(p=0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

# Strong augmentation for the unlabeled branch. FixMatch relies on the strong view
# being substantially harder than the weak one; flip + padded crop alone is only a
# weak augmentation, so RandAugment is applied on the PIL image before ToTensor.
train_strong_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                             transforms.RandomCrop(32, padding=int(32 * 0.125), padding_mode='reflect'),
                                             transforms.RandAugment(num_ops=3, magnitude=5),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])


# Labeled split, one sub-folder per class; ImageFolder assigns the class indices.
train_dataset = datasets.ImageFolder('/kaggle/input/semi-inat-2021/l_train/l_train', transform=train_transform)

In [None]:
# Cache the labeled split as raw 32x32 RGB uint8 arrays in height-width-channel
# order, with ImageFolder's class index as the label.
#
# The files are read through `train_dataset.samples` / `train_dataset.loader`
# instead of iterating the dataset itself: iterating would return tensors that are
# already augmented and normalised to [-1, 1], and casting those to uint8 destroys
# the pixel values (and freezes one random augmentation into the cache).
resize_to_input = transforms.Resize((32, 32))
l_train_data = []
label_train = []
for img_path, class_idx in train_dataset.samples:
    pil_img = resize_to_input(train_dataset.loader(img_path))
    l_train_data.append(np.array(pil_img, dtype=np.uint8))  # shape (32, 32, 3)
    label_train.append(class_idx)

In [None]:
# Load the labeled split at 224x224 directly with OpenCV.
# ltrain_images: (N, 224, 224, 3) uint8, RGB; ltrain_labeled: (N,) int64, taken
# from the numeric class-folder name.
train_folder = '/kaggle/input/semi-inat-2021/l_train/l_train'
ltrain_image_list = []
ltrain_label_list = []
ltrain_unreadable = []
for sub_folder in sorted(os.listdir(train_folder)):
    class_dir = os.path.join(train_folder, sub_folder)
    for filename in sorted(os.listdir(class_dir)):
        # The file lives inside its class folder, so the path must include it.
        img_path = os.path.join(class_dir, filename)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            ltrain_unreadable.append(img_path)
            continue
        # OpenCV decodes to BGR; convert to RGB to match the PIL-loaded data.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        ltrain_image_list.append(cv2.resize(img, dsize=(224, 224)))
        ltrain_label_list.append(int(sub_folder))

# uint8 keeps the array 8x smaller than the int64 buffer used before.
ltrain_images = np.array(ltrain_image_list, dtype=np.uint8).reshape(-1, 224, 224, 3)
ltrain_labeled = np.array(ltrain_label_list, dtype=np.int64)
if ltrain_unreadable:
    print(f"skipped {len(ltrain_unreadable)} unreadable files")

In [None]:
# Stack the cached labeled images into one array in the height-width-channel
# layout that BasicDataset / PIL expect.
np_l_train_data = np.array(l_train_data)
# Channel-first input (N, 3, H, W) has to be *transposed* to (N, H, W, 3);
# a plain reshape would keep the memory order and scramble the pixels.
if np_l_train_data.ndim == 4 and np_l_train_data.shape[1] == 3 and np_l_train_data.shape[-1] != 3:
    np_l_train_data = np_l_train_data.transpose(0, 2, 3, 1)
assert np_l_train_data.shape[1:] == (32, 32, 3), np_l_train_data.shape
print(type(np_l_train_data))

lb_dataset = BasicDataset(config.algorithm, np_l_train_data, np.array(label_train), config.num_classes_in, train_transform, is_ulb=False)
# The loader must wrap the BasicDataset built above, not the raw ImageFolder.
train_lb_loader = get_data_loader(config, lb_dataset, config.batch_size)

In [None]:
# Display the first labeled training item as a quick sanity check.
lb_dataset[0]

## **Read Unlabeled Data**

In [None]:
# Load a capped subset of the unlabeled images as 32x32 RGB uint8 arrays.
ulabeled_instances = []

folder = '/kaggle/input/semi-inat-2021/u_train/u_train'
max_unlabeled = 10000  # cap on how many unlabeled images are kept in memory
# Load in the images (sorted so the same subset is selected on every run)
for filename in sorted(os.listdir(folder)):
    if len(ulabeled_instances) >= max_unlabeled:
        break
    img = cv2.imread(os.path.join(folder, filename))
    if img is None:
        # cv2.imread returns None for files it cannot decode; skip them.
        continue
    # OpenCV decodes to BGR; convert to RGB so the channel order matches the
    # labeled images, which are loaded as RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ulabeled_instances.append(cv2.resize(img, (32, 32)))
print(type(ulabeled_instances[0]))

In [None]:
# Turn the list of cached unlabeled images into a single array.
ulb_data = np.asarray(ulabeled_instances)

print(ulb_data.shape)

# Unlabeled dataset: no targets; each item gets the weak and the strong transform.
ulb_dataset = BasicDataset(
    alg=config.algorithm,
    data=ulb_data,
    num_classes=config.num_classes_in + config.num_classes_out,
    transform=train_transform,
    is_ulb=True,
    strong_transform=train_strong_transform,
)
# The unlabeled batch is `uratio` times the labeled batch size.
train_ulb_loader = get_data_loader(config, ulb_dataset, int(config.batch_size * config.uratio))

## Read Evaluation Data

In [None]:
# Deterministic evaluation pipeline: resize, tensor conversion, scale to [-1, 1].
eval_steps = [
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
eval_transform = transforms.Compose(eval_steps)

# NOTE(review): evaluation currently reuses the labeled *training* arrays, so the
# reported metrics are not held-out. TODO: load the validation split here instead.
eval_dataset = BasicDataset(
    config.algorithm,
    np_l_train_data,
    np.array(label_train),
    config.num_classes_in,
    eval_transform,
    is_ulb=False,
)
eval_loader = get_data_loader(config, eval_dataset, config.eval_batch_size)

In [None]:
# Wrap the configured algorithm in semilearn's Trainer and run training with the
# labeled, unlabeled and evaluation loaders defined in the cells above.
trainer = Trainer(config, algorithm)
trainer.fit(train_lb_loader, train_ulb_loader, eval_loader)