### Label propagation using CNN feature extraction 

In [19]:
from data_processor import test_loader, unlabeled_loader
import torch
import torch.nn as nn
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading
from sklearn.semi_supervised import LabelPropagation
from models import CNNet
from torch.utils.data import DataLoader
from models import train_model

In [20]:
# Augmentations that are switched on or off together: RandomApply (below)
# runs the whole list, in order, or skips all of it.
# Both flips use p=1, so whenever the group fires the image is always flipped
# horizontally and vertically.
ts = [
    transforms.RandomHorizontalFlip(p=1),
    transforms.RandomRotation(degrees=(-10, 10), fill=(130, 130, 130)),
    transforms.RandomVerticalFlip(p=1),
    transforms.RandomResizedCrop(
        size=(100, 100),
        scale=(0.8, 1.0),
        ratio=(0.75, 1.3333333333333333),
        interpolation=2,  # integer code 2 is PIL's bilinear filter
    ),
]

# Per-image pipeline: fixed 100x100 size, the augmentation group above with
# probability 0.5, conversion to grayscale, then conversion to a tensor.
pipeline_steps = [
    transforms.Resize((100, 100)),
    transforms.RandomApply(ts, p=0.5),
    transforms.Grayscale(),
    transforms.ToTensor(),
]
transformation = transforms.Compose(pipeline_steps)

In [21]:
# Labelled training images, read from disk through the augmentation pipeline
# `transformation` defined in the previous cell.
train_data_path = './semi_supervised_data/train_data'
train_set = ImageFolder(root=train_data_path, transform=transformation)

# Shuffled mini-batches of 64 images for training.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

In [22]:
# Train a freshly constructed CNNet on the labelled loader. train_model hands
# back the trained model together with a loss value.
# NOTE(review): the third argument (10) is presumably the epoch count -- the
# recorded output shows ten per-epoch loss lines; confirm against train_model.
model, loss = train_model(CNNet(), train_loader, 10)

started training ...
the loss after processing this epoch is:  50.77671743184328
the loss after processing this epoch is:  24.374547854065895
the loss after processing this epoch is:  19.400708597153425
the loss after processing this epoch is:  18.717636421322823
the loss after processing this epoch is:  18.39981289766729
the loss after processing this epoch is:  16.008887951262295
the loss after processing this epoch is:  15.867740739136934
the loss after processing this epoch is:  14.200564302504063
the loss after processing this epoch is:  14.575025220867246
the loss after processing this epoch is:  12.067018299363554
Training completed.
=*==*==*==*==*==*==*==*==*==*==*==*==*==*==*==*==*==*==*==*=


In [23]:
# Build the feature matrix and label vector handed to the label-spreading
# model: CNN features for every labelled training batch first, followed by
# CNN features for every unlabeled batch.
#
# Per-batch results are gathered in lists and concatenated once at the end.
# This avoids re-copying the growing tensor on every batch and removes the
# need to seed the tensors with an extra "priming" batch that then had to be
# sliced off again.

# Switch to inference mode so layers such as dropout / batch-norm (if CNNet has
# any) behave deterministically and are not updated by these forward passes.
# NOTE(review): assumes CNNet subclasses nn.Module so .eval() exists -- confirm.
model.eval()

feature_chunks = []
label_chunks = []

# Size of the first training batch. Kept defined for backward compatibility
# with any later cell that still reads `first_batch`.
first_batch = None

with torch.no_grad():
    for images, labels in train_loader:
        if first_batch is None:
            first_batch = len(images)
        feature_chunks.append(model.get_features(images))
        label_chunks.append(labels)

    for images, labels in unlabeled_loader:
        feature_chunks.append(model.get_features(images))
        # Whatever labels the unlabeled loader yields are discarded; -1 is the
        # marker scikit-learn's semi-supervised estimators treat as "unlabeled".
        label_chunks.append(torch.tensor([-1] * len(labels)))

all_out = torch.cat(feature_chunks, 0)
all_labels = torch.cat(label_chunks, 0)

In [24]:
# Sanity check: the feature matrix and the label vector must have the same
# number of rows.
for collected in (all_out, all_labels):
    print(collected.shape)

torch.Size([14809, 50])
torch.Size([14809])


In [25]:
# Extract CNN features and ground-truth labels for the entire test set.
#
# Each batch is processed exactly once and the pieces are concatenated at the
# end, so no "priming" batch has to be stripped afterwards. The earlier
# approach stripped it using the size of a *training* batch, which silently
# duplicates or drops test samples whenever the two loaders use different
# batch sizes.
#
# NOTE(review): the model's train/eval mode is whatever earlier cells left it
# in; test features should be extracted in the same mode as the features the
# classifier is fitted on.
test_feature_chunks = []
test_label_chunks = []

# Everything runs under no_grad: these forward passes are inference only.
with torch.no_grad():
    for images, labels in test_loader:
        test_feature_chunks.append(model.get_features(images))
        test_label_chunks.append(labels)

test_features = torch.cat(test_feature_chunks, 0)
test_labels = torch.cat(test_label_chunks, 0)

In [26]:
# Sanity check: one label per test feature row.
for collected in (test_features, test_labels):
    print(collected.shape)

torch.Size([1848, 50])
torch.Size([1848])


In [27]:
# Semi-supervised classifier over the CNN feature space, using a k-nearest-
# neighbour graph kernel with 100 neighbours.
label_spr_model = LabelSpreading(
    kernel='knn',
    n_neighbors=100,
    alpha=0.01,
    max_iter=50,
    tol=.001,
)

# scikit-learn works on NumPy arrays; rows labelled -1 are the unlabeled
# samples whose labels the model infers during fit.
fit_features = all_out.numpy()
fit_labels = all_labels.numpy()
label_spr_model.fit(fit_features, fit_labels)

# Last expression of the cell, so the notebook displays it: the score on the
# held-out test set.
label_spr_model.score(test_features.numpy(), test_labels.numpy())

0.8733766233766234