# Results Notebooks
To run the training of DFC and DEC please use main.ipynb or main.py

In this Notebook we focus on simply loading the models and demonstrating the results for the final DFC with its encoder or the DEC with encoder.
* For this Notebook to work you need to change the ArgsDFC providing all the paths for the encoders and DFC 
* Examples are given at the end of the notebook: Office31 and MTFL (executed), and MNIST with USPS (commented out)

In [1]:
import argparse
import numpy as np
from sklearn.metrics import normalized_mutual_info_score

import torch
from torch import nn
import plotly
from dataloader import get_dataset
from kmeans import get_cluster_centers
from module import Encoder
from adverserial import adv_loss
from eval import predict, cluster_accuracy, balance
from utils import set_seed, AverageMeter, target_distribution, aff, inv_lr_scheduler
import os
import wandb  # Used to log progress and plot graphs. 
from vae import DFC_VAE
from vae import train as train_vae
from dfc import train as train_dfc
from dec import train as train_dec
from dfc import DFC
from resnet50_finetune import *
import torchvision.models as models

import pytorch_lightning as pl
from pl_bolts.models.autoencoders import VAE
import pandas as pd
from ArgsDFC import args as arg_class



PyTorch Version:  1.7.1
Torchvision Version:  0.8.2


In [2]:

# Set wandb logging offline to avoid the need for an account.
# The environment variable has to be in place BEFORE wandb.init() is called;
# setting it afterwards does not affect the run that was already created.
os.environ["WANDB_MODE"] = "dryrun"

wandbrun = wandb.init(project="offline-run")

Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable
[34m[1mwandb[0m: Offline run mode, not syncing to the cloud.
[34m[1mwandb[0m: W&B syncing is set to `offline` in this directory.  Run `wandb online` to enable cloud syncing.


## Arguments
Change the arguments in the run cells at the end of the notebook, where the main pipeline is called. The arguments are defined in ArgsDFC.py (imported above as `arg_class`).

## Define Functions

In [3]:
def get_encoder(args, log_name, legacy_path, path, dataloader_list, device='cpu', encoder_type='vae'):
    """Load the encoder network (or train a new VAE when no checkpoint is given).

    Args:
        args: Run configuration. ``input_height`` and ``digital_dataset`` are
            read only when a new VAE has to be trained.
        log_name: Name forwarded to ``train_vae`` when a new VAE is trained.
        legacy_path: Optional path to a state dict for the ``Encoder`` module.
            Only used for ``encoder_type='vae'``, where it takes precedence
            over ``path``.
        path: Optional checkpoint path. For ``'vae'`` it is loaded with
            ``DFC_VAE.load_from_checkpoint``; for ``'resnet50'`` it is loaded
            as a ResNet-50 state dict.
        dataloader_list: Dataloaders forwarded to ``train_vae``.
        device: Device the encoder is moved to.
        encoder_type: Either ``'vae'`` or ``'resnet50'``.

    Returns:
        The encoder module on ``device``, already passed to ``wandb.watch``.

    Raises:
        NameError: If ``encoder_type`` is not one of the supported values.
    """
    if encoder_type == 'vae':
        print('Loading the variational autoencoder')
        if legacy_path:
            encoder = Encoder().to(device)
            encoder.load_state_dict(torch.load(
                legacy_path, map_location=device))
        else:
            if path:
                model = DFC_VAE.load_from_checkpoint(path).to(device)
            else:
                model = train_vae(args, log_name,  dataloader_list, args.input_height,
                                  is_digit_dataset=args.digital_dataset, device=device).to(device)
            encoder = model.encoder
    elif encoder_type == 'resnet50':  # Maybe fine tune resnet50 here
        print('Loading the RESNET50 encoder')
        if path:
            print('from pretrained file')
            encoder = models.resnet50(pretrained=False)
            # map_location keeps this consistent with the other checkpoint
            # loads in this notebook and lets a checkpoint saved on a GPU be
            # restored on a CPU-only machine.
            encoder.load_state_dict(torch.load(path, map_location=device))
        else:
            encoder = models.resnet50(pretrained=True, progress=True)
        # The ResNet-50 is used with gradients disabled for all parameters.
        set_parameter_requires_grad(encoder, req_grad=False)
        encoder = encoder.to(device)
    else:
        raise NameError(
            f'The encoder_type variable has an invalid value: {encoder_type!r}')
    wandb.watch(encoder)
    return encoder


In [4]:
def subgroups_encoders(args, device, dataloader_0=None, dataloader_1=None):
    """Load the "golden standard" encoders for group 0 and group 1.

    Args:
        args: Run configuration; the ``encoder_{0,1}_legacy_path``,
            ``encoder_{0,1}_path``, ``encoder_type`` and ``dataset``
            attributes are read.
        device: Device the encoders are moved to.
        dataloader_0: Dataloader for group 0. When omitted, the dataloaders
            are built with ``get_dataset[args.dataset](args)``.
        dataloader_1: Dataloader for group 1. Same fallback as above.

    Returns:
        Tuple ``(encoder_group_0, encoder_group_1)``.
    """
    # The original body referred to `dataloader_0` / `dataloader_1` without
    # ever defining them (they only exist as locals of `main_pipeline`), so
    # the call raised NameError. They are now explicit, optional parameters.
    if dataloader_0 is None or dataloader_1 is None:
        loaded_0, loaded_1 = get_dataset[args.dataset](args)
        if dataloader_0 is None:
            dataloader_0 = loaded_0
        if dataloader_1 is None:
            dataloader_1 = loaded_1

    print("Loading the golden standard group 0 encoder")
    encoder_group_0 = get_encoder(args, "encoder_0", args.encoder_0_legacy_path, args.encoder_0_path,
                                  [dataloader_0], device=device, encoder_type=args.encoder_type)

    print("Loading the golden standard group 1 encoder")
    encoder_group_1 = get_encoder(args, "encoder_1", args.encoder_1_legacy_path, args.encoder_1_path,
                                  [dataloader_1], device=device, encoder_type=args.encoder_type)

    return encoder_group_0, encoder_group_1

In [5]:
def get_dec_groups(args, device):
    """Load the per-group cluster centres and the per-group pretrained DEC models.

    The DEC models are instances of ``DFC``; per the original author's note
    they were pretrained with the fairness-loss weights set to 0, which makes
    them a DEC rather than a DFC.

    Args:
        args: Run configuration; ``cluster_0_path``, ``cluster_1_path``,
            ``cluster_number``, ``dfc_hidden_dim``, ``dfc_0_path`` and
            ``dfc_1_path`` are read.
        device: Device the models are moved to and the checkpoints mapped to.

    Returns:
        Tuple ``(cluster_centers_0, cluster_centers_1, dfc_group_0, dfc_group_1)``.
    """
    print("Load group 0 initial cluster definitions")
    cluster_centers_0 = get_cluster_centers(file_path=args.cluster_0_path, device=device)

    print("Load group 1 initial cluster definitions")
    # The original passed `args.cluster_number` (the number of clusters) as the
    # file path here.
    # NOTE(review): `cluster_1_path` is assumed by symmetry with
    # `cluster_0_path` -- confirm the attribute name in ArgsDFC.
    cluster_centers_1 = get_cluster_centers(file_path=args.cluster_1_path, device=device)

    # Each model is restored into its own variable. The original called
    # `load_state_dict` on an undefined name `dec`, overwrote `dfc_group_0`
    # with the second model, and returned a `dfc_group_1` that never existed.
    print("Load golden standard group 0 DEC")
    dfc_group_0 = DFC(cluster_number=args.cluster_number, hidden_dimension=args.dfc_hidden_dim).to(device)
    dfc_group_0.load_state_dict(torch.load(args.dfc_0_path, map_location=device))

    print("Load golden standard group 1 DEC")
    dfc_group_1 = DFC(cluster_number=args.cluster_number, hidden_dimension=args.dfc_hidden_dim).to(device)
    dfc_group_1.load_state_dict(torch.load(args.dfc_1_path, map_location=device))

    return cluster_centers_0, cluster_centers_1, dfc_group_0, dfc_group_1
       

In [6]:
def get_dfc_module(args,device):
    """Build a ``DFC`` module on ``device`` and restore its weights.

    The module is sized from ``args.cluster_number`` and
    ``args.dfc_hidden_dim``; the weights come from ``args.dfc_path``.

    Returns:
        The ``DFC`` module with the loaded state dict.
    """
    print("Load DFC")
    model = DFC(
        cluster_number=args.cluster_number,
        hidden_dimension=args.dfc_hidden_dim,
    )
    model = model.to(device)
    # Map the checkpoint onto the target device while loading it.
    checkpoint = torch.load(args.dfc_path, map_location=device)
    model.load_state_dict(checkpoint)
    return model

In [7]:
def eval_results(args, dataloader_list, encoder, dfc, device):
    """Evaluate an encoder/DFC pair and print accuracy, NMI, balance and entropies.

    Args:
        args: Run configuration; ``encoder_type``, ``cluster_number`` and
            ``dataset`` are read.
        dataloader_list: Dataloaders passed to ``predict``; the length of the
            first one is passed to ``balance``.
        encoder: Encoder network, switched to eval mode here.
        dfc: Clustering network, switched to eval mode here.
        device: Device forwarded to ``predict``.

    Returns:
        None. The metrics are only printed.
    """
    # Put both networks in evaluation mode before predicting.
    for network in (encoder, dfc):
        network.eval()

    print("Evaluate model")
    assignments, targets = predict(
        dataloader_list, encoder, dfc,
        device=device, encoder_type=args.encoder_type,
    )
    assignments, targets = assignments.cpu().numpy(), targets.numpy()

    print("Calculating cluster accuracy")
    _unused, accuracy = cluster_accuracy(assignments, targets, args.cluster_number)
    nmi = normalized_mutual_info_score(targets, assignments, average_method="arithmetic")

    # NOTE(review): if these are torch DataLoaders, len() is the number of
    # batches rather than the number of samples -- confirm this is what
    # `balance` expects as the size of group 0.
    group_0_size = len(dataloader_list[0])
    print("Calculating balance")
    bal, en_0, en_1 = balance(assignments, group_0_size, k=args.cluster_number)

    tag = args.dataset
    report = [
        f"{tag} Train Accuracy:", accuracy,
        f"{tag} Train NMI:", nmi,
        f"{tag} Train Bal:", bal, '\n',
        f"{tag} Train Entropy 0:", en_0, '\n',
        f"{tag} Train Entropy 1:", en_1,
    ]
    print(*report)
  

## Main Code

In [8]:
def main_pipeline(args):
    """Load the encoder and the clustering model for ``args`` and report results.

    For ``args.method == 'dfc'`` the saved DFC is loaded and evaluated with
    ``eval_results``. For ``args.method == 'dec'`` the cluster centres are
    obtained and ``get_dec`` is called.

    Args:
        args: Run configuration; ``method``, ``seed``, ``log_dir``, ``gpu``,
            ``dataset``, ``encoder_type``, ``encoder_legacy_path`` and
            ``encoder_path`` are read, plus ``cluster_number`` and
            ``cluster_path`` for the ``'dec'`` method.

    Raises:
        ValueError: If ``args.method`` is neither ``'dfc'`` nor ``'dec'``.
    """
    # Fail fast on an unsupported method, before any data or model is loaded.
    # The original fell through both `if`s and then crashed on `del dfc`.
    if args.method not in ('dfc', 'dec'):
        raise ValueError(
            f"Unsupported method {args.method!r}; expected 'dfc' or 'dec'")

    set_seed(args.seed)
    os.makedirs(args.log_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.gpu)
    print(f"Using {device}")

    dataloader_0, dataloader_1 = get_dataset[args.dataset](args)
    dataloader_list = [dataloader_0, dataloader_1]
    print("Loading Encoder type:",args.encoder_type)
    encoder = get_encoder(args, "encoder", args.encoder_legacy_path, args.encoder_path, dataloader_list, device=device, encoder_type=args.encoder_type)
    print("Running method", args.method)

    # Bound on every path so the cleanup at the end cannot hit an unbound
    # local (the original `del dfc` raised whenever method != 'dfc').
    model = None
    if args.method == 'dfc':
        # encoder_group_0, encoder_group_1, = subgroups_encoders(args,device)
        # cluster_centers_0, cluster_centers_1, dfc_group_0, dfc_group_1 = get_dec_groups(args,device)
        model = get_dfc_module(args,device)
        eval_results(args,dataloader_list, encoder, model, device=device)
    else:  # args.method == 'dec'
        print("Load cluster centers for final DEC")
        cluster_centers = get_cluster_centers(args, encoder, args.cluster_number, [dataloader_0, dataloader_1],
                                              args.cluster_path, device=device, save_name="clusters_dec")

        print("Train final DEC")
        # NOTE(review): `get_dec` is not defined in any visible cell of this
        # notebook -- confirm it is provided (e.g. by a star import) or
        # replace it with the intended loader; the result is also never
        # evaluated.
        model = get_dec(args,
                        encoder, "DEC", device=device, centers=cluster_centers)

    # Drop the local references to the loaded networks.
    del encoder, model


## DFC Run
In this section we have the cells with all the steps to train a DFC.   
We first load the encoders used for the two DECs; if no path is given, new ones are trained.
Next we load the cluster centers, or compute them with K-means.

In [9]:
# # Example run.
# mnist_ups_args = arg_class()
# mnist_ups_args.set_mnist_ups()
# main_pipeline(mnist_ups_args)

In [10]:
# Office31 run: create the argument object, apply its
# `set_office31_load_models()` configuration, and run the pipeline on it.
office_args = arg_class()
office_args.set_office31_load_models()
main_pipeline(office_args)

Using cuda
Loading Encoder type: resnet50
Loading the RESNET50 encoder
from pretrained file
Running method dfc
Load DFC
Evaluate model
Calculating cluster accuracy
Calculating balance
office_31 Train Accuracy: 0.6763392857142857 office_31 Train NMI: 0.716612082292925 office_31 Train Bal: 6.369426751592358e-08 
 office_31 Train Entropy 0: 2.6890848577221345 
 office_31 Train Entropy 1: 3.3934453185999707


In [11]:
# MTFL run: create the argument object, apply its
# `set_mtfl_load_models()` configuration, and run the pipeline on it.
mtfl_args = arg_class()
mtfl_args.set_mtfl_load_models()
main_pipeline(mtfl_args)

Using cuda
Loading Encoder type: resnet50
Loading the RESNET50 encoder
from pretrained file
Running method dfc
Load DFC
Evaluate model
Calculating cluster accuracy
Calculating balance
mtfl Train Accuracy: 0.7485977564102564 mtfl Train NMI: 0.21403839260800497 mtfl Train Bal: 0.0011003117549972493 
 mtfl Train Entropy 0:0.6889892386783225 
 mtfl Train Entropy 1: 0.6887447313219119


In [12]:
# Finish logging in wandb: close the run created at the top of the notebook.
wandbrun.finish()

[34m[1mwandb[0m: You can sync this run to the cloud by running:
[34m[1mwandb[0m: [33mwandb sync /mnt/f/Documents/Amsterdam/UvA/Block 3 -  Fairness, Accountability, Confidentiality and Transparency in AI/FACT-2021/wandb/offline-run-20210129_235358-336tc8dx[0m
