# Reproduce Latents

In [2]:
%matplotlib inline

from importlib import reload
import os
import numpy as np
from pkg_resources import resource_filename

import pandas

import h5py
import seaborn as sns
import pandas as pd
#import umap
from ulmo import io as ulmo_io
from ulmo import plotting
from ulmo.ssl.my_util import modis_loader, set_model
from ulmo.ssl.my_util import Params, option_preprocess
from ulmo.ssl import latents_extraction

import torch

from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from matplotlib import pyplot as plt
from tqdm import trange

# Load files

## Option file

In [3]:
# Path to the options JSON of the base MODIS SSL experiment, resolved
# relative to the 'runs' resource directory of the ulmo package.
opt_file = os.path.join(
    resource_filename('ulmo', 'runs'),
    'SSL',
    'First',
    'experiments',
    'base_modis_model',
    'opts.json',
)

In [4]:
# Parse the options file, then run it through the project's preprocessing step.
opt = option_preprocess(Params(opt_file))

## Pytorch file

In [5]:
# Fetch the trained SimCLR checkpoint from S3. The first argument is the
# basename of the S3 key ("last.pth"); presumably this is the local
# destination path — confirm against ulmo_io.download_file_from_s3.
s3_model_path = 's3://modis-l2/modis_simclr_base_model/SimCLR_modis_resnet50_lr_0.05_decay_0.0001_bsz_64_temp_0.07_trial_0_cosine_warm/last.pth'
ulmo_io.download_file_from_s3(os.path.basename(s3_model_path), s3_model_path)

Downloading from s3: last.pth
Done!


## Erdong's reproduced latents

In [6]:
# Fetch the previously reproduced latent table from S3. The first argument
# is the basename of the S3 key; presumably this is the local destination
# path — confirm against ulmo_io.download_file_from_s3.
s3_reproduce_latents = 's3://modis-l2/modis_latents_last_reproduced.h5'
ulmo_io.download_file_from_s3(os.path.basename(s3_reproduce_latents), s3_reproduce_latents)

Downloading from s3: modis_latents_last_reproduced.h5
Done!


In [7]:
# Read the whole 'modis_latents' dataset into memory; the HDF5 file is
# closed as soon as the slice has been copied out.
with h5py.File('modis_latents_last_reproduced.h5', mode='r') as f:
    latents_dset = f['modis_latents']
    reproduced = latents_dset[:]
reproduced.shape

(134162, 128)

## Load one image

In [8]:
modis_dataset_path = "s3://modis-l2/PreProc/MODIS_2010_95clear_128x128_inpaintT_preproc_0.8valid.h5"
# Open the remote stream and the HDF5 handle as nested context managers so
# that both are closed once the first training image has been copied into
# memory (previously the h5py.File handle was never closed).
with ulmo_io.open(modis_dataset_path, 'rb') as f:
    with h5py.File(f, 'r') as hf:
        # Slice 0:1 (rather than index 0) keeps the leading batch axis.
        img0 = hf['train'][0:1, ...]

In [9]:
img0.shape

(1, 1, 64, 64)

# Defs

## Load model

In [10]:
# Local checkpoint location: directory and file name are kept as separate
# notebook variables and joined into the path handed to load_model.
model_path, model_name = './', "last.pth"
model_path_title = os.path.join(model_path, model_name)

In [11]:
def load_model(opt, model_path, remove_module=True, using_gpu=None):
    """Build the model described by ``opt`` and load checkpoint weights into it.

    Parameters
    ----------
    opt
        Options object forwarded unchanged to ``set_model``.
    model_path : str
        Path of the checkpoint read with ``torch.load``. Its ``'model'``
        entry is used as the state dict.
    remove_module : bool, optional
        If True, the substring ``'module.'`` is removed from every
        state-dict key before loading. It is removed wherever it occurs in
        the key, not only as a leading prefix (presumably the keys were
        written by a DataParallel-wrapped model — TODO confirm).
    using_gpu : bool or None, optional
        Device selection. ``None`` (the default) follows
        ``torch.cuda.is_available()``, which is the original behaviour.
        Pass ``False`` to force a CPU load on a machine that has a GPU.

    Returns
    -------
    model
        The model returned by ``set_model`` with the checkpoint loaded.
    """
    if using_gpu is None:
        using_gpu = torch.cuda.is_available()
    model, _ = set_model(opt, cuda_use=using_gpu)

    # On CPU the stored tensors must be remapped to the CPU device;
    # map_location=None is torch.load's default placement.
    map_location = None if using_gpu else torch.device('cpu')
    checkpoint = torch.load(model_path, map_location=map_location)
    state_dict = checkpoint['model']

    if remove_module:
        state_dict = {
            key.replace('module.', ''): value
            for key, value in state_dict.items()
        }
    model.load_state_dict(state_dict)
    print("Model loaded")
    return model

## Calculate latents

In [12]:
def calc_latent(model, img, using_gpu):
    """Compute latents for ``img`` after tiling it three times along axis 1.

    Parameters
    ----------
    model
        Model forwarded to ``latents_extraction.calc_latent``.
    img : array-like
        Image array; it is repeated 3 times along axis 1 (presumably the
        channel axis of a (batch, 1, H, W) array — TODO confirm).
    using_gpu : bool
        Forwarded unchanged to ``latents_extraction.calc_latent``.

    Returns
    -------
    Whatever ``latents_extraction.calc_latent`` returns for the tiled tensor.
    """
    tiled = torch.tensor(np.repeat(img, 3, axis=1))
    return latents_extraction.calc_latent(model, tiled, using_gpu)

# Run on CPU

In [16]:
# Flag handed to calc_latent for the CPU run.
# NOTE(review): the load_model call in this section does not receive this
# flag; load_model checks torch.cuda.is_available() on its own, so on a GPU
# host the model may still be built for CUDA — confirm.
using_gpu = False

## Load model

In [20]:
# remove_module=True: strip 'module.' from the checkpoint keys before loading.
cpu_model = load_model(opt, model_path_title, remove_module=True)

Model loaded


## Calc latents

In [31]:
# Inference only: disable autograd tracking while computing the latents.
with torch.no_grad():
    cpu_latents = calc_latent(cpu_model, img0, using_gpu)

In [32]:
cpu_latents

array([[-0.02050127,  0.07407618,  0.04224342, -0.0722819 ,  0.00294061,
         0.01800058,  0.12333521, -0.04895734, -0.03287374,  0.08677723,
         0.07657994, -0.05697816, -0.05136307, -0.08338682, -0.00944599,
        -0.12602489,  0.0230847 , -0.04223683,  0.0372562 , -0.04805676,
        -0.10619931, -0.13996214,  0.08047187,  0.13067569, -0.13676985,
         0.17556271, -0.05703831,  0.06767135, -0.14134355,  0.12577687,
        -0.07186496, -0.05439653, -0.10089992,  0.0928174 , -0.02457148,
        -0.05033816,  0.03225619, -0.11719637, -0.11773968,  0.01282729,
        -0.00686526, -0.07831512, -0.03915553,  0.0032813 ,  0.1243174 ,
        -0.05613922, -0.08427165,  0.11099236,  0.0152957 ,  0.13387296,
         0.09926886,  0.05570457,  0.06872411,  0.02095909, -0.0634723 ,
        -0.03160271, -0.0385897 , -0.025409  ,  0.05674249, -0.05657056,
         0.07395758, -0.04900097, -0.12068846,  0.04964582, -0.11162041,
         0.03284034,  0.07940092,  0.05017281,  0.1

# Run on 2 GPUs

In [13]:
# Flag handed to calc_latent for the GPU run.
using_gpu = True

In [15]:
# remove_module=False: load the checkpoint keys unchanged.
gpu_model = load_model(opt, model_path_title, remove_module=False)

Model loaded


In [16]:
# Inference only: disable autograd tracking while computing the latents.
with torch.no_grad():
    gpu_latents = calc_latent(gpu_model, img0, using_gpu)

In [17]:
gpu_latents

array([[-0.02050127,  0.07407615,  0.04224341, -0.07228189,  0.00294061,
         0.01800058,  0.12333518, -0.04895733, -0.03287373,  0.0867772 ,
         0.07657992, -0.05697815, -0.05136306, -0.0833868 , -0.00944599,
        -0.12602486,  0.02308469, -0.04223682,  0.03725619, -0.04805674,
        -0.10619929, -0.1399621 ,  0.08047186,  0.13067566, -0.13676982,
         0.17556266, -0.0570383 ,  0.06767134, -0.14134352,  0.12577684,
        -0.07186494, -0.05439652, -0.1008999 ,  0.09281737, -0.02457148,
        -0.05033815,  0.03225618, -0.11719634, -0.11773965,  0.01282729,
        -0.00686526, -0.0783151 , -0.03915552,  0.00328129,  0.12431738,
        -0.05613921, -0.08427163,  0.11099234,  0.0152957 ,  0.13387293,
         0.09926884,  0.05570455,  0.0687241 ,  0.02095909, -0.06347229,
        -0.0316027 , -0.0385897 , -0.02540899,  0.05674247, -0.05657055,
         0.07395757, -0.04900096, -0.12068843,  0.0496458 , -0.11162039,
         0.03284033,  0.0794009 ,  0.05017279,  0.1

In [18]:
torch.cuda.is_available()

True

In [19]:
torch.cuda.device_count()

2

## Erdong's latents

In [20]:
# Column 0 (the first latent component) for every row of the reproduced table.
# NOTE(review): to compare against the single-image latents computed above,
# the first row would be reproduced[0, :] — confirm which was intended.
reproduced[:,0]

array([-0.02155386, -0.06343812,  0.1735882 , ..., -0.04469956,
        0.15122856, -0.03635954], dtype=float32)

## Are the reproduced latents shuffled?

In [21]:
# Element-wise difference between every reproduced row and the single
# image's latent vector (the one-row gpu_latents broadcasts across rows).
diff = np.subtract(reproduced, gpu_latents)

In [22]:
diff.shape

(134162, 128)

In [28]:
# Per-row L1 distance to the single-image latents.
coll = np.abs(diff).sum(axis=1)
coll.shape

(134162,)

In [29]:
# Row whose latents are closest (smallest L1 distance) to the single-image
# latents. coll is a sum of absolute values, so no further abs() is needed.
np.argmin(coll)

28418

In [30]:
# Residual for the closest row. The index is derived from coll rather than
# copied by hand from a previous output, so it stays correct on re-run.
diff[np.argmin(coll), :]

array([ 0.02300929, -0.01443782,  0.08370233, -0.01953413, -0.00470556,
        0.06687014, -0.02280299,  0.01368805, -0.00121663,  0.00035466,
       -0.03584557,  0.08917063,  0.1024095 , -0.07567263, -0.03344906,
        0.00987158, -0.00551604,  0.04641563,  0.09849343,  0.04990767,
       -0.03382512,  0.01376998, -0.03758459, -0.01024165,  0.06636506,
       -0.00242111, -0.00057833,  0.01628938,  0.01175076,  0.00627048,
        0.04728827,  0.02251767,  0.00035165,  0.0178895 , -0.01416013,
        0.0569285 ,  0.01368723, -0.00091407,  0.03235236, -0.02294117,
       -0.01198949, -0.03785446,  0.019293  ,  0.01009876, -0.059648  ,
       -0.00631548,  0.07141049, -0.05201278, -0.02989315,  0.00752009,
       -0.05838902,  0.00725605, -0.01924752, -0.01545174,  0.09455034,
       -0.05258417, -0.00313755,  0.00156968, -0.02284767,  0.02315697,
       -0.02556891,  0.06480764, -0.00995973, -0.00638906,  0.04461364,
       -0.01216662,  0.04516967,  0.01229106,  0.07505825, -0.00

In [27]:
# NOTE(review): 72515 is a hard-coded row index that does not match the
# argmin reported earlier in this section (28418), and this cell's execution
# count (In [27]) precedes the cells above it — likely left over from an
# earlier run; confirm which row was meant.
reproduced[72515,:]

array([ 7.26199821e-02,  1.64982919e-02, -1.14336967e-01, -5.90138510e-02,
       -7.93228224e-02, -1.47404820e-02,  4.94753867e-02,  3.99645120e-02,
       -5.14201969e-02, -2.81686969e-02, -4.29565012e-02,  5.49756810e-02,
       -5.77338450e-02,  1.66457206e-01, -8.37837160e-02, -1.74851879e-01,
       -9.54723954e-02, -1.35257110e-01, -9.04405266e-02, -1.04017697e-01,
       -2.29768045e-02, -1.18896505e-02,  9.70488116e-02,  1.45358905e-01,
       -3.56174447e-02,  1.15349524e-01, -9.63498726e-02,  3.16082686e-02,
        5.08358926e-02,  7.50424489e-02,  8.19345284e-03,  1.12717807e-01,
       -4.14534137e-02, -9.73928273e-02,  1.51695669e-01, -3.19805322e-03,
        1.66340172e-01, -5.33876661e-03, -2.35126317e-02, -9.05339271e-02,
       -5.53592704e-02,  8.92295968e-03,  6.80580661e-02, -6.13395981e-02,
       -1.23335429e-01, -3.15948389e-02, -7.38016516e-02, -6.21085502e-02,
        8.84830207e-02,  2.77109474e-01,  9.89952609e-02,  1.14709638e-01,
       -2.78939325e-02,  