# Training the VAE

First, clone repo:

In [17]:
!ssh-keygen -t rsa
!ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
!clear
!echo "PUBLIC KEY: (for github.com)"
!cat /root/.ssh/id_rsa.pub

Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa): 
Created directory '/root/.ssh'.
Enter passphrase (empty for no passphrase): 
Enter same passphrase again: 
Your identification has been saved in /root/.ssh/id_rsa
Your public key has been saved in /root/.ssh/id_rsa.pub
The key fingerprint is:
SHA256:6mclq4FcHx6xnZOYxsf+d5WBeYdfEiHPvS26C2AiVrc root@b65dd0128e2f
The key's randomart image is:
+---[RSA 3072]----+
|            . .. |
|             +.. |
|      . o     =o.|
|     . o O o ooo=|
|    o o E B   o+*|
|   o + B.*.. . oo|
|    o o o+o .   .|
|     . .+  o .. .|
|      o+    +o . |
+----[SHA256]-----+
# github.com:22 SSH-2.0-babeld-6b2a8a7e
[H[2JPUBLIC KEY: (for github.com)
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQC/InJEiehUo4JsZn98zDPNDMPietoIbJ4u6WoKQO6aWw0jsoTnetpvvoO1KmE83ZrCmws+v8rAbaFbjx4Uhm+nmgWV/1AlwZTHMwLFgEBudcslk6UxEnfRQZS8QKZY7Prw5sFBnKUnVIBFgu8KAWeQ+nRUl+uU2zuleTZeFQd/wt5yYiZjazDoUSkYCOaws7DhPu9z2FMBv7Jh2RD82vAVRGMusKhKrbZb

In [18]:
!git clone https://github.com/Praccho/GeoLDM.git
%cd GeoLDM-mindy-kim
!git config --global user.email "colab_bot@brown.edu"
!git config --global user.name "Colab Bot"

Cloning into 'GeoLDM'...
remote: Enumerating objects: 316651, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 316651 (delta 8), reused 11 (delta 5), pack-reused 316629[K
Receiving objects: 100% (316651/316651), 11.99 GiB | 35.87 MiB/s, done.
Resolving deltas: 100% (437/437), done.
Updating files: 100% (184776/184776), done.
[Errno 2] No such file or directory: 'GeoLDM-mindy-kim'
/content/GeoLDM


Install dependencies:

In [20]:
!pip install -r requirements.txt

/content/GeoLDM/GeoLDM


In [21]:
!pip install satlaspretrain-models



In [22]:
import satlaspretrain_models
import os, sys
from PIL import Image
from tqdm import tqdm
import torchvision.transforms as transforms
import torch
from torch.utils.data import DataLoader
from data.datasets import StreetSatTrain, StreetSatTest, StreetSatVal
from main import StreetSatDataModule

Version Checks:

In [23]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)

2.2.1+cu121
True
12.1


## Mounting to Drive

Mount to drive to save logs there, rather than on runtime.

In [24]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
class Satlas:
    def __init__(self):
        # initialize a Weights instance
        self.weights_manager = satlaspretrain_models.Weights()

        # initialize Swin-v2-Base model for single images, RGB
        # fpn = feature pyramid network to combine coarse and fine grained representations
        self.model = self.weights_manager.get_pretrained_model(model_identifier="Aerial_SwinB_SI", fpn=True)

    def feature_map(self, sat_img):
        # retrieving second feature map so outputted size is 16x16
        return self.model(sat_img)[2]


In [39]:
# initialize model
model = Satlas()
print("Initialized Model")

train_cfg = {'target': 'data.datasets.StreetSatTrain'}
test_cfg = {'target': 'data.datasets.StreetSatTest'}
val_cfg = {'target': 'data.datasets.StreetSatVal'}

# initialize destination directories
to_dir = '/content/GeoLDM/data/val/satemb'
data_loader = StreetSatDataModule(8, val=val_cfg)
data_loader.setup()
samples = data_loader._val_dataloader()
print("Initialized DataLoader")

# load data from dataloader
for batch in tqdm(samples):
    lats, lngs, sat_imgs = batch['latitude'], batch['longitude'], batch['satellite_image']
    inds = []
    outpaths = []
    for i, (lat, lng) in enumerate(zip(lats, lngs)):
        filename = f"{lat},{lng}_satemb.pt"
        outpath = os.path.join(to_dir, filename)

        if os.path.exists(outpath):
            continue

        inds.append(i)
        outpaths.append(outpath)

    inds = torch.tensor(inds).int()
    sat_imgs = torch.index_select(sat_imgs, 0, inds)

    if len(sat_imgs) == 0:
        continue

    # expected input is an Aerial (0.5-2m/px high-res imagery)
    # The 0-255 pixel values should be divided by 255 so they are 0-1.
    norm_imgs = ((sat_imgs + 1) * 127.5) / 255.0
    norm_imgs = norm_imgs.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format)

    feature_maps = model.feature_map(norm_imgs)
    feature_maps = feature_maps.to(torch.float16)

    for outpath, feature_map in zip(outpaths, feature_maps):
        torch.save(feature_map.clone(), outpath)

Initialized Model
Initialized DataLoader


100%|██████████| 210/210 [00:10<00:00, 20.44it/s]


In [44]:
print(len(os.listdir("/content/GeoLDM/data/val/satemb")))
print(len(os.listdir("/content/GeoLDM/data/val/satellite")))

1673
1673


In [46]:
%cd ..
!git commit -m "satembs"
!git push

[Errno 2] No such file or directory: 'GeoLDM'
/content/GeoLDM/GeoLDM
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
fatal: could not read Username for 'https://github.com': No such device or address


In [16]:
print(torch.load('/content/drive/My Drive/cs1470/sat_embeds/48.62187978909208,-109.7452346982341_satemb.pt').shape)

torch.Size([128, 16, 16])
