In [1]:
import csv
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import torch.backends.cudnn as cudnn
from torch.utils.data import Dataset
import pandas as pd
import warnings
import os
import geoio
import copy
from PIL import Image
import math
import numpy as np

In [2]:
def merge_on_lat_lon(df1, df2, keys=['cluster_lat', 'cluster_lon'], how='inner'):
    """
    Merge two dataframes on a (latitude, longitude) column pair.

    Joining directly on float columns is unreliable, so both coordinates are
    scaled by 10000 and converted to integer keys before merging, i.e. rows
    are matched to 4 decimal places. The scaled values are rounded to the
    nearest integer rather than truncated: a product such as 437710.9999999
    must become 437711, otherwise two representations of the same coordinate
    can end up with different keys and silently fail to match.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames that both contain the two columns named in ``keys``.
        Neither input is modified; the function works on copies.
    keys : list of str
        ``[lat_column, lon_column]``.
    how : str
        Join type, passed through to ``pandas.merge``.

    Returns
    -------
    pandas.DataFrame
        The merged frame. The coordinate columns are taken from ``df1``
        (they are dropped from ``df2`` before the join) and the temporary
        integer key columns are removed.
    """
    lat_key, lon_key = keys[0], keys[1]
    df1 = df1.copy()
    df2 = df2.copy()

    # Integer keys: round first, then cast, so float noise on either side of
    # an integer maps to the same key.
    for frame in (df1, df2):
        frame['merge_lat'] = (10000 * frame[lat_key]).round().astype(int)
        frame['merge_lon'] = (10000 * frame[lon_key]).round().astype(int)

    # Keep only df1's coordinate columns to avoid _x/_y duplicates.
    df2 = df2.drop(columns=list(keys))
    merged = pd.merge(df1, df2, on=['merge_lat', 'merge_lon'], how=how)
    return merged.drop(columns=['merge_lat', 'merge_lon'])

def create_space(lat, lon, s=8):
    """
    Return the bounds of a box of side ``s`` (km) anchored at (lat, lon).

    The side length is converted to an angle ``v`` in degrees and the tuple
    ``(lat - v, lon, lat, lon + v)`` is returned. The box therefore spans
    ``v`` degrees towards lower latitude and ``v`` degrees towards higher
    longitude from the given point: the point is a corner of the box, not
    its centre.
    """
    degrees_per_radian = 180 / math.pi
    # 6378137 is presumably Earth's equatorial radius in metres -- confirm.
    radians_per_km = 1000 / 6378137
    # Roughly 0.072 degrees for the default s=8 (about 0.090 for s=10).
    v = degrees_per_radian * radians_per_km * s
    return lat - v, lon, lat, lon + v

# Path of the nightlights GeoTIFF. Despite the _DIR suffix this points at a
# single .tif file, not a directory.
NIGHTLIGHTS_DIR = '/home/haoying/VNL_v2_npp_2020_global_vcmslcfg_c202101211500.average.tif'
# Open the raster once at module level; add_nightlights() uses this object
# for its lat/lon -> pixel conversion (proj_to_raster).
tif = geoio.GeoImage(NIGHTLIGHTS_DIR)
# Pixel values with singleton axes removed. add_nightlights() indexes this as
# [row, col], so the result is expected to be 2-D -- TODO confirm for this file.
tif_array = np.squeeze(tif.get_data())

def add_nightlights(df, tif, tif_array):
    '''
    Add a ``nightlights`` column with the mean raster value around each point.

    For every row, ``create_space(cluster_lat, cluster_lon)`` yields a lat/lon
    box. Its corners are converted to pixel coordinates with
    ``tif.proj_to_raster`` and the mean of ``tif_array`` over the resulting
    pixel window is recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``cluster_lat`` and ``cluster_lon`` columns. Modified in
        place: a ``nightlights`` column is added (or overwritten).
    tif : object exposing ``proj_to_raster(lon, lat) -> (x, y)``
        Used only for the coordinate conversion.
    tif_array : array indexed as ``[row, col]``
        Pixel values of the same raster.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If the converted pixel bounds are not strictly increasing.
    ValueError
        If the box for a row falls outside the raster.
    '''
    cluster_nightlights = []
    for r in df.itertuples(index=False):
        lat, lon = r.cluster_lat, r.cluster_lon
        min_lat, min_lon, max_lat, max_lon = create_space(lat, lon)

        # The lower latitude is paired with the larger row index (ymaxPixel):
        # the code expects row numbers to grow as latitude decreases, and the
        # assertions below verify that ordering.
        xminPixel, ymaxPixel = tif.proj_to_raster(min_lon, min_lat)
        xmaxPixel, yminPixel = tif.proj_to_raster(max_lon, max_lat)
        assert xminPixel < xmaxPixel, f"x pixel bounds not increasing for {lat}, {lon}"
        assert yminPixel < ymaxPixel, f"y pixel bounds not increasing for {lat}, {lon}"

        outside_x = xminPixel < 0 or xmaxPixel >= tif_array.shape[1]
        outside_y = yminPixel < 0 or ymaxPixel >= tif_array.shape[0]
        if outside_x or outside_y:
            raise ValueError(f"no match for {lat}, {lon}")

        # Bounds are checked on the raw (possibly fractional) pixel values and
        # only then truncated to integer indices for slicing.
        xminPixel, yminPixel = int(xminPixel), int(yminPixel)
        xmaxPixel, ymaxPixel = int(xmaxPixel), int(ymaxPixel)
        window = tif_array[yminPixel:ymaxPixel, xminPixel:xmaxPixel]
        cluster_nightlights.append(window.mean())

    df['nightlights'] = cluster_nightlights

In [3]:
# Use CUDA device 1 when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
# NOTE: torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints from a trusted source.
# map_location=device places the loaded tensors on the device chosen above,
# so the load also works on a machine without CUDA instead of
# unconditionally requesting GPU 1.
model = torch.load("/home/haoying/res_zl12_effnet_v3/predict-unlimited.pt", map_location=device)
model.to(device)

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=

In [4]:
class TestDataset(Dataset):
    """
    Dataset yielding ``(image, file_name)`` for every entry of a directory.

    Parameters
    ----------
    transform : callable or None
        Applied to the RGB PIL image before it is returned. When falsy the
        PIL image itself is returned.
    root : str
        Directory whose entries are served. Defaults to the original
        hard-coded location so existing ``TestDataset(transform=...)`` calls
        behave as before.
    """

    def __init__(self, transform=None, root='/home/haoying/data_zl12/'):
        self.root = root
        # Every directory entry is handed to Image.open in __getitem__, so the
        # directory should contain image files only. Order is whatever
        # os.listdir returns.
        self.file_list = list(os.listdir(root))
        self.transform = transform

    def __len__(self):
        """Number of directory entries found at construction time."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``(image, file_name)`` for position ``idx`` in ``file_list``."""
        name = self.file_list[idx]
        # The context manager closes the underlying file as soon as the
        # converted RGB image has been produced.
        with Image.open(os.path.join(self.root, name)) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, name

# Put the model into evaluation mode before running inference.
model.eval()

# Per-channel statistics handed to transforms.Normalize (these are the values
# commonly used with ImageNet-pretrained models).
_mean = [0.485, 0.456, 0.406]
_std = [0.229, 0.224, 0.225]

# Each image is converted to a tensor and then normalised channel-wise.
test_dataset = TestDataset(
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=_mean, std=_std),
    ])
)

# Fixed-order batches of 20, loaded in the main process (num_workers=0).
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=20,
    shuffle=False,
    num_workers=0,
)

In [5]:
# Table that the model predictions will be attached to; rows are matched to
# images through the 'y_x' file-name column.
df = pd.read_csv("/home/haoying/res_zl12_effnet_v3/nightlights.csv")
# -1.0 marks "no prediction yet". A float sentinel keeps the column float64,
# so float model scores can be written into it later without changing the
# column dtype (setting floats into an int column is deprecated in pandas).
df['predict'] = -1.0

In [6]:
# Inspect the loaded table; at this point every 'predict' entry still holds
# the placeholder assigned right after loading.
df

Unnamed: 0,y_x,cluster_lat,cluster_lon,nightlights,predict
0,-48.779296875_-66.93006025862447.png,43.771094,90.966797,0.379018,-1
1,-44.82421875_-75.97355295343337.png,40.847060,120.058594,0.380550,-1
2,-44.82421875_-70.98834922412489.png,40.847060,102.304688,0.272340,-1
3,-39.111328125_-65.4034447883078.png,36.385913,87.275391,0.390618,-1
4,-50.80078125_-77.86034459764656.png,45.213004,128.320312,0.310670,-1
...,...,...,...,...,...
131947,-38.671875_-69.28725695167886.png,36.031332,97.382812,0.272444,-1
131948,-46.142578125_-72.63337363853837.png,41.836828,107.666016,0.306109,-1
131949,-44.208984375_-66.08936427047087.png,40.380028,88.945312,0.306339,-1
131950,-25.224609375_-74.59010800882324.png,24.447150,114.609375,0.263865,-1


In [8]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# Collect one score per file name first and write them into df in a single
# vectorised pass afterwards. Looking up df['y_x'] == name for every image
# would rescan the whole table once per image.
predicted = {}
with torch.no_grad():
    for data, name in test_loader:
        data = data.to(device)
        # reshape(-1) instead of squeeze(): squeeze() turns a batch of one
        # image into a 0-d tensor that cannot be indexed per sample.
        # Assumes the model emits exactly one value per image -- checked below.
        scores = model(data).reshape(-1).cpu().tolist()
        if len(scores) != len(name):
            raise ValueError(
                f"expected {len(name)} scores for the batch, got {len(scores)}"
            )
        predicted.update(zip(name, scores))

if predicted:
    # Rows whose 'y_x' has no matching image keep their current 'predict' value.
    has_prediction = df['y_x'].isin(list(predicted))
    df['predict'] = df['y_x'].map(predicted).where(has_prediction, df['predict'])

In [9]:
# Sanity check: rows without a matching image keep the -1 placeholder, so a
# minimum above -1 means every row received a prediction.
df['predict'].min()

3.613386392593384

In [10]:
# Largest predicted value, to eyeball the upper end of the output range.
df['predict'].max()

6.364897727966309

In [11]:
# Persist the table including the new 'predict' column; index=False leaves the
# DataFrame's row index out of the CSV.
df.to_csv('/home/haoying/res_zl12_effnet_v3/nightlight_predicted_all.csv', index = False)