In [4]:
import numpy as np
# from scipy import interpolate
# import matplotlib.pyplot as plt
from osgeo import gdal
# import pandas as pd
# from sklearn.model_selection import train_test_split
import torch
import torchvision
from torch.utils.data import Dataset
import os

In [5]:
import torch
import math
# this ensures that the current MacOS version is at least 12.3+
print(torch.backends.mps.is_available())
# this ensures that the current current PyTorch installation was built with MPS activated.
print(torch.backends.mps.is_built())

True
True


In [6]:
dtype = torch.float
device = torch.device("mps")

# Create random input and output data
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Randomly initialize weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

# Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 6748.6240234375
199 4496.638671875
299 2998.41552734375
399 2001.2752685546875
499 1337.359619140625
599 895.1209716796875
699 600.4089965820312
799 403.91571044921875
899 272.84210205078125
999 185.36087036132812
1099 126.94164276123047
1199 87.90705108642578
1299 61.808937072753906
1399 44.34877395629883
1499 32.65983581542969
1599 24.82890510559082
1699 19.578859329223633
1799 16.056421279907227
1899 13.691228866577148
1999 12.101792335510254
Result: y = 0.032451845705509186 + 0.8096705675125122 x + -0.005598484072834253 x^2 + -0.08663506805896759 x^3


In [27]:
file_path = '/Volumes/SamDick/Grad Project/Data/ANH4/'
ndv = 3.4028230607371e+38

In [3]:
data_r = gdal.Open(file_path + 'R5_30GN1.TIF', gdal.GA_ReadOnly)
# print(data_r.RasterCount)  1
band_r = data_r.GetRasterBand(1)
band_r.SetNoDataValue(ndv)
# no data value -> 3.4028235e+38
ele_r = band_r.ReadAsArray()

In [4]:
data_m = gdal.Open(file_path + 'M5_30GN1.TIF', gdal.GA_ReadOnly)
# print(data_r.RasterCount)  1
band_m = data_m.GetRasterBand(1)
band_m.SetNoDataValue(ndv)
# no data value -> 3.4028235e+38
ele_m = band_m.ReadAsArray()

In [5]:
ele_m

array([[3.4028235e+38, 3.4028235e+38, 3.4028235e+38, ..., 1.1067260e+01,
        1.0528842e+01, 9.7950745e+00],
       [3.4028235e+38, 3.4028235e+38, 3.4028235e+38, ..., 1.0511560e+01,
        9.7509756e+00, 9.2790194e+00],
       [3.4028235e+38, 3.4028235e+38, 3.4028235e+38, ..., 9.6854563e+00,
        9.2926826e+00, 9.3123627e+00],
       ...,
       [2.0991859e+00, 2.4844029e+00, 3.4905410e+00, ..., 2.0533800e-01,
        1.7054400e-01, 2.0660500e-01],
       [2.7304480e+00, 3.7161262e+00, 4.8983092e+00, ..., 1.8084900e-01,
        1.9876900e-01, 1.4985800e-01],
       [4.1302629e+00, 5.2878218e+00, 5.7337070e+00, ..., 2.2286700e-01,
        1.1971900e-01, 1.1189200e-01]], dtype=float32)

In [5]:
masked_r = np.ma.masked_where(ele_r > 10000, ele_r)
masked_m = np.ma.masked_where(ele_m > 10000, ele_m)

In [13]:
masked_r

masked_array(
  data=[[--, --, --, ..., 11.236674308776855, 10.71141529083252,
         9.851737022399902],
        [--, --, --, ..., 10.525714874267578, 9.752251625061035,
         9.279019355773926],
        [--, --, --, ..., 9.685456275939941, 9.292682647705078,
         9.31330394744873],
        ...,
        [4.363124847412109, 7.6907830238342285, 12.57359504699707, ...,
         0.5724790096282959, 0.6216700077056885, 1.774999976158142],
        [5.719004154205322, 7.099827766418457, 16.24476432800293, ...,
         1.1711349487304688, 0.6391350030899048, 0.9425870180130005],
        [7.22181510925293, 12.524460792541504, 18.087730407714844, ...,
         1.1704609394073486, 0.505062997341156, 0.12686499953269958]],
  mask=[[ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, Fals

In [6]:
def pad_with(vector, pad_width, iaxis, kwargs):
    pad_value = kwargs.get('padder', 10)
    vector[:pad_width[0]] = pad_value
    vector[-pad_width[1]:] = pad_value

In [7]:
padded_ele_m = np.pad(ele_m, 2, pad_with, padder=100000)

In [8]:
masked_pad_m = np.ma.masked_where(padded_ele_m > 10000, padded_ele_m)

In [11]:
x_ = np.arange(0, padded_ele_m.shape[1])
y_ = np.arange(0, padded_ele_m.shape[0])

xx, yy = np.meshgrid(x_, y_)
#get only the valid values
x1 = xx[~masked_pad_m.mask]
y1 = yy[~masked_pad_m.mask]
newarr = masked_pad_m[~masked_pad_m.mask]

GD_m = interpolate.griddata((x1, y1), newarr.ravel(), (xx, yy), method='nearest')

In [12]:
padded_ele_r = np.pad(ele_r, 2, pad_with, padder=100000)
masked_pad_r = np.ma.masked_where(padded_ele_r > 10000, padded_ele_r)

x_ = np.arange(0, padded_ele_r.shape[1])
y_ = np.arange(0, padded_ele_r.shape[0])

xx, yy = np.meshgrid(x_, y_)
#get only the valid values
x1 = xx[~masked_pad_r.mask]
y1 = yy[~masked_pad_r.mask]
newarr = masked_pad_r[~masked_pad_r.mask]

GD_r = interpolate.griddata((x1, y1), newarr.ravel(), (xx, yy), method='nearest')

In [30]:
GD_r

array([[1.2550894, 1.2550894, 1.2550894, ..., 9.795074 , 9.795074 ,
        9.795074 ],
       [1.2550894, 1.2550894, 1.2550894, ..., 9.795074 , 9.795074 ,
        9.795074 ],
       [1.2402725, 1.2402725, 1.2550894, ..., 9.795074 , 9.795074 ,
        9.795074 ],
       ...,
       [4.130263 , 4.130263 , 4.130263 , ..., 0.111892 , 0.111892 ,
        0.111892 ],
       [4.130263 , 4.130263 , 4.130263 , ..., 0.111892 , 0.111892 ,
        0.111892 ],
       [4.130263 , 4.130263 , 4.130263 , ..., 0.111892 , 0.111892 ,
        0.111892 ]], dtype=float32)

In [51]:
# data_m_list = []
# data_ele_list = []
index_list = []
label_list = []
# tree_height = []
for i in range(ele_r.shape[0]):
    for j in range(ele_r.shape[1]):
        if masked_r.mask[i][j] == False:
#             pixel = [GD1[i+1][j+1], GD1[i+1][j+2], GD1[i+1][j+3], GD1[i+2][j+1], GD1[i+2][j+2], GD1[i+2][j+3], GD1[i+3][j+1], GD1[i+3][j+2], GD1[i+3][j+3]]
#             data_list.append(pixel)
            index_list.append((i, j))
            if masked_m.mask[i][j] == False:
                if masked_r.data[i][j]-masked_m.data[i][j]>2.0:
                    label_list.append(int(1))
                else:
                    label_list.append(int(0))
            else:
                label_list.append(int(1))

In [32]:
count = 0
for i in label_list:
    if i == 1:
        count += 1
print(count, " / ", len(label_list))

428944  /  1080624


In [52]:
# use train_test_split twice to split train / val / test

index_train, index_test, label_train, label_test = train_test_split(index_list, label_list, test_size=0.2)
index_train, index_val, label_train, label_val = train_test_split(index_train, label_train, test_size=0.25)

# mean_train = np.mean(np.array(data_train))
# std_train = np.std(np.array(data_train))
# mean_test = np.mean(np.array(data_test))
# std_test = np.std(np.array(data_test))

In [53]:
data_m_train = []
data_m_test = []
data_m_val = []

data_ele_train = []
data_ele_test = []
data_ele_val = []

for it in index_train:
    data_m_train.append(GD_m[it[0]+2][it[1]+2])
    if masked_m.mask[i][j] == False:
        data_ele_train.append(masked_r.data[it[0]][it[1]]-masked_m.data[it[0]][it[1]])
    else:
        data_ele_train.append(np.nan)

for it in index_test:
    data_m_test.append(GD_m[it[0]+2][it[1]+2])
    if masked_m.mask[i][j] == False:
        data_ele_test.append(masked_r.data[it[0]][it[1]]-masked_m.data[it[0]][it[1]])
    else:
        data_ele_test.append(np.nan)

for it in index_val:
    data_m_val.append(GD_m[it[0]+2][it[1]+2])
    if masked_m.mask[i][j] == False:
        data_ele_val.append(masked_r.data[it[0]][it[1]]-masked_m.data[it[0]][it[1]])
    else:
        data_ele_val.append(np.nan)

In [54]:
mean_m_train = np.mean(np.array(data_m_train), dtype=np.float64)
std_m_train = np.std(np.array(data_m_train), dtype=np.float64)
mean_ele_train = np.mean(np.array(data_ele_train), dtype=np.float64)
std_ele_train = np.std(np.array(data_ele_train), dtype=np.float64)
mean_m_test = np.mean(np.array(data_m_test), dtype=np.float64)
std_m_test = np.std(np.array(data_m_test), dtype=np.float64)
mean_ele_test = np.mean(np.array(data_ele_test), dtype=np.float64)
std_ele_test = np.std(np.array(data_ele_test), dtype=np.float64)
mean_m_val = np.mean(np.array(data_m_val), dtype=np.float64)
std_m_val = np.std(np.array(data_m_val), dtype=np.float64)
mean_ele_val = np.mean(np.array(data_ele_val), dtype=np.float64)
std_ele_val = np.std(np.array(data_ele_val), dtype=np.float64)

In [55]:
display(mean_m_train, std_m_train, mean_ele_train, std_ele_train, mean_m_test, std_m_test, mean_ele_test, std_ele_test, mean_m_val, std_m_val, mean_ele_val, std_ele_val)

5.287058628360828

5.169393945443095

-2.7438857275207777e+37

9.26502447601632e+37

5.271529781062127

5.162737729876422

-2.7553226448463877e+37

9.282616153663652e+37

5.284413252685591

5.161313305796146

-2.7172204688433687e+37

9.223823972243028e+37

In [None]:
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[mean_train],
        std=[std_train],
    ),
])

transform_test = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[mean_test],
        std=[std_test],
    ),
])

# output[channel] = (input[channel] - mean[channel]) / std[channel]

In [56]:
data_ML_path = '/Volumes/SamDick/Grad Project/Data/ML/'

In [57]:
def normalize(value, mean, std):
    return (value - mean)/std

In [59]:
count = 0
dict_dl = {'file_name':[], 'label':[]}
for i in range(3000):
    file_name_ = data_ML_path+'train/'+str(count)+'.npy'
    list_temp = []
    for i_ in range(1, 4):
        for j_ in range(1, 4):
            normalized_value = normalize(GD_m[index_train[i][0]+i_][index_train[i][1]+j_], mean_m_train, std_m_train) +\
            normalize(GD_r[index_train[i][0]+i_][index_train[i][1]+j_] - GD_m[index_train[i][0]+i_][index_train[i][1]+j_], mean_ele_train, std_ele_train)
            
            list_temp.append(normalized_value)
    np.save(file_name_, np.array(list_temp).reshape(3,3))
    dict_dl['file_name'].append(str(count)+'.npy')
    dict_dl['label'].append(label_train[i])
    count = count+1

for j in range(1000):
    file_name_ = data_ML_path+'test/'+str(count)+'.npy'
    list_temp = []
    for i_ in range(1, 4):
        for j_ in range(1, 4):
            normalized_value = normalize(GD_m[index_test[j][0]+i_][index_test[j][1]+j_], mean_m_test, std_m_test) +\
            normalize(GD_r[index_test[j][0]+i_][index_test[j][1]+j_] - GD_m[index_test[j][0]+i_][index_test[j][1]+j_], mean_ele_test, std_ele_test)
            
            list_temp.append(normalized_value)
    np.save(file_name_, np.array(list_temp).reshape(3,3))
    dict_dl['file_name'].append(str(count)+'.npy')
    dict_dl['label'].append(label_test[j])
    count = count+1
    
for k in range(1000):
    file_name_ = data_ML_path+'val/'+str(count)+'.npy'
    list_temp = []
    for i_ in range(1, 4):
        for j_ in range(1, 4):
            normalized_value = normalize(GD_m[index_val[k][0]+i_][index_val[k][1]+j_], mean_m_val, std_m_val) +\
            normalize(GD_r[index_val[k][0]+i_][index_val[k][1]+j_] - GD_m[index_val[k][0]+i_][index_val[k][1]+j_], mean_ele_val, std_ele_val)
            
            list_temp.append(normalized_value)
    np.save(file_name_, np.array(list_temp).reshape(3,3))
    dict_dl['file_name'].append(str(count)+'.npy')
    dict_dl['label'].append(label_test[j])
    count = count+1

In [60]:
np.load(data_ML_path+'train/'+'0.npy')

array([[0.34990677, 0.43514149, 0.50346652],
       [0.29272361, 0.40154082, 0.53656014],
       [0.32406323, 0.33970311, 0.47502405]])

In [61]:
df = pd.DataFrame.from_dict(dict_dl)
df

Unnamed: 0,file_name,label
0,0.npy,0
1,1.npy,0
2,2.npy,0
3,3.npy,0
4,4.npy,0
...,...,...
4995,4995.npy,0
4996,4996.npy,0
4997,4997.npy,0
4998,4998.npy,0


In [62]:
df.to_csv(data_ML_path+'labels.csv', index=False)

In [None]:
class TinyData(Dataset):
    def __init__(self, setname):
        """
        Variables:
       <setname> can be any of: 'train' to specify the training set
                                'test' to specify the test set"""
        self.setname = setname
        assert setname in ['train','test']
        
        #Define dataset
        overall_dataset_dir = '/Volumes/SamDick/Grad Project/Data/ML'
        self.selected_dataset_dir = os.path.join(overall_dataset_dir,setname)
        
        #E.g. self.all_filenames = ['006.png','007.png','008.png'] when setname=='val'
        self.all_filenames = os.listdir(self.selected_dataset_dir)
        self.all_labels = pd.read_csv(os.path.join(overall_dataset_dir,'labels.csv'),header=0,index_col=0)
        self.label_meanings = self.all_labels.columns.values.tolist()
    
    def __len__(self):
        """Return the total number of examples in this split, e.g. if
        self.setname=='train' then return the total number of examples
        in the training set"""
        return len(self.all_filenames)
        
    def __getitem__(self, idx):
        """Return the example at index [idx]. The example is a dict with keys
        'data' (value: Tensor for an RGB image) and 'label' (value: multi-hot
        vector as Torch tensor of gr truth class labels)."""
        selected_filename = self.all_filenames[idx]
#         imagepil = PIL.Image.open(os.path.join(self.selected_dataset_dir,selected_filename)).convert('RGB')
        
#         #convert image to Tensor and normalize
#         image = utils.to_tensor_and_normalize(imagepil)
        
        npy = np.load(os.path.join(self.selected_dataset_dir,selected_filename))
        pimg = torch.from_numpy(npy)
        
        #load label
        label = torch.Tensor(self.all_labels.loc[selected_filename,:].values)
        
        sample = {'data':pimg, #preprocessed image, for input into NN
                  'label':label,
                  'img_idx':idx}
        return sample

In [None]:
train_dataset = TinyData(setname = 'train')
test_dataset = TinyData(setname = 'test')

In [None]:
len(test_dataset)

In [None]:
len(test_dataset)

In [None]:
simple = train_dataset[0]
simple['data']

In [None]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square, you can specify with a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
print(net)

In [None]:
np.save(file_path+'test.npy', transform_train(np.array(data_train[5000]).reshape(3,3)).numpy())

In [None]:
data_np = np.array(data_list)

In [None]:
data_np

In [None]:
X, y = np.arange(10).reshape((5, 2)), range(5)

In [13]:
import rasterio
from affine import Affine

In [14]:
dst_crs='EPSG:28992'
out_file = file_path + 'M5_30GN1_ip.TIF'
afn = Affine.from_gdal(*data_r.GetGeoTransform())
with rasterio.open(
    out_file,
    'w',
    driver='GTiff',
    height=padded_ele_r.shape[0],
    width=padded_ele_r.shape[1],
    count=1,
    dtype=np.float32,
    crs=data_m.GetProjection(),
    transform=afn,
) as dest_file:
    dest_file.write(GD_m, 1)
dest_file.close()

In [15]:
data_m.GetGeoTransform()

(80000.0, 5.0, 0.0, 462500.0, 0.0, -5.0)

In [26]:
driver = gdal.GetDriverByName("GTiff")
out_file = file_path + 'M5_30GN1_ip.TIF'

outdata = driver.Create(out_file, padded_ele_m.shape[1],  padded_ele_m.shape[0], 1, gdal.GDT_UInt16)
outdata.SetGeoTransform(data_m.GetGeoTransform())##sets same geotransform as input
outdata.SetProjection(data_m.GetProjection())##sets same projection as input
outdata.GetRasterBand(1).WriteArray(GD_m)
# outdata.GetRasterBand(1).SetNoDataValue(10000)##if you want these values transparent
outdata.FlushCache() ##saves to disk!!
outdata = None