In [39]:
from tqdm.auto import tqdm
from osgeo import gdal, ogr
import geopandas as gpd
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from pyproj import Proj, Transformer
import rasterio
import natsort
from glob import glob
import cv2


# Data crop

In [6]:
def crop_image_using_coordinates(InputImage, OutputImage, RefImage, latitude, longitude, CropSize):
    """Cut a rectangular window out of ``InputImage`` and write it to ``OutputImage``.

    The window is centred on the given point and extends ``CropSize`` reference
    pixels in each direction, where the pixel size is derived from the bounds
    and dimensions of ``RefImage``.

    Args:
        InputImage: Path of the raster to crop.
        OutputImage: Path the cropped raster is written to.
        RefImage: Path of the raster whose bounds and pixel grid define the
            pixel size used to size the window.
        latitude: NOTE - despite its name this value is used as the horizontal
            (x) centre of the window. The callers in this notebook pass the
            point's longitude here.
        longitude: NOTE - despite its name this value is used as the vertical
            (y) centre of the window. The callers in this notebook pass the
            point's latitude here.
        CropSize: Number of reference pixels the window extends from the centre
            towards each edge (the window spans ``2 * CropSize`` pixels per axis).

    Returns:
        None. The result is the file written by ``gdal.Translate``.
    """
    # Read the reference raster's extent and pixel grid. The context manager
    # closes the dataset handle; this function is called once per point, so an
    # unclosed handle per call adds up quickly.
    with rasterio.open(RefImage) as rds:
        left, bottom, right, top = rds.bounds
        width = rds.width
        height = rds.height

    # Size of one reference pixel in the raster's coordinate units.
    resolution_x = (right - left) / width
    resolution_y = (top - bottom) / height

    # Source and target CRS are both EPSG:4326, so this transform should leave
    # the values unchanged; it is kept so a different input CRS can be plugged in.
    InputCrs = 'EPSG:4326'
    OutputCrs = 'EPSG:4326'
    transformer = Transformer.from_crs(InputCrs, OutputCrs)
    longitude, latitude = transformer.transform(longitude, latitude)

    # See the argument notes above: `latitude` carries the x centre and
    # `longitude` the y centre.
    left_box = latitude - (resolution_x * CropSize)
    top_box = longitude + (resolution_y * CropSize)
    right_box = latitude + (resolution_x * CropSize)
    bottom_box = longitude - (resolution_y * CropSize)

    # projWin is given as (upper-left x, upper-left y, lower-right x, lower-right y).
    window = (left_box, top_box, right_box, bottom_box)

    gdal.Translate(OutputImage, InputImage, projWin = window)

# Train land data crop

In [56]:
def crop_train(data_n,o_data,crop_size):
    """Crop one tile of the ``data_n`` raster around every training point.

    Tiles are written to ``../data/geo_data/crop/train/{data_n}`` as
    ``Crop_{data_n}_{i}.tif``, where ``i`` is the row position in ``o_data``.
    When the directory already holds exactly one ``.tif`` per row the work is
    skipped; otherwise all tiles are (re)generated.

    Args:
        data_n: Layer name; selects ``../data/geo_data/raw/{data_n}_gw.tif`` as
            the source raster and names the output directory and files.
        o_data: Table with ``'lon'`` and ``'lat'`` columns, one row per point
            (a pandas DataFrame in this notebook).
        crop_size: Forwarded to ``crop_image_using_coordinates`` as ``CropSize``.

    Returns:
        None.
    """
    filepath=f'../data/geo_data/crop/train/{data_n}'
    os.makedirs(filepath, exist_ok=True)

    print('#'*20)
    print(f"Start get {data_n} information")

    # Only the number of existing tiles matters here, so no sorting is needed.
    existing_count = len(glob(filepath+"/*.tif"))

    if existing_count == len(o_data):
        print(f"--{data_n} data already existed")
        # Return only on a complete cache hit. Previously the return sat after
        # the if/else, which made the cropping loop below unreachable.
        return
    print("-- Not enough file please check")

    InputImage = f'../data/geo_data/raw/{data_n}_gw.tif'
    RefImage = '../data/gw_boundary/boundary_blank_resized.tif'

    # Iterate the coordinate columns positionally so the loop does not depend
    # on o_data having a default 0..n-1 index.
    points = zip(o_data['lon'], o_data['lat'])
    for i, (lon, lat) in enumerate(tqdm(points, total=len(o_data))):
        OutputImage = filepath+'/Crop_'+data_n+'_'+str(i)+'.tif'
        crop_image_using_coordinates(InputImage, OutputImage, RefImage, lon, lat, crop_size)
    print(f"{data_n} Train data crop complete")

In [57]:
train_data = pd.read_csv("../data/train_data.csv")

# Crop every enabled layer around the training points; uncomment an entry to
# include that layer as well.
for layer_name in (
    'Height',
    'Slope',
    # 'Landuse',
    # 'population_density',
    # 'NDVI',
):
    crop_train(layer_name, train_data, 1)

####################
Start get Height information
--Height data already existed
####################
Start get Slope information
--Slope data already existed


# Test land data crop

In [26]:
# Gangwon-do boundary: north / east / south / west edges of the bounding box.
N = 38.61370931
E = 129.359995
S = 37.03353708
W = 127.0950376

# Step between neighbouring grid points: 400 columns and 278 rows span the box
# edge to edge, i.e. 399 and 277 intervals.
width = (E - W) / 399
height = (N - S) / 277

# Column coordinates run west -> east, row coordinates north -> south; every
# value is rounded to 7 decimal places.
width_num = [round(W + width * col, 7) for col in range(400)]
height_num = [round(N - height * row, 7) for row in range(278)]

In [27]:
def crop_test(data_n,crop_size,width_num,height_num):
    """Crop one tile of the ``data_n`` raster around every point of a grid.

    The grid is walked row by row: for each value in ``height_num`` every value
    in ``width_num`` is visited, and tiles are numbered consecutively in that
    order. Output goes to ``../data/geo_data/crop/test/{data_n}`` as
    ``Crop_{data_n}_{num}.tif``.

    Args:
        data_n: Layer name; selects ``../data/geo_data/raw/{data_n}_gw.tif`` as
            the source raster and names the output directory and files.
        crop_size: Forwarded to ``crop_image_using_coordinates`` as ``CropSize``.
        width_num: Per-column coordinates (iterated in the inner loop).
        height_num: Per-row coordinates (iterated in the outer loop).

    Returns:
        None.
    """
    out_dir = f'../data/geo_data/crop/test/{data_n}'
    os.makedirs(out_dir, exist_ok=True)

    source_raster = f'../data/geo_data/raw/{data_n}_gw.tif'
    reference_raster = '../data/gw_boundary/boundary_blank_resized.tif'

    # Flatten the row-major grid so enumerate() supplies the running tile number.
    grid_points = ((lat, lon) for lat in height_num for lon in width_num)
    for num, (lat, lon) in enumerate(grid_points):
        tile_path = out_dir + '/Crop_' + data_n + '_' + str(num) + '.tif'
        crop_image_using_coordinates(source_raster, tile_path, reference_raster, lon, lat, crop_size)

In [28]:
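# Test tiles are produced by crop_test(data_n, crop_size, width_num, height_num);
# uncomment a line to generate that layer.
# crop_test('Height',1,width_num,height_num)
# crop_test('Slope',1,width_num,height_num)
# crop_test('Landuse',1,width_num,height_num)
# crop_test('population_density',1,width_num,height_num)
# crop_test('NDVI',1,width_num,height_num)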

# Crop data to npy

In [49]:
def convert_npy(data_n,types,crop_size):
    """Stack the cropped tiles of one layer into a single ``.npy`` array.

    Reads every ``../data/geo_data/crop/{types}/{data_n}/*.tif`` in natural
    sort order, loads each as a 3-channel image, converts BGR to RGB, resizes
    it to ``crop_size`` x ``crop_size``, divides by 255.0, and saves the stacked
    result to ``../data/{data_n}_{types}.npy``. If that output file already
    exists nothing is recomputed.

    Args:
        data_n: Layer name (directory of the cropped tiles, prefix of the output).
        types: Subdirectory of the crops and suffix of the output file
            (``'train'`` in this notebook).
        crop_size: Edge length in pixels each tile is resized to.

    Returns:
        None.

    Raises:
        FileNotFoundError: No ``.tif`` tiles match the input pattern.
        OSError: A tile could not be decoded by ``cv2.imread``.
    """
    filepath=f'../data/{data_n}_{types}.npy'

    # Output from an earlier run is reused as-is.
    if os.path.isfile(filepath):
        print("Already existed")
        return

    pattern = f"../data/geo_data/crop/{types}/{data_n}/*.tif"
    files = natsort.natsorted(glob(pattern))
    if not files:
        # Saving an empty array would create the cache file, and every later
        # call would then report "Already existed" with no data behind it.
        raise FileNotFoundError(f"No .tif tiles found matching {pattern}")

    tif_list=[]
    for path in files:
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise OSError(f"Could not read image: {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (crop_size, crop_size))
        tif_list.append(image / 255.0)

    np.save(filepath, np.array(tif_list))

In [50]:
# Build (or reuse) the stacked 24x24 training array for the Height layer.
convert_npy(data_n="Height", types='train', crop_size=24)

Already existed
