In [None]:
from tqdm import tqdm
from osgeo import gdal, ogr
import geopandas as gpd
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from pyproj import Proj, Transformer
import rasterio
import natsort
from glob import glob
import cv2


In [None]:
"""
인구밀도 분류
[0,1,2,3,4,5,10,20,30,40,50,60,70,80,90,100,200,300,400 ,500,1000, 2000,3000, 4000,5000,6000,7000, 8000,9000,10000,11000,12000,13000,14000,15000, 16000, 20000]  

토지이용도 분류
'10': ['제1종일반주거지역', '제2종일반주거지역', '제3종일반주거지역','준주거지역', '제1종전용주거지역','제2종전용주거지역'],  # Residential 빨강
'2': ['일반상업지역', '근린상업지역', '중심상업지역'],                             # Commercial       파랑
'12': ['자연녹지지역', '보전녹지지역', '자연환경보전지역','보전관리지역'],          # Green Area      초록 
'8' : ['농림지역'],                                                             # Agriculture       연두
'6': ['일반공업지역', '준공업지역', '전용공업지역','생산관리지역','생산녹지지역'],  # Industrial       노랑     
'4': ['기타', '용도미지정','계획관리지역', '관리지역']                            # Miscellaneous       하늘

NDVI 분류
3월~5월 -> 4월
6월~9월 -> 8월
10월~11월 -> 11월
그외 -> 1월
"""


### Data crop function

In [None]:
def crop_image_using_coordinates(InputImage, OutputImage, RefImage, latitude, longitude, CropSize):
    """Crop a window centred on a point out of ``InputImage`` into ``OutputImage``.

    The pixel size is taken from ``RefImage`` (its bounds divided by its raster
    width/height). The window reaches ``CropSize`` reference pixels away from
    the centre in each direction and is handed to ``gdal.Translate`` as
    ``projWin``.

    NOTE: despite their names, ``latitude`` is used as the x coordinate of the
    window centre and ``longitude`` as the y coordinate. The call sites in this
    notebook pass ``(lon, lat)`` in these two positions, so the resulting
    window is consistent; the parameter names are kept for backward
    compatibility only.

    Parameters
    ----------
    InputImage : str
        Path of the raster to crop.
    OutputImage : str
        Path of the cropped raster to write.
    RefImage : str
        Path of the reference raster that defines the pixel resolution.
    latitude : float
        x coordinate of the window centre (see note above).
    longitude : float
        y coordinate of the window centre (see note above).
    CropSize : int or float
        Half-size of the window, expressed in reference-image pixels.
    """
    # Read extent and raster size from the reference image in one pass and
    # close the dataset right away: this function runs once per crop, so a
    # handle left open on every call accumulates quickly.
    with rasterio.open(RefImage) as rds:
        left, bottom, right, top = rds.bounds
        width = rds.width
        height = rds.height

    resolution_x = (right - left) / width
    resolution_y = (top - bottom) / height

    # Source and target CRS are identical, so no reprojection is expected here;
    # the call is kept as-is to preserve the existing coordinate handling.
    InputCrs = 'EPSG:4326'
    OutputCrs = 'EPSG:4326'
    transformer = Transformer.from_crs(InputCrs, OutputCrs)
    longitude, latitude = transformer.transform(longitude, latitude)

    half_width = resolution_x * CropSize
    half_height = resolution_y * CropSize

    # Window order: left, top, right, bottom.
    window = (
        latitude - half_width,
        longitude + half_height,
        latitude + half_width,
        longitude - half_height,
    )

    gdal.Translate(OutputImage, InputImage, projWin=window)

### ndvi 년월별 crop function

In [None]:
def ndvi_filtering(o_data,crop_size,filepath):
    """Crop the NDVI raster matching each row's date around that row's location.

    For every row ``i`` of ``o_data`` the last two characters of the ``date``
    value are dropped and the remainder is split into a 4-character year and a
    month (presumably the dates look like YYYYMMDD -- TODO confirm). The month
    is mapped to a representative month and the raster
    ``../data/geo_data/raw/NDVI/<year><month>.tif`` is cropped into
    ``<filepath>/Crop_NDVI_<i>.tif``.

    Month mapping: 3-5 -> 04, 6-9 -> 08, 10-11 -> 11, anything else -> 01.
    NOTE(review): the year is never shifted, so a December date maps to January
    of the same year -- confirm this is intended.

    Parameters
    ----------
    o_data : table indexable as ``o_data[column][i]``
        Must provide ``date``, ``lon`` and ``lat`` columns.
    crop_size : int or float
        Forwarded to ``crop_image_using_coordinates`` as ``CropSize``.
    filepath : str
        Output directory for the cropped files.
    """
    RefImage = '../data/gw_boundary/boundary_blank_resized.tif'

    # Months not listed here fall back to January.
    representative_month = {
        3: 4, 4: 4, 5: 4,
        6: 8, 7: 8, 8: 8, 9: 8,
        10: 11, 11: 11,
    }

    for i in tqdm(range(len(o_data))):
        year_month = str(o_data['date'][i])[:-2]
        year, raw_month = year_month[:4], int(year_month[4:])

        month_key = representative_month.get(raw_month, 1)
        ndvi_key = f"{year}{month_key:02d}"

        InputImage = f'../data/geo_data/raw/NDVI/{ndvi_key}.tif'
        OutputImage = f"{filepath}/Crop_NDVI_{i}.tif"

        lon = o_data['lon'][i]
        lat = o_data['lat'][i]

        crop_image_using_coordinates(InputImage, OutputImage, RefImage, lon, lat, crop_size)

### train 지형 crop function

In [None]:
def crop_train(data_n,o_data,crop_size):
    """Crop one raster layer around every training sample.

    Crops are written to ``D:/firedata/crop/train/<data_n>/``. If that folder
    already holds exactly ``len(o_data)`` ``.tif`` files the function returns
    without doing any work; otherwise every crop is (re)generated.

    Parameters
    ----------
    data_n : str
        Layer name. ``"NDVI"`` is handled by ``ndvi_filtering``; any other
        name is read from ``../data/geo_data/raw/<data_n>_gw.tif``.
    o_data : table indexable as ``o_data[column][i]``
        Must provide ``lon`` and ``lat`` columns (plus ``date`` for NDVI).
    crop_size : int or float
        Forwarded to ``crop_image_using_coordinates`` as ``CropSize``.
    """
    filepath=f"D:/firedata/crop/train/{data_n}"
    os.makedirs(filepath, exist_ok=True)

    print('#'*20)
    print(f"Start get {data_n} information")

    # Only the number of files matters for the completeness check.
    existing_files = glob(filepath+"/*.tif")
    if len(existing_files)==len(o_data):
        print(f"--{data_n} data already existed")
        return
    print("-- Not enough file please check")

    # NDVI rasters differ per year and month, so they are cropped by a
    # dedicated routine. The caller's crop_size is forwarded (it used to be
    # replaced by a hard-coded 1).
    if data_n=="NDVI":
        ndvi_filtering(o_data,crop_size,filepath)
        print(f"{data_n} Train data crop complete")
        return

    InputImage = f'../data/geo_data/raw/{data_n}_gw.tif'
    RefImage = '../data/gw_boundary/boundary_blank_resized.tif'

    for i in tqdm(range(len(o_data))):
        OutputImage = filepath+'/Crop_'+data_n+'_'+str(i)+'.tif'

        lon=o_data['lon'][i]
        lat=o_data['lat'][i]

        crop_image_using_coordinates(InputImage, OutputImage, RefImage, lon, lat, crop_size)
    print(f"{data_n} Train data crop complete")

### tif->npy 변환 function

In [None]:
def convert_npy(data_n,types,crop_size):
    """Stack the cropped ``.tif`` files of one layer into a single ``.npy`` file.

    Files are read from ``D:/firedata/crop/<types>/<data_n>/*.tif`` in natural
    sort order, loaded with OpenCV as 3-channel images, converted BGR -> RGB,
    resized to ``crop_size`` x ``crop_size`` and divided by 255.0. The stacked
    array is saved as ``D:/firedata/npy/<types>/<data_n>_<types>.npy``.

    Nothing is done if the output file already exists. Nothing is saved if no
    input file is found.

    Parameters
    ----------
    data_n : str
        Layer name (sub-folder of the crop directory).
    types : str
        Data split used in the paths, e.g. ``'train'`` or ``'test'``.
    crop_size : int
        Side length, in pixels, every image is resized to.

    Raises
    ------
    IOError
        If OpenCV cannot read one of the input files.
    """
    filepath=f"D:/firedata/npy/{types}/"
    os.makedirs(filepath, exist_ok=True)

    npy_path = filepath+f'{data_n}_{types}.npy'
    if os.path.isfile(npy_path):
        print("Already existed")
        return

    files=natsort.natsorted(glob(f"D:/firedata/crop/{types}/{data_n}/*.tif"))
    if not files:
        # Saving an empty array here would make every later call stop at
        # "Already existed" even after the crops have been generated.
        print(f"-- No tif file found for {data_n} ({types}), nothing saved")
        return

    tif_list=[]
    for tif_path in files:
        image = cv2.imread(tif_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError(f"Could not read image file: {tif_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (crop_size, crop_size))
        tif_list.append(image / 255.0)

    data=np.array(tif_list)
    np.save(npy_path,data)
    print("Complete")

# Main code

In [104]:
train_data=pd.read_csv("../data/train_data/climate_train.csv")

# Step 1: crop every raster layer around each training sample.
for layer in ('Height','Slope','Landuse','population_density','NDVI'):
    crop_train(layer,train_data,1)

# Step 2: stack the crops of each layer into one 32x32 .npy array.
for layer in ("Height","Slope","Landuse","population_density","NDVI"):
    convert_npy(layer,'train',32)




####################
Start get Height information
--Height data already existed
####################
Start get Slope information
--Slope data already existed
####################
Start get Landuse information
--Landuse data already existed
####################
Start get population_density information
--population_density data already existed
####################
Start get NDVI information
--NDVI data already existed
Already existed
Already existed
Already existed
Already existed
Already existed


# Test land data crop

In [None]:
# Gangwon-do boundary: north / east / south / west edges
N = 38.61370931
E = 129.359995
S = 37.03353708
W = 127.0950376

# Spacing between neighbouring grid points (400 columns, 278 rows)
width = (E-W)/399
height= (N-S)/277

# Grid coordinates rounded to 7 decimals: columns run west -> east, rows north -> south
width_num = [round(W+width*col,7) for col in range(400)]
height_num = [round(N-height*row,7) for row in range(278)]

In [105]:
def crop_test(data_n,crop_size,width_num,height_num):
    """Crop one raster layer around every point of the test grid.

    Crops are written to ``D:/firedata/crop/test/<data_n>/`` and numbered row
    by row: the outer loop walks ``height_num``, the inner loop ``width_num``.
    If the folder already holds one ``.tif`` per grid point the function
    returns without doing any work.

    Parameters
    ----------
    data_n : str
        Layer name. ``'NDVI'`` reads ``../data/geo_data/raw/NDVI/202204.tif``;
        any other name reads ``../data/geo_data/raw/<data_n>_gw.tif``.
    crop_size : int or float
        Forwarded to ``crop_image_using_coordinates`` as ``CropSize``.
    width_num : sequence of float
        Grid coordinates iterated in the inner loop (passed as ``lon``).
    height_num : sequence of float
        Grid coordinates iterated in the outer loop (passed as ``lat``).
    """
    filepath=f"D:/firedata/crop/test/{data_n}"
    os.makedirs(filepath, exist_ok=True)

    print('#'*20)
    print(f"Start get {data_n} information")
    existing_files = glob(filepath+"/*.tif")

    # One crop per grid point; derived from the grid actually passed in
    # instead of a hard-coded 111200 (= 400 * 278).
    expected_count = len(width_num)*len(height_num)
    if len(existing_files)==expected_count:
        print(f"--{data_n} data already existed")
        return
    print("-- Not enough file please check")

    if data_n=='NDVI':
        InputImage = f'../data/geo_data/raw/NDVI/202204.tif'
    else:
        InputImage = f'../data/geo_data/raw/{data_n}_gw.tif'

    RefImage = '../data/gw_boundary/boundary_blank_resized.tif'

    num=0

    for lat in tqdm(height_num):
        # leave=False: do not keep one finished inner bar per grid row.
        for lon in tqdm(width_num, leave=False):
            OutputImage = filepath+'/Crop_'+data_n+'_'+str(num)+'.tif'
            num=num+1
            crop_image_using_coordinates(InputImage, OutputImage, RefImage, lon, lat, crop_size)

In [106]:
# Crop every raster layer around each point of the test grid.
for layer in ('NDVI','Height','Slope','Landuse','population_density'):
    crop_test(layer,1,width_num,height_num)


# NOTE(review): these conversions target the 'train' crops, not the 'test'
# crops generated by the loop above -- confirm whether 'test' was intended.
for layer in ("Height","Slope","Landuse","population_density","NDVI"):
    convert_npy(layer,'train',32)


####################
Start get NDVI information
--NDVI data already existed
####################
Start get Height information
--Height data already existed
####################
Start get Slope information
-- Not enough file please check


 86%|████████▌ | 343/400 [00:18<00:03, 18.96it/s]
  0%|          | 0/278 [00:18<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# convert_npy("Height",'test',24)
# convert_npy("Slope",'test',24)
# convert_npy("Landuse",'test',24)
# convert_npy("population_density",'test',24)
# convert_npy("Height",'train',24)