In [None]:
from tqdm import tqdm
from osgeo import gdal, ogr
import geopandas as gpd
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from pyproj import Proj, Transformer
import rasterio
import natsort
from glob import glob
import cv2


In [None]:
"""
인구밀도 분류
[0,1,2,3,4,5,10,20,30,40,50,60,70,80,90,100,200,300,400 ,500,1000, 2000,3000, 4000,5000,6000,7000, 8000,9000,10000,11000,12000,13000,14000,15000, 16000, 20000]  

토지이용도 분류
'10': ['제1종일반주거지역', '제2종일반주거지역', '제3종일반주거지역','준주거지역', '제1종전용주거지역','제2종전용주거지역'],  # Residential 빨강
'2': ['일반상업지역', '근린상업지역', '중심상업지역'],                             # Commercial       파랑
'12': ['자연녹지지역', '보전녹지지역', '자연환경보전지역','보전관리지역'],          # Green Area      초록 
'8' : ['농림지역'],                                                             # Agriculture       연두
'6': ['일반공업지역', '준공업지역', '전용공업지역','생산관리지역','생산녹지지역'],  # Industrial       노랑     
'4': ['기타', '용도미지정','계획관리지역', '관리지역']                            # Miscellaneous       하늘

NDVI 분류
3월~5월 -> 4월
6월~9월 -> 8월
10월~11월 -> 11월
그외 -> 1월
"""


### Data crop function

In [None]:
def crop_image_using_coordinates(InputImage, OutputImage, RefImage, latitude, longitude, CropSize):
    """Crop a window centred on one point out of ``InputImage`` into ``OutputImage``.

    The pixel size is derived from ``RefImage`` (extent divided by pixel count).
    The window reaches ``CropSize`` reference pixels from the centre in every
    direction, so it is ``2 * CropSize`` reference pixels wide and high.

    Args:
        InputImage: Path of the raster to crop.
        OutputImage: Path the cropped raster is written to.
        RefImage: Path of the raster whose bounds and size define the pixel size.
        latitude: Used as the HORIZONTAL (x) coordinate of the window centre,
            despite its name. Every caller in this notebook passes the
            longitude in this position.
        longitude: Used as the VERTICAL (y) coordinate of the window centre,
            despite its name. Every caller in this notebook passes the
            latitude in this position.
        CropSize: Half-size of the window, counted in reference pixels.

    Returns:
        None. The result is the file written by ``gdal.Translate``.
    """
    # Read the boundary and grid size of the reference image. One rasterio
    # handle provides both, and the ``with`` block closes it again; this
    # function is called once per crop, so an unclosed handle adds up quickly.
    with rasterio.open(RefImage) as ref:
        left, bottom, right, top = ref.bounds
        width, height = ref.width, ref.height

    resolution_x = (right - left) / width
    resolution_y = (top - bottom) / height

    # The point is used as-is. An earlier version passed it through a pyproj
    # transformer whose source and target CRS were both 'EPSG:4326', which
    # cannot change the coordinates but was rebuilt on every call.
    # NOTE(review): this assumes the point and both rasters share one CRS - confirm.
    center_x = latitude   # horizontal coordinate, see the docstring
    center_y = longitude  # vertical coordinate, see the docstring

    half_width = resolution_x * CropSize
    half_height = resolution_y * CropSize

    # projWin order is (upper-left x, upper-left y, lower-right x, lower-right y).
    window = (
        center_x - half_width,
        center_y + half_height,
        center_x + half_width,
        center_y - half_height,
    )

    gdal.Translate(OutputImage, InputImage, projWin=window)

### ndvi 년월별 crop function

In [None]:
def ndvi_filtering(o_data,crop_size,filepath):
    """Crop one NDVI window per row of ``o_data``, picking the NDVI raster by date.

    Each row's month is mapped to a representative month (3-5 -> 4, 6-9 -> 8,
    10-11 -> 11, anything else -> 1) and the raster
    ``../data/geo_data/raw/NDVI/<year><month>.tif`` is cropped around the row's
    point. Crops are written to ``<filepath>/Crop_NDVI_<n>.tif`` where ``n`` is
    the row's position in ``o_data``.

    Args:
        o_data: Table with ``date``, ``lon`` and ``lat`` columns.
        crop_size: Forwarded to ``crop_image_using_coordinates``.
        filepath: Directory the crops are written to.
    """
    RefImage = '../data/gw_boundary/boundary_blank_resized.tif'

    # Walk the columns by position. Looking values up as ``o_data[col][i]`` is
    # a label lookup on a DataFrame, so it fails or picks the wrong row as soon
    # as the index is not 0..n-1 (for example after filtering).
    rows = zip(o_data['date'], o_data['lon'], o_data['lat'])
    for i, (date, lon, lat) in enumerate(tqdm(rows, total=len(o_data))):
        # Drop the last two characters of the date text; the rest is read as a
        # 4-character year followed by the month.
        # Presumably dates render as YYYYMMDD - TODO confirm against the CSV.
        year_month = str(date)[:-2]
        year = year_month[:4]
        month = int(year_month[4:])

        # Collapse the month onto the representative month of its season.
        if 3 <= month <= 5:
            month = 4
        elif 6 <= month <= 9:
            month = 8
        elif 10 <= month <= 11:
            month = 11
        else:
            month = 1

        InputImage = f'../data/geo_data/raw/NDVI/{year}{str(month).zfill(2)}.tif'
        OutputImage = filepath+'/Crop_NDVI_'+str(i)+'.tif'

        # Positional call on purpose: the callee's 4th/5th parameters are used
        # as x/y, so lon goes first.
        crop_image_using_coordinates(InputImage, OutputImage, RefImage, lon, lat, crop_size)

### train 지형데이터 크롭 function

In [None]:
def crop_train(data_n,o_data,crop_size):
    """Write one cropped raster per row of ``o_data`` for the layer ``data_n``.

    Crops go to ``D:/firedata/crop/train/<data_n>/``. If that folder already
    holds exactly ``len(o_data)`` ``.tif`` files the work is skipped.

    Args:
        data_n: Layer name. ``"NDVI"`` is delegated to ``ndvi_filtering``;
            any other name is read from ``../data/geo_data/raw/<data_n>_gw.tif``.
        o_data: Table with ``lon`` and ``lat`` columns (plus ``date`` for NDVI).
        crop_size: Forwarded to the crop functions.
    """
    filepath=f"D:/firedata/crop/train/{data_n}"
    os.makedirs(filepath, exist_ok=True)

    print(f"Start get {data_n} information")
    # Only the number of existing crops matters here, so no sorting is needed.
    existing = glob(filepath+"/*.tif")

    if len(existing)==len(o_data):
        print(f"----{data_n} train crop image already existed")
        return
    print(f"Create {data_n} train crop image start")

    # NDVI data differs by year and month, so it is handled separately.
    if data_n=="NDVI":
        # Forward the caller's crop size; it used to be hard-coded to 1 here,
        # which made NDVI crops ignore the requested size.
        ndvi_filtering(o_data,crop_size,filepath)
        print("Complete")
        return

    InputImage = f'../data/geo_data/raw/{data_n}_gw.tif'
    RefImage = '../data/gw_boundary/boundary_blank_resized.tif'

    # Walk the columns by position; ``o_data['lon'][i]`` is a label lookup and
    # breaks when the index is not 0..n-1.
    points = zip(o_data['lon'], o_data['lat'])
    for i, (lon, lat) in enumerate(tqdm(points, total=len(o_data))):
        OutputImage = filepath+'/Crop_'+data_n+'_'+str(i)+'.tif'
        crop_image_using_coordinates(InputImage, OutputImage, RefImage, lon, lat, crop_size)
    print("Complete")

### test 지형데이터 크롭 function

In [None]:
def crop_test(data_n,crop_size,width_num,height_num):
    """Write one cropped raster per grid point for the layer ``data_n``.

    The grid is the product of ``height_num`` (outer loop) and ``width_num``
    (inner loop); crops are numbered in that order and written to
    ``D:/firedata/crop/test/<data_n>/Crop_<data_n>_<n>.tif``. If the folder
    already holds one ``.tif`` per grid point the work is skipped.

    Args:
        data_n: Layer name. ``'NDVI'`` always reads
            ``../data/geo_data/raw/NDVI/202204.tif``; any other name reads
            ``../data/geo_data/raw/<data_n>_gw.tif``.
        crop_size: Forwarded to ``crop_image_using_coordinates``.
        width_num: Sequence of values used for ``lon`` (inner loop).
        height_num: Sequence of values used for ``lat`` (outer loop).
    """
    filepath=f"D:/firedata/crop/test/{data_n}"
    os.makedirs(filepath, exist_ok=True)

    print('#'*20)
    print(f"Start get {data_n} information")

    # One crop per grid point. Deriving the count from the grid (instead of
    # the literal 111200 = 400 * 278) keeps the skip check right if the grid
    # size ever changes.
    expected_count = len(width_num) * len(height_num)
    existing = glob(filepath+"/*.tif")

    if len(existing)==expected_count:
        print(f"----{data_n} test crop image already existed")
        return
    print(f"Create {data_n} test crop image start")

    if data_n=='NDVI':
        InputImage = '../data/geo_data/raw/NDVI/202204.tif'
    else:
        InputImage = f'../data/geo_data/raw/{data_n}_gw.tif'

    RefImage = '../data/gw_boundary/boundary_blank_resized.tif'

    num=0
    for lat in tqdm(height_num):
        # leave=False removes each finished inner bar so the output is not
        # flooded with one bar per grid row.
        for lon in tqdm(width_num, leave=False):
            OutputImage = filepath+'/Crop_'+data_n+'_'+str(num)+'.tif'
            num=num+1
            # Positional call on purpose: the callee's 4th/5th parameters are
            # used as x/y, so lon goes first.
            crop_image_using_coordinates(InputImage, OutputImage, RefImage, lon, lat, crop_size)
    print("Complete")

### tif->npy 변환 function

In [None]:
def convert_npy(data_n,types,image_size):
    """Stack the cropped ``.tif`` files of one layer into a single ``.npy`` array.

    Reads every ``D:/firedata/crop/<types>/<data_n>/*.tif`` in natural sort
    order, loads it with OpenCV as a colour image, converts BGR to RGB, resizes
    it to ``image_size`` x ``image_size``, divides by 255.0 and saves the stack
    to ``D:/firedata/npy<image_size>/<types>/<data_n>_<types>.npy``. If that
    file already exists nothing is done.

    Args:
        data_n: Layer name (sub-folder of the crop directory).
        types: Split name, used in both the input and output paths.
        image_size: Edge length in pixels of the resized images.

    Raises:
        FileNotFoundError: If the crop folder contains no ``.tif`` files.
        OSError: If OpenCV cannot read one of the files.
    """
    out_dir=f"D:/firedata/npy{image_size}/{types}/"
    os.makedirs(out_dir, exist_ok=True)
    out_path = out_dir+f'{data_n}_{types}.npy'

    if os.path.isfile(out_path):
        print(f"----{data_n}_{types}.npy file already existed")
        return

    print(f"Create {data_n}_{types}.npy start")

    pattern = f"D:/firedata/crop/{types}/{data_n}/*.tif"
    files=natsort.natsorted(glob(pattern))

    # Without this guard an empty array would be saved, and the existence
    # check above would then treat this layer as finished on every later run.
    if not files:
        raise FileNotFoundError(f"no .tif files match {pattern}")

    # Fill a preallocated array instead of collecting a list and copying it,
    # which would hold two full copies of the data in memory at the end.
    # IMREAD_COLOR yields 3-channel images, hence the trailing 3.
    data = np.empty((len(files), image_size, image_size, 3), dtype=np.float64)
    for idx, path in enumerate(files):
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        # cv2.imread reports failure by returning None rather than raising.
        if image is None:
            raise OSError(f"cv2.imread could not read {path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (image_size, image_size))
        data[idx] = image / 255.0

    np.save(out_path, data)
    print("Complete")

#### train, test 지형데이터셋 만드는 function

In [None]:
def make_train_land_data(cropsize,imagesize):
    """Build the cropped rasters and ``.npy`` stacks for the training points.

    Loads ``../data/modeling_data/train/climate_train.csv`` and, for each land
    layer in turn, runs ``crop_train`` followed by ``convert_npy``.

    Args:
        cropsize: Forwarded to ``crop_train``.
        imagesize: Forwarded to ``convert_npy``.
    """
    print("Train land_dataset create start")

    train_data=pd.read_csv("../data/modeling_data/train/climate_train.csv")

    layer_names = ('Height','Slope','Landuse','population_density','NDVI')
    separator = "#"*20

    for layer in layer_names:
        print(separator)
        crop_train(layer,train_data,cropsize)
        convert_npy(layer,'train',imagesize)

    print("Train land_dataset create complete")
    print("#"*30)
        
def make_test_land_data(cropsize,imagesize):
    """Build the cropped rasters and ``.npy`` stacks for the test grid.

    Lays a regular grid of 400 columns by 278 rows over the bounding box below
    (end points included, values rounded to 7 decimals) and, for each land
    layer in turn, runs ``crop_test`` followed by ``convert_npy``.

    Args:
        cropsize: Forwarded to ``crop_test``.
        imagesize: Forwarded to ``convert_npy``.
    """
    print("Test land_dataset create start")

    # Gangwon-do boundary
    N = 38.61370931
    E = 129.359995
    S = 37.03353708
    W = 127.0950376

    # Spacing between neighbouring grid points: 399 / 277 gaps for 400 / 278 points.
    width = (E-W)/399
    height= (N-S)/277

    # Columns run west -> east, rows run north -> south.
    width_num = [round(W+width*col,7) for col in range(400)]
    height_num = [round(N-height*row,7) for row in range(278)]

    for layer in ('Height','Slope','Landuse','population_density','NDVI'):
        print("#"*20)
        crop_test(layer,cropsize,width_num,height_num)
        convert_npy(layer,'test',imagesize)

    print("Test land_dataset create complete")
    

# Main code

In [None]:
# Run the full pipeline: training crops/arrays first, then the test grid.
# Arguments are (cropsize, imagesize): cropsize is forwarded to the crop
# functions, imagesize is forwarded to convert_npy as the resized image size.
make_train_land_data(1,24)
make_test_land_data(1,24)