In [1]:
import sys
sys.path.append("/mnt/nvme2tb/ffp/code/mlfires/ML_fires_al/")
import pathlib
import pandas as pd
import xarray as xr
import numpy as np
import os
import crop_dataset
import geopandas as gpd
import random

In [2]:
def applyid(df):
    """Add a string ``id`` column derived from the ``x`` and ``y`` columns.

    Each coordinate is multiplied by 10000 and rendered with the
    ``'{:06.0f}'`` format (no decimals, zero-padded to a minimum width of
    six characters). The ``id`` is the x string followed by the y string.

    The frame is modified in place (the scratch columns ``xposst`` and
    ``yposst`` are created and then dropped) and is also returned.
    """
    fmt = '{:06.0f}'.format
    scratch = ['xposst', 'yposst']
    for axis, col in zip(('x', 'y'), scratch):
        df[col] = (df[axis] * 10000).apply(fmt)
    df['id'] = df[scratch[0]] + df[scratch[1]]
    df.drop(columns=scratch, inplace=True)
    return df

def getcoarsedf(coarsen_c, dfday, gridfile='/mnt/nvme2tb/ffp/datasets/images/20211030_df.nc'):
    """Keep only the rows of ``dfday`` that lie on a coarsened sub-grid.

    The reference grid is read from ``gridfile``, coarsened by ``coarsen_c``
    along ``y`` and then along ``x`` (trimming incomplete edge windows), and
    the resulting (x, y) coordinate pairs are turned into ids with
    ``applyid``. ``dfday`` is then inner-joined against those ids.

    Input
        coarsen_c : window size of the coarsening; the sub-grid holds about
                    1 / (coarsen_c * coarsen_c) of the original points.
                    Use odd numbers so that sub-grid points keep the
                    coordinates of existing points.
        dfday     : pandas DataFrame with an ``id`` column in the format
                    produced by ``applyid``.
        gridfile  : path of the netCDF file that defines the full grid.

    Output
        Coarsened tabular pandas DataFrame holding only the rows of
        ``dfday`` whose ``id`` is on the sub-grid, with the columns of
        ``dfday`` unchanged.
    """
    # Open through a context manager so the file handle is released even when
    # this function is called repeatedly (e.g. once per day in a loop).
    with xr.open_dataset(gridfile) as ds:
        dsc = ds.coarsen(y=coarsen_c, boundary='trim').mean().coarsen(x=coarsen_c, boundary='trim').mean()
        # Stacking x and y yields one (x, y) tuple per sub-grid point.
        coordtupar = dsc.stack(dim=['x', 'y']).dim.to_numpy()
    coordnp = np.array([*coordtupar])
    dfcoords = pd.DataFrame(coordnp, columns=['x', 'y'], dtype=float)
    dfcoords = applyid(dfcoords)
    # Join on the id alone: no suffixed coordinate columns are created, so
    # nothing has to be dropped afterwards and dfday need not carry x / y.
    coarsen_df = pd.merge(dfday, dfcoords[['id']], on=['id'])
    return coarsen_df

def extract_day(date, extented = True):
    """Build and save the XAI input table for one day.

    Reads ``<date>_norm_greece.csv`` from the production folder, reduces it
    to the coarsened sub-grid with ``getcoarsedf`` and, when ``extented`` is
    true, adds extra sample points around each sub-grid point using the
    day's ``<date>_pred_greece.csv`` (see ``getextrapoints``).

    Parameters
        date     : day as a string (e.g. '20230825'); used as folder name
                   and file-name prefix.
        extented : (sic) when true, extend the coarse points with sampled
                   neighbours and also write the buffer polygons as a
                   shapefile. When false only the CSV is written.

    Returns
        The DataFrame that was written to ``<date>_xai_ext_inp.csv``.
    """
    csvfolder = '/mnt/nvme2tb/ffp/datasets/prod/'
    # The input is the already-cropped "_greece" CSV; a cropping step based on
    # crop_dataset.cropfile used to be sketched here but is disabled.
    csvfile = os.path.join(csvfolder, date, '%s_norm_greece.csv' % date)

    # csv for xai input
    coarsen_coef = 31
    dfday = pd.read_csv(csvfile, dtype={'id': str})
    coarsedf = getcoarsedf(coarsen_coef, dfday)

    # Only the extended path produces buffer geometries; keep an explicit
    # sentinel so the non-extended path does not hit an unbound local.
    gdfbuffer = None
    if extented:
        predfile = os.path.join(csvfolder, date, "%s_pred_greece.csv" % date)
        dfpred = extract_xy(pd.read_csv(predfile, dtype={'id': str}))
        dfexids, gdfbuffer = getextrapoints(coarsedf, dfpred, coarsen_coef)
        coarsedf = pd.merge(dfexids, dfday, on=['id'])

    xaifolder = '/mnt/nvme2tb/ffp/datasets/xai/%s' % date
    os.makedirs(xaifolder, exist_ok=True)
    csvcoarse = os.path.join(xaifolder, '%s_xai_ext_inp.csv' % date)
    coarsedf.to_csv(csvcoarse, index=False)
    if gdfbuffer is not None:
        gdfbuffer.to_file(os.path.join(xaifolder, '%s_xai_ext_inp.shp' % date))
    return coarsedf

def extract_xy(dfxai):
    """Recover ``x`` and ``y`` columns from the 12-character ``id`` strings.

    Characters 0-5 of the id give x and characters 6-11 give y; each part is
    parsed as an integer and divided by 10000. The frame is modified in
    place and also returned.
    """
    for column, first, last in (('x', 0, 6), ('y', 6, 12)):
        digits = dfxai['id'].str.slice(first, last)
        dfxai[column] = digits.astype(int) / 10000
    return dfxai

def getcenters(dfcoarse, dfpred):
    """Flag which rows of ``dfpred`` are points of the coarsened grid.

    ``dfpred`` is right-joined with the ``id`` / ``max_temp`` columns of
    ``dfcoarse``; a row that found a match (non-null ``max_temp``) gets
    ``center`` = 1, every other row gets ``center`` = 0.

    Returns the merged pandas DataFrame with the integer ``center`` column.
    """
    merged = pd.merge(dfcoarse[['id', 'max_temp']], dfpred, on='id', how='right')
    # A non-null max_temp means the id was present in the coarse frame.
    merged['max_temp'] = merged['max_temp'].notna().astype(int)
    merged = merged.rename(columns={'max_temp': 'center'})

    points = gpd.points_from_xy(merged['x'], merged['y'], crs=4326)
    # NOTE(review): the GeoDataFrame is built but the plain frame is what gets
    # returned -- confirm whether callers were meant to receive the geometries.
    gdfcenter = gpd.GeoDataFrame(merged, geometry=points)

    return merged

def getextrapoints(dfcoarse, dfpred, coarsen_coef, seed=None):
    """Pick extra sample points around every coarse-grid centre.

    For each centre (a row of ``dfpred`` whose id is also in ``dfcoarse``) a
    square buffer is built, and for every risk level 1..5 that differs from
    the centre's own risk one point inside the buffer is chosen at random.

    Parameters
        dfcoarse     : coarsened frame; its ``id`` and ``max_temp`` columns
                       are used to mark the centres.
        dfpred       : prediction frame with ``id``, ``x``, ``y``,
                       ``ypred0``, ``ypred1`` and ``risk`` columns.
        coarsen_coef : coarsening window used to size the buffer.
        seed         : optional seed for reproducible sampling. With the
                       default ``None`` the module-level ``random`` state is
                       used, exactly as before.

    Returns
        (dfextids, gdfcenter2): a one-column ``id`` DataFrame listing each
        centre followed by its sampled neighbours, and the GeoDataFrame of
        centre buffers (EPSG:3857).
    """
    rng = random if seed is None else random.Random(seed)

    # Merge the points from the coarsening with the rest of the dataset and
    # create a "center" column marking the points chosen by the coarsening.
    dfcenter = pd.merge(dfcoarse[['id', 'max_temp']], dfpred, on='id', how='right')
    dfcenter['max_temp'] = dfcenter['max_temp'].notna().astype(int)
    dfcenter.rename(columns={'max_temp': 'center'}, inplace=True)

    # Create a geodataframe with point geometries, reprojected to EPSG:3857
    # so that buffer distances are expressed in metres.
    geom = gpd.points_from_xy(dfcenter['x'], dfcenter['y'], crs=4326)
    gdfcenter = gpd.GeoDataFrame(dfcenter, geometry=geom)
    gdfcenter = gdfcenter.to_crs(crs=3857)

    # Spatial join of centre buffers with all points. The buffer is a square
    # of half-width 500*(coarsen_coef-1)/2 metres, scaled by a correction
    # coefficient of 1.18.
    gdfcenter2 = gdfcenter.loc[gdfcenter['center'] == 1].copy()
    gdfcenter2['geometry'] = gdfcenter2.geometry.buffer((coarsen_coef - 1) / 2 * 500 * 1.18, cap_style=3)
    gdfcenter2.drop(columns=['ypred0', 'ypred1', 'x', 'y'], inplace=True)
    gdfsjoin = gdfcenter2.sjoin(gdfcenter, how="left")

    # Loop-invariant part of the selection: neighbours that are not the centre
    # itself and whose risk differs from the centre's risk.
    candidates = gdfsjoin.loc[(gdfsjoin['id_left'] != gdfsjoin['id_right'])
                              & (gdfsjoin['risk_left'] != gdfsjoin['risk_right'])]

    # Keep the centre id, then one random point id per risk level found in
    # that centre's buffer.
    sampleids = []
    for ind in gdfcenter2.index:
        sampleids.append(gdfcenter2.loc[gdfcenter2.index == ind, 'id'].item())
        incell = candidates.loc[candidates.index == ind]
        for risk in range(1, 6):
            allrows = incell.loc[incell['risk_right'] == risk]
            if not allrows.empty:
                celln = rng.randint(0, len(allrows) - 1)
                sampleids.append(allrows.iloc[celln]["id_right"])
    dfextids = pd.DataFrame(sampleids, columns=['id'])
    return dfextids, gdfcenter2

In [5]:
# Build and save the XAI input for one day; the returned frame is shown as the cell output.
extract_day('20230825')

Unnamed: 0,id,x,y,dom_dir,dom_vel,res_max,dir_max,max_temp,min_temp,mean_temp,...,corine_gr4,corine_gr5,corine_gr21,corine_gr22,corine_gr23,corine_gr24,corine_gr31,corine_gr32,corine_gr33,fire
0,198017397712,0.050504,0.702992,1.0,0.061934,0.030523,1.0,0.775728,0.876531,0.824093,...,0.0,0.0,0.000000,1.000000,0.0,0.000000,0.000000,0.000000,0.000000,0
1,198326397455,0.053498,0.699362,3.0,0.078003,0.054286,8.0,0.755564,0.822493,0.794524,...,0.0,0.0,0.000000,0.000000,0.0,0.771060,0.000000,0.228940,0.000000,0
2,198533397455,0.055494,0.699362,2.0,0.081361,0.063914,3.0,0.760856,0.829608,0.800666,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,1.000000,0.000000,0
3,198636397867,0.056492,0.705170,4.0,0.072483,0.081607,7.0,0.783606,0.868262,0.824552,...,0.0,0.0,0.000000,0.000000,0.0,0.692327,0.000000,0.297993,0.009679,0
4,198790397558,0.057989,0.700814,3.0,0.073157,0.071418,8.0,0.769644,0.844608,0.813971,...,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.438743,0.561257,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2335,268323376940,0.731105,0.410425,1.0,0.234696,0.205059,1.0,0.849920,0.941994,0.902045,...,0.0,0.0,0.000000,0.000000,0.0,0.518187,0.000000,0.481813,0.000000,0
2336,268993376889,0.737592,0.409699,1.0,0.203712,0.173757,1.0,0.844061,0.981147,0.909653,...,0.0,0.0,0.080821,0.013512,0.0,0.625685,0.000000,0.000000,0.000000,0
2337,268632376579,0.734099,0.405343,5.0,0.083305,0.119620,8.0,0.862091,0.901068,0.876449,...,0.0,0.0,0.000000,0.691510,0.0,0.000000,0.000000,0.308490,0.000000,0
2338,267602377404,0.724120,0.416959,1.0,0.173163,0.165437,2.0,0.764736,0.849528,0.814826,...,0.0,0.0,0.000000,0.000000,0.0,0.785536,0.214464,0.000000,0.000000,0


In [None]:
# Run the extraction for 20230825 .. 20230828 (the range end, 29, is exclusive).
for d in range(25,29):
    date='202308'+str(d)
    extract_day(date)

In [3]:
# Open the same netCDF grid file that getcoarsedf reads, for interactive inspection.
ds = xr.open_dataset('/mnt/nvme2tb/ffp/datasets/images/20211030_df.nc')

In [4]:
# Display the dataset summary.
ds

In [11]:
# Check getcoarsedf on its own for one day: load the day's CSV with ids kept
# as strings and reduce it to the sub-grid obtained with a window of 31.
date='20230825'
csvfolder='/mnt/nvme2tb/ffp/datasets/prod/'
csvfile = os.path.join(csvfolder, date, '%s_norm_greece.csv' % date)
dfday = pd.read_csv(csvfile, dtype={'id': str})
getcoarsedf(31, dfday)

Unnamed: 0,x,y,dom_dir,dom_vel,res_max,dir_max,max_temp,min_temp,mean_temp,max_dew_temp,...,corine_gr5,corine_gr21,corine_gr22,corine_gr23,corine_gr24,corine_gr31,corine_gr32,corine_gr33,fire,id
0,0.050504,0.702992,1.0,0.061934,0.030523,1.0,0.775728,0.876531,0.824093,0.845580,...,0.0,0.00000,1.000000,0.000000,0.000000,0.000000,0.000000,0.0,0,198017397712
1,0.050504,0.680487,4.0,0.073516,0.094769,7.0,0.821182,0.870185,0.846248,0.797470,...,0.0,0.00000,0.000000,0.285088,0.665752,0.000000,0.000000,0.0,0,198017396115
2,0.065972,0.657981,7.0,0.117912,0.087076,7.0,0.818595,0.873952,0.841701,0.771628,...,0.0,0.00000,0.000000,0.000000,1.000000,0.000000,0.000000,0.0,0,199615394517
3,0.127845,0.545456,1.0,0.150438,0.158995,8.0,0.801099,0.861916,0.832319,0.752371,...,0.0,0.00000,0.935626,0.000000,0.064374,0.000000,0.000000,0.0,0,206006386527
4,0.127845,0.500446,3.0,0.082901,0.161399,8.0,0.784493,0.837493,0.811442,0.761070,...,0.0,0.00000,0.000000,0.000000,0.000000,0.028950,0.971050,0.0,0,206006383332
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515,0.669233,0.635476,2.0,0.174993,0.279914,3.0,0.832415,0.777797,0.824237,0.764912,...,0.0,0.00000,0.004451,0.000000,0.924670,0.070879,0.000000,0.0,0,261932392919
516,0.684701,0.635476,2.0,0.204527,0.191952,3.0,0.854617,0.822810,0.840932,0.806481,...,0.0,0.05788,0.942120,0.000000,0.000000,0.000000,0.000000,0.0,0,263530392919
517,0.684701,0.612971,2.0,0.198221,0.168210,2.0,0.845900,0.780954,0.821779,0.771982,...,0.0,0.00000,0.000000,0.000000,0.000000,0.000000,1.000000,0.0,0,263530391321
518,0.700169,0.612971,2.0,0.262823,0.233474,2.0,0.876355,0.858466,0.867721,0.790150,...,0.0,0.00000,0.772236,0.000000,0.000000,0.000000,0.000000,0.0,0,265127391321


In [12]:
# Show the CSV path assembled in the previous cell.
csvfile

'/mnt/nvme2tb/ffp/datasets/prod/20230825/20230825_norm_greece.csv'

In [13]:
# Display the full (un-coarsened) day table.
# NOTE(review): no dtype is given here, so `id` is type-inferred rather than
# kept as str as in the other reads -- fine for display, not for id joins.
pd.read_csv(csvfile)

Unnamed: 0,x,y,dom_dir,dom_vel,res_max,dir_max,max_temp,min_temp,mean_temp,max_dew_temp,...,corine_gr5,corine_gr21,corine_gr22,corine_gr23,corine_gr24,corine_gr31,corine_gr32,corine_gr33,fire,id
0,0.023559,0.702992,8.0,0.109770,0.078851,8.0,0.710493,0.832685,0.778259,0.905384,...,0.077947,0.0,0.000000,0.0,0.170539,0.000000,0.751514,0.0,0,195234397712
1,0.024059,0.702266,1.0,0.076210,0.060020,8.0,0.727614,0.827685,0.785350,0.897344,...,0.016938,0.0,0.000000,0.0,0.171231,0.000000,0.811831,0.0,0,195285397661
2,0.028549,0.720415,8.0,0.122357,0.091567,8.0,0.732463,0.834801,0.788102,0.918913,...,0.163862,0.0,0.000000,0.0,0.000000,0.000000,0.836138,0.0,0,195749398949
3,0.028549,0.719689,8.0,0.113340,0.082457,8.0,0.695513,0.862685,0.778310,0.918129,...,0.078394,0.0,0.000000,0.0,0.338821,0.000000,0.582786,0.0,0,195749398898
4,0.028549,0.718963,8.0,0.113340,0.082457,8.0,0.695513,0.862685,0.778310,0.918129,...,0.379461,0.0,0.000000,0.0,0.599634,0.000000,0.020905,0.0,0,195749398846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500496,0.752062,0.414055,8.0,0.209997,0.180106,8.0,0.844769,0.874336,0.857225,0.829455,...,0.000000,0.0,0.811821,0.0,0.000000,0.000000,0.188178,0.0,0,270488377198
500497,0.752561,0.414055,8.0,0.160243,0.129841,8.0,0.814920,0.909720,0.858382,0.854985,...,0.000000,0.0,0.424788,0.0,0.000000,0.433099,0.142113,0.0,0,270540377198
500498,0.752561,0.413329,8.0,0.160243,0.129841,8.0,0.814920,0.909720,0.858382,0.854985,...,0.099401,0.0,0.534357,0.0,0.000000,0.000000,0.366242,0.0,0,270540377146
500499,0.753060,0.414055,8.0,0.160243,0.129841,8.0,0.814920,0.909720,0.858382,0.854985,...,0.000000,0.0,0.000000,0.0,0.000000,0.397188,0.602812,0.0,0,270591377198
