In [1]:
import os
import calendar
import time
import datatable as dt
import fileutils
import traceback

In [2]:
def load_datatable(fpath, fill=False):
    if not os.path.exists(fpath):
        print('Path not found : %s'%fpath)
        raise
    DT = dt.fread(fpath, fill=fill)
    return DT

In [3]:
def walkmonthfiles(sfolder):
    for fmonth in fileutils.find_files(sfolder, '*_norm.csv', listtype="walk"):
        #print(fmonth)
        monthdir = os.path.dirname(fmonth)
        year = os.path.basename(os.path.dirname(monthdir))
        if year[0:2] != '20':
            continue
        yield fmonth
        #extractdays(fmonth, tfolder)

In [4]:
def read_multiple_csv(sfolder, pattern):
    #pattern example '*_norm.csv'
    exfeat = ["","id", "firedate"]
    DTall = None
    for f in fileutils.find_files(sfolder, pattern, listtype="walk"):
        print(f)
        try:
            DTpart=load_datatable(f)
        except:
            print("Fail to load %s :\n"%f+traceback.format_exc())
            raise
        if DTall is None: 
            DTall = DTpart
        else:
            DTall=dt.rbind(DTall, DTpart)
    return DTall

In [5]:
def filterDT(DT, filt):
    DT=DT[:, [name for name in filt]]
    return DT
    

In [6]:
def aggrfuncs(DTaggr, func, y, pattern, agg):
    DTa=func()
    DTa['Year'] = '%d'%(y)
    DTa['Pattern'] = pattern
    DTa['Aggr'] = agg
    if DTaggr==None:
        DTaggr=DTa
    else:
        DTaggr = dt.rbind(DTaggr,DTa)
    return DTaggr

In [7]:
def statmonths(years, sfolder, pattern, filt):
    #sfolder = '/data2/ffp/datasets/daily/%s/08'
    DTmeans=None; DTmin=None; DTmax=None; DTsd=None;
    #filt=['ndvi_new','evi', 'lst_day', 'lst_night']
    #filt=['max_temp','min_temp','mean_temp','rain_7days','res_max','dom_vel','max_dew_temp','min_dew_temp','mean_dew_temp']
    #exclude onehot = ['corine', 'dir_max', 'dom_dir', 'wkd', 'month']+["C0","id", "firedate"]
    for y in years:
        print(y)
        #sfolder = '/data2/ffp/datasets/daily/%d/08'%y
        yfolder = sfolder%y
        print(yfolder)
        DT = read_multiple_csv(yfolder, pattern)
        DT = DT[:, [name for name in filt]]
        DTmeans = aggrfuncs(DTmeans, DT.mean, y, pattern, 'mean')
        DTmin = aggrfuncs(DTmin, DT.min, y, pattern, 'min')
        DTmax = aggrfuncs(DTmax, DT.max, y, pattern, 'max')
        DTsd = aggrfuncs(DTsd, DT.sd, y, pattern, 'sd')
        DT = None
    return DTmeans, DTmin, DTmax, DTsd 

In [15]:
def load_test(years, sfolder, pattern):
    for y in years:
        print(y)
        #sfolder = '/data2/ffp/datasets/daily/%d/08'%y
        yfolder = sfolder%y
        print("%s, %s: Loading"%(yfolder,pattern))
        try:
            DT = read_multiple_csv(yfolder, pattern)
            if DT is not None:
                print("%s, %s: Loaded successfully"%(yfolder,pattern))
                print(DT.names)
                print('number of days : %d'%DT['firedate'].nunique1())
                dt.unique(DT['firedate'])
                wdays = len([d for d in DT.names if 'wkd_' in d ])
                months = len([d for d in DT.names if 'month_' in d ])
                corines = len([d for d in DT.names if 'corine_' in d ])
                dir_maxs = len([d for d in DT.names if 'dir_max_' in d ])
                dom_dirs = len([d for d in DT.names if 'dom_dir_' in d ])
                total_feat = len(DT.names)
                print('Total features: %d, week days: %d, months: %d, corine: %d, dom_dir: %d, dir_max: %d'%\
                     (total_feat, wdays, months, corines, dom_dirs, dir_maxs))
            else:
                print("%s, %s: No dataset"%(yfolder,pattern))
        except:
            print(traceback.format_exc())

In [9]:
read_multiple_csv('/data2/ffp/datasets/monthly/2020/', 'june_*_norm.csv')

/data2/ffp/datasets/monthly/2020/06/june_2020_norm.csv


Unnamed: 0_level_0,id,firedate,max_temp,min_temp,mean_temp,res_max,dom_vel,rain_7days,dem,slope,…,month_9,frequency,f81,x,y
Unnamed: 0_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,Unnamed: 11_level_1,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,333237,20200605,0.583038,0.624735,0.592991,0.30241,0.334799,0.0483197,0.0321458,0.0309607,…,0,0,0.00139211,0.781966,1
1,333237,20200618,0.599839,0.653896,0.622035,0.0364355,0.128298,0.110203,0.0321458,0.0309607,…,0,0,0.00139211,0.781966,1
2,333237,20200613,0.59262,0.674296,0.609795,0.126569,0.0944475,0.0311833,0.0321458,0.0309607,…,0,0,0.00139211,0.781966,1
3,335462,20200605,0.570964,0.628872,0.587096,0.30241,0.329836,0.0467675,0.0296805,0.0298643,…,0,0,0.00139211,0.780863,0.999466
4,335462,20200618,0.588786,0.649126,0.612406,0.0364355,0.131339,0.127404,0.0296805,0.0298643,…,0,0,0.00139211,0.780863,0.999466
5,335462,20200613,0.585607,0.661885,0.59886,0.126569,0.0941413,0.0397643,0.0296805,0.0298643,…,0,0,0.00139211,0.780863,0.999466
6,335463,20200605,0.583038,0.624735,0.592991,0.30241,0.334799,0.0483197,0.03441,0.027004,…,0,0,0.00139211,0.781199,0.999647
7,335463,20200618,0.599839,0.653896,0.622035,0.0364355,0.128298,0.110203,0.03441,0.027004,…,0,0,0.00139211,0.781199,0.999647
8,335463,20200613,0.59262,0.674296,0.609795,0.126569,0.0944475,0.0311833,0.03441,0.027004,…,0,0,0.00139211,0.781199,0.999647
9,335464,20200605,0.583038,0.624735,0.592991,0.30241,0.334799,0.0483197,0.0321458,0.0309607,…,0,0,0.00139211,0.781797,0.999929


In [17]:
#for m in ['june','july','august','september']:
for m in ['june','july','august','september']:
#for m in ['september']:
    load_test(range(2020,2021),'/data2/ffp/datasets/monthly/%d/','%s_*_norm.csv'%m)

2020
/data2/ffp/datasets/monthly/2020/, june_*_norm.csv: Loading
/data2/ffp/datasets/monthly/2020/06/june_2020_norm.csv
/data2/ffp/datasets/monthly/2020/, june_*_norm.csv: Loaded successfully
('id', 'firedate', 'max_temp', 'min_temp', 'mean_temp', 'res_max', 'dom_vel', 'rain_7days', 'dem', 'slope', 'curvature', 'aspect', 'ndvi_new', 'evi', 'lst_day', 'lst_night', 'max_dew_temp', 'mean_dew_temp', 'min_dew_temp', 'fire', 'dir_max_1', 'dir_max_2', 'dir_max_3', 'dir_max_4', 'dir_max_5', 'dir_max_6', 'dir_max_7', 'dir_max_8', 'dom_dir_1', 'dom_dir_2', 'dom_dir_3', 'dom_dir_4', 'dom_dir_5', 'dom_dir_6', 'dom_dir_7', 'dom_dir_8', 'corine_111', 'corine_112', 'corine_121', 'corine_122', 'corine_123', 'corine_124', 'corine_131', 'corine_132', 'corine_133', 'corine_141', 'corine_142', 'corine_211', 'corine_212', 'corine_213', 'corine_221', 'corine_222', 'corine_223', 'corine_231', 'corine_241', 'corine_242', 'corine_243', 'corine_244', 'corine_311', 'corine_312', 'corine_313', 'corine_321', 'cori

In [34]:
filt=['ndvi_new','evi', 'lst_day', 'lst_night']
#exclude onehot = ['corine', 'dir_max', 'dom_dir', 'wkd', 'month']+["C0","id", "firedate"]

In [38]:
filt=['ndvi_new','evi', 'lst_day', 'lst_night']
DTmeans, DTmin, DTmax, DTsd = statmonths(range(2019,2021),'/data2/ffp/datasets/monthly/%d/', 'june_*_norm.csv', filt)

2019
/data2/ffp/datasets/monthly/2019/
/data2/ffp/datasets/monthly/2019/06/june_2019_norm.csv
2020
/data2/ffp/datasets/monthly/2020/
/data2/ffp/datasets/monthly/2020/06/june_2020_norm.csv


In [36]:
DTmeans

Unnamed: 0_level_0,ndvi_new,evi,lst_day,lst_night,Year,Pattern,Aggr
Unnamed: 0_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0.682473,0.493776,0.554,0.513417,2019,june_*_norm.csv,mean
1,0.675653,0.489669,0.527276,0.475536,2020,june_*_norm.csv,mean


In [41]:
filt=['max_temp','min_temp','mean_temp','rain_7days','res_max','dom_vel','max_dew_temp','min_dew_temp','mean_dew_temp']
DTmeans2, DTmin2, DTmax2, DTsd2 = statmonths(range(2019,2021),'/data2/ffp/datasets/monthly/%d/', 'june_*_norm.csv', filt)

2019
/data2/ffp/datasets/monthly/2019/
/data2/ffp/datasets/monthly/2019/06/june_2019_norm.csv
2020
/data2/ffp/datasets/monthly/2020/
/data2/ffp/datasets/monthly/2020/06/june_2020_norm.csv


In [62]:
DTmeans.to_csv('august_means_sat.csv')
DTmax.to_csv('august_max_sat.csv')
DTmin.to_csv('august_min_sat.csv')
DTsd.to_csv('august_sd_sat.csv')

In [42]:
DTmeans2

Unnamed: 0_level_0,max_temp,min_temp,mean_temp,rain_7days,res_max,dom_vel,max_dew_temp,min_dew_temp,mean_dew_temp,Year,Pattern,Aggr
Unnamed: 0_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,0.601616,0.69703,0.641794,0.0302464,0.368251,0.156004,0.695115,0.702692,0.694463,2019,june_*_norm.csv,mean
1,0.549206,0.643685,0.586011,0.0389301,0.321998,0.16051,0.645632,0.663797,0.648659,2020,june_*_norm.csv,mean


In [3]:
DT2 = load_datatable('/data2/ffp/datasets/monthly/2020/august_2020/august_2020_norm.csv')

In [28]:
DT[dt.f["ndvi_new"] > 1, dt.sd()]

NameError: name 'DT' is not defined

In [34]:
DT2[dt.f["evi"] <0, dt.count(dt.f.evi) ]

Unnamed: 0_level_0,evi
Unnamed: 0_level_1,▪▪▪▪▪▪▪▪
0,0


In [None]:
DT2[dt.f["evi"] <= 1, dt.sd(dt.f["evi"])]

In [10]:
DT2 = load_datatable('/data2/ffp/datasets/monthly/2020/09/september_2020_norm.csv')

In [57]:
DT2[:, {'ndvi mean': dt.mean(dt.f.ndvi_new), 'ndvi sd': dt.sd(dt.f.ndvi_new)}]

Unnamed: 0_level_0,ndvi mean,ndvi sd
Unnamed: 0_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0.998264,0.592549


In [14]:
DT2['firedate'].nunique1()

29