In [1]:
import numpy as np
import xarray as xr
import pandas as pd
import joblib

from matplotlib import pylab as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestRegressor

In [4]:
def _with_datetime_axis(field):
    """Return ``field`` with its ``strftime`` coordinate cast to ``datetime64[ns]``."""
    stamps = field.strftime.values.astype('datetime64[ns]')
    return field.assign_coords(strftime=stamps)


def _on_strftime_axis(field):
    """Return ``field`` with its ``time`` name renamed to ``strftime``."""
    return field.rename({'time': 'strftime'})


# --- Daily fields -----------------------------------------------------------
# Each dataset is opened and rescaled with a constant offset/factor.
# NOTE(review): the -273.15 offsets look like Kelvin -> Celsius, and the
# /3600, *1000 and *(-1000) factors like unit conversions -- confirm the
# intended units against the data provider.
nc_org = xr.open_dataset('DATA_DAY/2m_temperature.nc') - 273.15
nc_org = nc_org.rename({'t2m': 't2m_org'})  # keep it distinguishable from nc_tem.t2m
nc_tem = xr.open_dataset('DATA_TEM_MOD/t2m_mod.nc') - 273.15
nc_rad = xr.open_dataset('DATA_DAY/surface_solar_radiation_downwars.nc') / 3600
nc_soiltem = xr.open_dataset('DATA_DAY/soil_temperature_level_1.nc') - 273.15
nc_pre = xr.open_dataset('DATA_DAY/total_precipitation.nc') * 1000
nc_dew = xr.open_dataset('DATA_DAY/2m_dewpoint_temperature.nc') - 273.15
nc_uwind = xr.open_dataset('DATA_DAY/10m_u_component_of_wind.nc')
nc_vwind = xr.open_dataset('DATA_DAY/10m_v_component_of_wind.nc')
nc_leafhigh = xr.open_dataset('DATA_DAY/leaf_area_index_high_vegetation.nc')
nc_leaflow = xr.open_dataset('DATA_DAY/leaf_area_index_low_vegetation.nc')
nc_skin = xr.open_dataset('DATA_DAY/skin_reservoir_content.nc')
nc_vol = xr.open_dataset('DATA_DAY/volumetric_soil_water_layer_1.nc')
nc_eva = xr.open_dataset('DATA_DAY/evaporation.nc') * (-1000)

# --- FWI result datasets (one file per index) -------------------------------
nc_BUI = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/BUI.nc')
nc_DC = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/DC.nc')
nc_DMC = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC//DMC.nc')
nc_DSR = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/DSR.nc')
nc_FFMC = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/FFMC.nc')
nc_FWI = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/FWI.nc')
nc_ISI = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/ISI.nc')
nc_M = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/M.nc')
nc_WIND = xr.open_dataset('../../FWI/RESULTADOS_FWI10_NC/WIND.nc')

# --- One DataArray per daily variable, with a datetime64 `strftime` axis ----
# Original author's note on the temperature line: DISABLE this conversion when
# using data modified in time or "long" (longitude/length -- TODO confirm).
tem = _with_datetime_axis(nc_tem.t2m)
pre = _with_datetime_axis(nc_pre.tp)
rad = _with_datetime_axis(nc_rad.ssrd)
soiltem = _with_datetime_axis(nc_soiltem.stl1)
dew = _with_datetime_axis(nc_dew.d2m)
uwind = _with_datetime_axis(nc_uwind.u10)
vwind = _with_datetime_axis(nc_vwind.v10)
leafhigh = _with_datetime_axis(nc_leafhigh.lai_hv)
leaflow = _with_datetime_axis(nc_leaflow.lai_lv)
skin = _with_datetime_axis(nc_skin.src)
vol = _with_datetime_axis(nc_vol.swvl1)
eva = _with_datetime_axis(nc_eva.e)

# --- FWI variables: rename `time` -> `strftime`, then cast the axis ---------
# The nc_* names are rebound from Dataset to the renamed DataArray.
nc_BUI = _on_strftime_axis(nc_BUI.BUI)
nc_DC = _on_strftime_axis(nc_DC.DC)
# NOTE(review): DMC.nc is read through a variable called `DCM` -- confirm this
# matches the variable name stored in the file and is not a typo.
nc_DMC = _on_strftime_axis(nc_DMC.DCM)
nc_DSR = _on_strftime_axis(nc_DSR.DSR)
nc_FFMC = _on_strftime_axis(nc_FFMC.FFMC)
nc_FWI = _on_strftime_axis(nc_FWI.FWI)
nc_ISI = _on_strftime_axis(nc_ISI.ISI)
nc_M = _on_strftime_axis(nc_M.M)
nc_WIND = _on_strftime_axis(nc_WIND.WIND)

BUI = _with_datetime_axis(nc_BUI)
DC = _with_datetime_axis(nc_DC)
DMC = _with_datetime_axis(nc_DMC)
DSR = _with_datetime_axis(nc_DSR)
FFMC = _with_datetime_axis(nc_FFMC)
FWI = _with_datetime_axis(nc_FWI)
ISI = _with_datetime_axis(nc_ISI)
M = _with_datetime_axis(nc_M)
WIND = _with_datetime_axis(nc_WIND)


In [5]:
# Split every predictor (and the target `tem`) at the same position along the
# first axis: the first 80% of `strftime` steps go to training, the rest to
# testing.
T2 = nc_tem.copy(deep=True)
contar = int(len(T2.strftime) * 0.8)


def _split_frames(field, cut):
    """Return ``(train, test)`` DataFrames of ``field`` split at index ``cut``.

    ``field`` is indexed positionally as a 3-D array; rows before ``cut`` on
    the first axis form the training frame, the remaining rows the test frame.
    """
    head = field[:cut, :, :].to_dataframe()
    tail = field[cut:, :, :].to_dataframe()
    return head, tail


pre_train, pre_test = _split_frames(pre, contar)
eva_train, eva_test = _split_frames(eva, contar)
rad_train, rad_test = _split_frames(rad, contar)
soiltem_train, soiltem_test = _split_frames(soiltem, contar)
leafhigh_train, leafhigh_test = _split_frames(leafhigh, contar)
skin_train, skin_test = _split_frames(skin, contar)
tem_train, tem_test = _split_frames(tem, contar)
DMC_train, DMC_test = _split_frames(DMC, contar)
DSR_train, DSR_test = _split_frames(DSR, contar)
WIND_train, WIND_test = _split_frames(WIND, contar)

In [17]:
# Column-wise assembly of the modelling tables. The temperature frame is
# deliberately listed last so that the final column is the regression target.
_train_parts = [
    pre_train, eva_train, rad_train, soiltem_train, leafhigh_train,
    skin_train, DMC_train, DSR_train, WIND_train,
    tem_train,
]
_test_parts = [
    pre_test, eva_test, rad_test, soiltem_test, leafhigh_test,
    skin_test, DMC_test, DSR_test, WIND_test,
    tem_test,
]

data_train = pd.concat(_train_parts, axis=1)
data_test = pd.concat(_test_parts, axis=1)

# Features: every column but the last. Target: the last column, kept as a
# one-column DataFrame (hence the `-1:` slice rather than `-1`).
x_train, y_train = data_train.iloc[:, :-1], data_train.iloc[:, -1:]
x_test, y_test = data_test.iloc[:, :-1], data_test.iloc[:, -1:]

In [216]:
# Train a 100-tree random forest on the training features.
# `random_state` is fixed so that bootstrap sampling and feature selection are
# repeatable: re-running this cell on the same data yields the same model.
regr = RandomForestRegressor(
    n_estimators=100,
    criterion='squared_error',
    random_state=42,
)
# `y_train` is a one-column DataFrame; ravel() hands the estimator a 1-D target.
regr.fit(x_train, y_train.values.ravel())

In [281]:
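# Save the fitted model to disk. Currently disabled: uncomment the line below to
# (over)write random_forest.joblib, which the next cell loads.
#joblib.dump(regr, "random_forest.joblib")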

['random_forest.joblib']

In [18]:
# Restore the persisted forest from disk and score the held-out features.
# NOTE: joblib files are pickle-based, so only load model files you trust.
loaded_rf = joblib.load(filename="random_forest.joblib")
rf_load = loaded_rf.predict(X=x_test)

In [19]:
# The preceding cell already ran `loaded_rf.predict(x_test)` and stored the
# result in `rf_load`; reuse it instead of repeating the full inference pass.
# `.copy()` keeps `prediccion` an independent array, as it was before.
prediccion = rf_load.copy()