In [None]:
# Import packages 
import sys
sys.path.append('../Plotting/')
""" Now you can import modules in ../Plotting"""

import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.tri as tri
import matplotlib.colors as colors
from scipy import interpolate as intr

#Models
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor


#Evaluation
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Some useful packages 
import importlib
import copy
import time

# IO packages
import pickle
from scipy.io import FortranFile


# Other modules in myPythonTools
import ana as a
#import xyp_plot as xyp
import ReadForest as RF


In [None]:
# Reload the ReadForest module so that edits made to it on disk take effect
# without restarting the kernel.
importlib.reload(RF)

In [None]:
# Get_a_Forest returns tuple (A_test, B_test, B_pred), so Model1[1] is B_test
# and Model1[2] is B_pred. Here it is called for the "uvNridge" tag with
# prediction enabled.
Model1 = RF.Get_a_Forest( tag="uvNridge" , DoPrediction=True )

In [None]:
# Result of Get_a_Forest for the "uvNsgh" tag, with prediction enabled.
Model2 = RF.Get_a_Forest( tag="uvNsgh" , DoPrediction=True )

In [None]:
# Result of Get_a_Forest for the "uvN" tag, with prediction enabled.
Model3 = RF.Get_a_Forest( tag="uvN" , DoPrediction=True )

In [None]:
# Scatter every element of Model1[1] (x axis) against the matching element
# of Model1[2] (y axis), with both arrays flattened to 1-D.
plt.scatter(x=Model1[1].flatten(), y=Model1[2].flatten())

In [None]:
# Show the dimensions of the third element of the Model1 tuple.
print(Model1[2].shape)

In [None]:
filename = "AB_uvNridge.dat"
#filename = "AB_uvNsgh.dat"

# Read the A/B arrays from a Fortran unformatted sequential file.
# Record order: dims of A, dims of B, A, B, then a second A/B pair
# (qA_r, qB_r) stored with the same dtypes and reshaped to the same dims.
# The `with` block guarantees the file is closed even when one of the
# reads or reshapes raises (the manual open/close leaked the handle then).
with FortranFile( filename , 'r') as ff:

    # Integer data seems to be int64 by default
    # when using scipy.io.FortranFile
    ddA = ff.read_record( '<i8'   )
    ddB = ff.read_record( '<i8'   )

    # A is read as float64. Original author's note: A inherits double
    # precision from the ridge data, presumably when ridge data and model
    # history output are concatenated with np.r_ in the code that wrote
    # this file (not shown in this notebook).
    qA = ff.read_record( '<f8'   ).reshape( ddA[0], ddA[1] )

    # B is read as float32. Original author's note: it is composed of
    # model history output only.
    qB = ff.read_record( '<f4'   ).reshape( ddB[0], ddB[1] )

    # Second A/B pair; later cells slice the test set out of these.
    # NOTE(review): presumably the "_r" arrays are row-shuffled copies of
    # qA/qB - confirm against the code that wrote the file.
    qA_r = ff.read_record( '<f8'   ).reshape( ddA[0], ddA[1] )

    qB_r = ff.read_record( '<f4'   ).reshape( ddB[0], ddB[1] )


In [None]:
# How to load a "pickled" RandomForest model:
# first choose the pickle file. MLfile keeps the same name for later use
# as the title of the 2-D histogram figure.
#filename = "random_forest_full_uvNsgh.pkl"
MLfile = filename = "random_forest_full_uvNridge.pkl"

In [None]:
tic = time.perf_counter()

# Load the pickled model.
# The `with` block closes the file handle as soon as unpickling finishes
# (or fails); the bare open() call left it to the garbage collector.
# NOTE: pickle.load can execute arbitrary code - only load trusted files.
with open(filename, "rb") as model_file:
    Model = pickle.load(model_file)

toc = time.perf_counter()

# Report the wall-clock time spent loading.
LoadingTime = f"Loaded model in {toc - tic:0.4f} seconds"
print(LoadingTime)

In [None]:
# Test set: all rows from index 788201 onward of the second A/B pair read
# from the data file.
test_rows = slice(788201, None)
A_test = qA_r[test_rows, :]
B_test = qB_r[test_rows, :]


## Predicting with the loaded model takes a while ...
Timing from a previous run: "Model prediction in 433.1365 seconds"

In [None]:
# Run the loaded model on the test inputs and report the wall-clock time.
t_start = time.perf_counter()
B_pred = Model.predict(A_test)
elapsed = time.perf_counter() - t_start
PredictionTime = f"Model prediction in {elapsed:0.4f} seconds"
print(PredictionTime)


In [None]:
# Scatter of all predicted values (x axis) against all test values (y axis).
plt.scatter(x=B_pred.flatten(), y=B_test.flatten())

In [None]:
# Joint 2-D histogram of predicted and test values on a 50x50 grid spanning
# [-10, 10] in both directions. Predictions are passed first, so they run
# along the first axis of the count array; test values run along the second.
# h2u_adj is the tuple (counts, prediction bin edges, test bin edges).
y = B_pred
x = B_test
h2u_adj = np.histogram2d(
    y.flatten(),
    x.flatten(),
    bins=50,
    range=[[-10., 10.], [-10., 10.]],
)


In [None]:
fig = plt.figure(figsize=(30, 9))


cmap=plt.cm.jet
#cmap=cm.jet
# Roughly logarithmic contour levels for the bin counts.
clevs=[1,3,5,10,30,50,100,300,500,1000,3000,5000,10_000,30_000,50_000,100_000 ] #np.logspace(0,3)

# h2u_adj = (counts, prediction bin edges, test bin edges); rows of `counts`
# index prediction bins and columns index test bins.
counts, pred_edges, test_edges = h2u_adj

# contourf places Z[j, i] at (X[i], Y[j]), so X must be the test (column)
# coordinate and Y the prediction (row) coordinate. Bin centres are used
# rather than the upper bin edges, which shifted the contours by half a bin.
test_centers = 0.5 * (test_edges[:-1] + test_edges[1:])
pred_centers = 0.5 * (pred_edges[:-1] + pred_edges[1:])

# Only the right-most panel of a 1x3 layout is drawn.
ax = fig.add_subplot(1, 3, 3 ) # , projection=ccrs.PlateCarree(central_longitude=0))
ax.set_aspect('equal','box')
cf = ax.contourf(test_centers,pred_centers,counts,levels=clevs,cmap=cmap,norm=colors.LogNorm())
fig.colorbar(cf,ax=ax,shrink=0.6)
# Label through the Axes object so the labels cannot land on another panel.
ax.set_xlabel("$Test~Tendencies $",fontsize=20)
ax.set_ylabel("$ML~Predicted~Tendencies $",fontsize=20)
ax.set_title( MLfile ,fontsize=20 )



In [None]:
# Correlation matrix between predicted and test values in column 10,
# followed by the mean squared error over all columns.
pred_col10 = B_pred[:, 10].flatten()
test_col10 = B_test[:, 10].flatten()
pco = np.corrcoef(x=pred_col10, y=test_col10)
print(pco)
print(mean_squared_error(B_pred.flatten(), B_test.flatten()))

In [None]:
# For each of the first 22 columns, store the correlation between prediction
# and test values and the slope of a degree-1 fit of prediction (y) on test (x).
n_cols = 22
corrs = np.zeros(n_cols)
slope = np.zeros(n_cols)
for icol in range(n_cols):
    pred_col = B_pred[:, icol].flatten()
    test_col = B_test[:, icol].flatten()
    # Off-diagonal entry of the 2x2 correlation matrix.
    pco = np.corrcoef(x=pred_col, y=test_col)
    corrs[icol] = pco[0, 1]
    # polyfit returns the highest power first, so index 0 is the slope.
    pft = np.polyfit(x=test_col, y=pred_col, deg=1)
    slope[icol] = pft[0]

In [None]:
# Plot the per-column correlation and the per-column fit slope against the
# column index, both on the same axes.
plt.plot(corrs)
plt.plot(slope)

In [None]:
# Column 11 only: test values on x, predictions on y, with both axes
# limited to [-10, 10].
test_col11 = B_test[:, 11].flatten()
pred_col11 = B_pred[:, 11].flatten()
plt.scatter(test_col11, pred_col11)
plt.xlim(-10, 10)
plt.ylim(-10, 10)

In [None]:
# Show the dimensions of the A array read from the data file.
print(qA.shape)

In [None]:
# Column 20 only: predictions on x, test values on y.
#plt.plot(B_test[1,0:12])
#plt.plot(B_pred[1,0:12])
pred_col20 = B_pred[:, 20].flatten()
test_col20 = B_test[:, 20].flatten()
plt.scatter(pred_col20, test_col20)

## Re-loaded results exactly match those from ML_ana run from scratch and saved

In [None]:
# Build a fresh random row permutation of qA/qB and take the rows from
# position 788201 onward as a new test set.
# NOTE(review): the shuffle uses NumPy's global RNG without a seed, so this
# split differs on every run; whether these rows overlap the rows the pickled
# model was trained on cannot be told from this notebook - confirm.
Ashp = qA.shape

idxs = np.arange(Ashp[0])
print(idxs.shape)
Ridxs = idxs.copy()

# Time only the in-place shuffle of the index copy.
tic = time.perf_counter()
np.random.shuffle(Ridxs)
toc = time.perf_counter()
ShuffleTime = f"Shuffled indices in {toc - tic:0.4f} seconds"

print(ShuffleTime)
print(Ridxs.shape)

# Apply the same permutation to both arrays so rows stay paired.
A_r = qA[Ridxs, :]
B_r = qB[Ridxs, :]

test_rows = slice(788201, None)
A_test = A_r[test_rows, :]
B_test = B_r[test_rows, :]



In [None]:
# Predict on the re-shuffled test inputs and report the wall-clock time.
t_start = time.perf_counter()
B_pred = Model.predict(A_test)
elapsed = time.perf_counter() - t_start
PredictionTime = f"Model prediction in {elapsed:0.4f} seconds"
print(PredictionTime)


In [None]:
# Scatter of all predicted values (x axis) against all test values (y axis)
# for the re-shuffled split.
plt.scatter(x=B_pred.flatten(), y=B_test.flatten())


In [None]:
# Correlation matrix and mean squared error between predictions and test
# values, computed over all columns at once.
pred_flat = B_pred.flatten()
test_flat = B_test.flatten()
pco = np.corrcoef(x=pred_flat, y=test_flat)
print(pco)
print(mean_squared_error(pred_flat, test_flat))