In [1]:
import sys as sys 
sys.path.append('..')

import numpy as np 
import pandas as pd
from ModelTools.model import CentralRegression,QuantileRegression
import plotnine as gg


### 通过控制得分函数的方法来选择不同的模型

In [None]:
# Fit the same synthetic data with three different cross-validation scores
# (mse / mdae / mape) plus a 0.95 quantile regression, then overlay the
# predictions of all four models on the raw scatter.
n = 500
rng = np.random.default_rng(0)
# Draw order (x, then y) determines the random stream; keep it unchanged.
x = rng.normal(size=n,loc=0,scale=0.5)
y = rng.lognormal(mean=5*np.cos(x),sigma=0.5)
df = pd.DataFrame(data={'x':x,'y':y,'ts':pd.date_range(start='2000-01-01',periods=n,freq='min')})

fit_mse = CentralRegression(data=df,col_x=['x'],col_y='y',exp_model=False,cv_score='mse')
fit_mse.fit(print_result=False).fit_final_model(print_result=False)

fit_mdae = CentralRegression(data=df,col_x=['x'],col_y='y',exp_model=False,cv_score='mdae')
fit_mdae.fit(print_result=False).fit_final_model(print_result=False)

fit_mape = CentralRegression(data=df,col_x=['x'],col_y='y',exp_model=False,cv_score='mape')
fit_mape.fit(print_result=False).fit_final_model(print_result=False)

fit_qr = QuantileRegression(data=df,col_x=['x'],col_y='y',exp_model=False,quantile=0.95)
fit_qr.fit(print_result=False).fit_final_model(print_result=False)

gg.options.figure_size = [15,5]
(
    gg.ggplot(data=df)
    # Map y to the DataFrame column rather than the kernel-global array `y`:
    # `y` is reassigned by several other cells, so referencing the column keeps
    # the scatter tied to the `df` that is actually being plotted.
    +gg.aes(x='x',y='y')
    +gg.geom_point()
    # The quoted strings ("'mse'", ...) are constant expressions, so each line
    # gets its own legend entry.
    +gg.geom_line(gg.aes(y=fit_mse.predict(),color="'mse'"),size=1)
    +gg.geom_line(gg.aes(y=fit_mdae.predict(),color="'mdae'"),size=1)
    +gg.geom_line(gg.aes(y=fit_mape.predict(),color="'mape'"),size=1)
    +gg.geom_line(gg.aes(y=fit_qr.predict(),color="'QR'"),size=1)
)

### 检查测试集的数据相对于训练集数据而言的Novelty Score

In [25]:
def style_func(sr:pd.Series) -> np.ndarray:
    """Build per-cell CSS that dims every entry of a metric column except the best ones.

    Intended for ``Styler.apply`` (column-wise): ``sr`` is one metric column
    whose ``name`` is the metric label and whose values are one score per model.

    Ranking rules:
      * ``R2``  - higher is better: entries strictly above the 0.8 quantile stay
        un-dimmed.
      * ``MBE`` - a signed bias whose ideal value is zero: entries are ranked by
        absolute value, and those strictly below the 0.2 quantile of ``|MBE|``
        stay un-dimmed. (Ranking the signed value would highlight the most
        negatively biased models.)
      * every other metric - lower is better: entries strictly below the 0.2
        quantile stay un-dimmed.

    Returns:
        An object array with the same length as ``sr`` containing ``None`` for
        highlighted cells and ``'opacity: 20%;'`` for dimmed cells.
    """
    if sr.name in ['R2']:
        keep = sr>sr.quantile(q=0.8)
    elif sr.name in ['MBE']:
        magnitude = sr.abs()
        keep = magnitude<magnitude.quantile(q=0.2)
    else:
        keep = sr<sr.quantile(q=0.2)
    style = np.where(keep,None,'opacity: 20%;')
    return style

# Render the training metric table with all but the best models dimmed in each
# metric column (Styler.apply passes one column at a time to style_func).
# NOTE(review): `m` is created by the CentralRegression cell further down, so
# this cell only works after that cell has been executed.
m.MetricTrain.get_metric().style.apply(style_func)

Unnamed: 0,R2,MSE,MAE,MBE,MdAE,MAPE,MaxE,SAE,SAPE
OLS,0.558272,0.29482,0.437541,0.0,0.391404,0.091687,1.733857,0.321524,0.077373
inter_sp_OLS,0.630039,0.246921,0.40012,-0.0,0.35499,0.083811,1.57881,0.29466,0.069895
inter_sp_std_EN,0.523959,0.317722,0.455159,-0.0,0.405565,0.096487,1.749208,0.332494,0.083512
inter_sp_std_HUBER,0.629427,0.24733,0.399922,-0.001783,0.356007,0.083678,1.58753,0.295621,0.069934
inter_sp_std_LAR,0.630039,0.246921,0.40012,-0.0,0.35499,0.083811,1.57881,0.29466,0.069895
poly_OLS,0.620801,0.253087,0.406424,0.0,0.353373,0.085221,1.643975,0.29649,0.070785
poly_std_EN,0.38406,0.411093,0.517429,-0.0,0.455075,0.109737,1.864037,0.37863,0.094847
poly_std_HUBER,0.620615,0.253211,0.406538,-0.000216,0.354602,0.085238,1.649592,0.296543,0.070784
poly_std_LAR,0.620801,0.253087,0.406424,-0.0,0.353373,0.085221,1.643975,0.29649,0.070785
sp_OLS,0.630052,0.246913,0.400096,-0.000119,0.353246,0.083811,1.579639,0.294679,0.069888


In [2]:
# Simulate 500 observations where y depends on x2 only (mean sin(x2) + 5,
# Gaussian noise with sd 0.5); x1 is stored in the frame but is not a predictor.
n = 500
rng = np.random.default_rng(0)

# The draw order x1 -> x2 -> y fixes the random stream; do not reorder.
x1 = rng.normal(loc=0, scale=1, size=n)
x2 = rng.normal(loc=0, scale=1, size=n)
y = rng.normal(loc=np.sin(x2) + 5, scale=0.5)

df = pd.DataFrame(
    data=dict(
        x1=x1,
        x2=x2,
        y=y,
        ts=pd.date_range(start='2000-01-01', periods=n, freq='min'),
    )
)

# Model selection restricted to the 'lm' base family, followed by the final fit.
m = CentralRegression(
    data=df,
    col_x=['x2'],
    col_y='y',
    col_ts='ts',
    ts_freq='min',
    exp_model=False,
)
(
    m.fit(base=['lm'])
    .fit_final_model()
)

# Last expression of the cell: its return value is the cell output.
m.check_novelty()

100%|██████████| 16/16 [00:03<00:00,  5.07it/s]


Best Model(CV)   : poly_std_HUBER (MSE) 
Hyperparameters  : poly__degree=3 
Train Test Split : test_size=0.3, shuffle=False, random_state=0 
Cross Validation : KFold(n_splits=5, random_state=None, shuffle=False) 
 
           R2     MSE     MAE      MBE    MdAE    MAPE    MaxE     SAE    SAPE
-----  ------  ------  ------  -------  ------  ------  ------  ------  ------
Train  0.6206  0.2532  0.4065  -0.0002  0.3546  0.0852  1.6496  0.2965  0.0708
Test   0.5311  0.2663  0.412   -0.0146  0.3274  0.0874  1.3141  0.3107  0.0749 
 

Final Model : poly_std_HUBER 
                  R2     MSE     MAE     MBE    MdAE    MAPE    MaxE     SAE    SAPE
------------  ------  ------  ------  ------  ------  ------  ------  ------  ------
Train & Test  0.5983  0.2567  0.4071  0.0062  0.3522  0.0859  1.6697  0.3016  0.0731


### 模型的预测效果检验

In [None]:
# Same synthetic setup as the novelty example: y ~ Normal(sin(x2) + 5, 0.5),
# with x1 kept in the frame but not used as a predictor.
n = 500
rng = np.random.default_rng(0)

# The draw order x1 -> x2 -> y fixes the random stream; do not reorder.
x1 = rng.normal(loc=0, scale=1, size=n)
x2 = rng.normal(loc=0, scale=1, size=n)
y = rng.normal(loc=np.sin(x2) + 5, scale=0.5)

df = pd.DataFrame(
    data=dict(
        x1=x1,
        x2=x2,
        y=y,
        ts=pd.date_range(start='2000-01-01', periods=n, freq='min'),
    )
)

m = CentralRegression(
    data=df,
    col_x=['x2'],
    col_y='y',
    col_ts='ts',
    ts_freq='min',
    exp_model=False,
)
(
    m.fit(base=['lm'])
    .fit_final_model()
)

# Cell output: test-set plot with the 'OLS' and 'poly_OLS' models highlighted.
m.MetricTest.plot_Rts(highlight_y=['OLS', 'poly_OLS'], add_focus=False)

In [None]:
import plotnine as gg

# Scatter of the observations in `df` with the model's predictions overlaid in red.
(
    gg.ggplot(data=df)
    # Use the 'y' column of `df` instead of the kernel-global array `y`, which
    # other cells overwrite; this keeps the scatter consistent with `df` even
    # when cells are executed out of order.
    +gg.aes(x='x2',y='y')
    +gg.geom_point()
    +gg.geom_line(gg.aes(y=m.predict()),color='red',size=1)
)

In [None]:
# Fit the final model and bootstrap (1000 resamples) an interval of type
# 'confidence', then plot it against the data and the true mean curve.
m.fit_final_model()
x = m.predict_interval(type='confidence',n_bootstrap=1000)
(
    pd.DataFrame(x)
    # `real` is the noise-free mean the data was simulated from
    # (y ~ Normal(sin(x2) + 5, 0.5)), so the red line is the ground truth the
    # interval should be compared against.
    .assign(x=x2,mean=m.predict(),real=np.sin(x2)+5,y=y)
    .pipe(gg.ggplot)
    +gg.aes(x='x')
    + gg.geom_point(gg.aes(y='y'),alpha=0.1)
    +gg.geom_line(gg.aes(y='real'),color='red')
    # 'down' / 'high' are columns of the predict_interval result; presumably the
    # lower and upper interval bounds - confirm against ModelTools.
    +gg.geom_line(gg.aes(y='down'),color='green')
    +gg.geom_line(gg.aes(y='high'),color='green')
)

In [None]:
# Prediction-interval demo on a single uniform predictor:
# y ~ Normal(cos(x1) + 1, 0.2).
n = 500
rng = np.random.default_rng(0)
x1 = rng.uniform(-3,3,size=n)
# Alternative responses kept for experimentation (log-normal / heteroscedastic noise):
# y = rng.lognormal(mean=x1,sigma=0.1)
y = rng.normal(loc=np.cos(x1)+1,scale=0.2)
# y = rng.normal(loc=np.cos(x1)+1,scale=(x1-min(x1))/np.ptp(x1)+0.01)
df = pd.DataFrame(data={'x1':x1,'y':y})

# `MeanRegression` is not imported anywhere in this notebook and would raise a
# NameError on a fresh kernel; CentralRegression is the imported class that the
# other cells use with the same arguments and methods.
m = CentralRegression(data=df,col_x=['x1'],col_y='y',exp_model=False)
m.fit(base=['lm'])
m.fit_final_model()

x = m.predict_interval(type='predict',n_bootstrap=100,alpha=0.01)
(
    pd.DataFrame(x)
    # `real` is the noise-free mean used to simulate y above.
    .assign(x=x1,m=m.predict(),real=np.cos(x1)+1,y=y)
    .pipe(gg.ggplot)
    +gg.aes(x='x')
    + gg.geom_point(gg.aes(y='y'),alpha=0.1)
    # Uncomment to overlay the true mean curve:
    # +gg.geom_line(gg.aes(y='real'),color='red')
    +gg.geom_line(gg.aes(y='m'),color='yellow')
    # 'down' / 'high' are columns of the predict_interval result; presumably the
    # lower and upper interval bounds - confirm against ModelTools.
    +gg.geom_line(gg.aes(y='down'),color='green')
    +gg.geom_line(gg.aes(y='high'),color='blue')
)

In [None]:
# Scratch check: None is hashable and therefore a valid dictionary key.
x = dict.fromkeys([None], 1)
x