In [1]:
from models import LinearRegression, BoostingModel
from train import ClimateDataset, loss_rmse
import pandas as pd
import torch
import numpy as np

def feature_importance(booster, importance_type='gain'):
    """Rank a fitted booster's features by importance, highest first.

    Args:
        booster: Object exposing ``feature_name()`` and
            ``feature_importance(importance_type=...)``. Presumably a
            LightGBM ``Booster`` -- TODO confirm against ``BoostingModel``.
        importance_type: Forwarded unchanged to
            ``booster.feature_importance``. Defaults to ``'gain'``, the value
            that was previously hard-coded, so existing calls are unaffected.

    Returns:
        pandas.DataFrame with columns ``feature`` and ``importance``, sorted
        by ``importance`` in descending order with a fresh 0..n-1 index.
    """
    table = pd.DataFrame({
        'feature': booster.feature_name(),
        'importance': booster.feature_importance(importance_type=importance_type),
    })
    # Drop the pre-sort index so row labels match the displayed rank.
    return table.sort_values('importance', ascending=False).reset_index(drop=True)

In [2]:
# Build the 'aice' dataset limited to lead time 3 and period 3.
ds = ClimateDataset(
    'aice',
    lead_times=[3],
    periods=[3],
)

# The model receives its own copy of the dataset's variable list.
model = BoostingModel(
    variables=ds.variables.copy(),
)
model.fit(ds)

# model.model is handed to feature_importance, which expects the booster API
# (feature_name / feature_importance); the ranked table is the cell output.
feature_importance(model.model)

Unnamed: 0,feature,importance
0,aice,243846.5776
1,cos_lat,16729.302294
2,ps,8547.605698
3,t850,7712.167528
4,sin_lon,7097.492949
5,olr,6993.244529
6,h500,6824.411523
7,hice,6786.388554
8,v850,6028.637073
9,sst,5990.5645


In [3]:
# Build the 'swe' dataset limited to lead time 3 and periods 44..48 inclusive.
ds = ClimateDataset(
    'swe',
    lead_times=[3],
    periods=[*range(44, 49)],
)

# The model receives its own copy of the dataset's variable list.
model = BoostingModel(
    variables=ds.variables.copy(),
)
model.fit(ds)

# model.model is handed to feature_importance, which expects the booster API
# (feature_name / feature_importance); the ranked table is the cell output.
feature_importance(model.model)

Unnamed: 0,feature,importance
0,t2min,540728356.0
1,swe,345903520.0
2,cos_lat,290962796.0
3,cos_lon,237870206.0
4,sin_lon,221507471.0
5,tvl,218294927.0
6,sdor,189033583.0
7,h500,168372332.0
8,t2,158488576.0
9,ww,147749781.0


In [4]:
# 'aice' dataset for lead time 3 / period 3, this time with normed=True
# (presumably normalised inputs -- confirm in ClimateDataset).
ds = ClimateDataset(
    'aice',
    lead_times=[3],
    periods=[3],
    normed=True,
)

# Exclude the two period encodings before fitting the linear model.
variables = [
    v
    for v in ds.variables
    if v not in ['cos_period', 'sin_period']
]
ds.set_variables(variables)

model = LinearRegression(variables=variables)
model.fit(ds)

# One row per variable, ordered by weight magnitude (largest first).
# model.weights exposes .numpy(), so it is presumably a torch tensor.
(
    pd.DataFrame({
        'feature': model.variables,
        'weights': model.weights,
        'abs_weights': np.abs(model.weights.numpy()),
    })
    .sort_values(by='abs_weights', ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,feature,weights,abs_weights
0,cos_lat,-0.118451,0.118451
1,aice,0.108433,0.108433
2,t2min,-0.04327,0.04327
3,t2,0.042158,0.042158
4,sst,0.032601,0.032601
5,t850,-0.02428,0.02428
6,h500,0.021282,0.021282
7,t2max,-0.012351,0.012351
8,ps,-0.010795,0.010795
9,cld,0.00914,0.00914


In [9]:
# 'swe' dataset for lead time 3 and periods 44..48 inclusive, with normed=True
# (presumably normalised inputs -- confirm in ClimateDataset).
ds = ClimateDataset(
    'swe',
    lead_times=[3],
    periods=[*range(44, 49)],
    normed=True,
)

# Keep a variable only if it is not a period encoding and its name contains
# none of the substrings 'tvl', 'tvh' or 'slt'.
variables = []
for v in ds.variables:
    if v not in ['cos_period', 'sin_period'] and not any(
        tag in v for tag in ('tvl', 'tvh', 'slt')
    ):
        variables.append(v)
ds.set_variables(variables)

model = LinearRegression(variables=variables)
model.fit(ds)

# One row per variable, ordered by weight magnitude (largest first).
# model.weights exposes .numpy(), so it is presumably a torch tensor.
(
    pd.DataFrame({
        'feature': model.variables,
        'weights': model.weights,
        'abs_weights': np.abs(model.weights.numpy()),
    })
    .sort_values(by='abs_weights', ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,feature,weights,abs_weights
0,t2min,-2.872879,2.872879
1,h500,-2.096758,2.096758
2,t850,1.433057,1.433057
3,cld,1.39222,1.39222
4,t2,-1.116005,1.116005
5,v850,1.050917,1.050917
6,uv10,0.74246,0.74246
7,olr,0.718844,0.718844
8,swe,0.609597,0.609597
9,ps,0.58643,0.58643
