In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

def metric(y_true, y_pred):
    return np.mean(np.sum(np.abs(y_true - y_pred), axis=0)/np.sum(y_true, axis=0))

In [2]:
train = pd.read_csv('train_scores.csv')
loading = pd.read_csv('loading.csv')
sample = pd.read_csv('sample_submission.csv')

In [3]:
test_ids = [x for x in loading.Id.values if x not in train.Id.values]

In [4]:
targets = [(0.3, 'age'),
           (0.175, 'domain1_var1'),
           (0.175, 'domain1_var2'),
           (0.175, 'domain2_var1'),
           (0.175, 'domain2_var2')]
#cols = ['b_ridge','enet','lars','lasso','ridge','sgd','svr_rbf','svr_linear','xgb','lgb','cat']
cols = ['enet','lars','lasso','ridge','svr_rbf','svr_linear','cat']

In [5]:
def rmse_cv(model):
    rmse = np.sqrt(-cross_val_score(model, X_meta.values, y_meta.values, scoring='neg_mean_squared_error', cv=7))
    return '{:.5f}'.format(rmse.mean()), '{:.3f}'.format(rmse.std())

In [6]:
final_score = 0
X_meta_test = pd.DataFrame(columns = ["Id", "age", "domain1_var1", "domain1_var2", "domain2_var1", "domain2_var2"])
X_meta_test['Id'] = test_ids

for w, target in targets:
    
    #b_ridge = pd.read_csv('preds/{}_pred_BayesianRidge.csv'.format(target))
    enet = pd.read_csv('preds/{}_pred_ElasticNet.csv'.format(target))
    lars = pd.read_csv('preds/{}_pred_Lars.csv'.format(target))
    lasso = pd.read_csv('preds/{}_pred_Lasso.csv'.format(target))
    ridge = pd.read_csv('preds/{}_pred_Ridge.csv'.format(target))
    #sgd = pd.read_csv('preds/{}_pred_SGDRegressor.csv'.format(target))
    svr_rbf = pd.read_csv('preds/svr_rbf_preds.csv')
    svr_linear = pd.read_csv('preds/svr_linear_preds.csv')
    #xgb = pd.read_csv('preds/xgb_preds.csv')
    #lgb = pd.read_csv('preds/lgb_preds.csv')
    cat = pd.read_csv('preds/cat_preds.csv')
    
    X_meta = pd.DataFrame(columns=cols, index=train.index)

    #X_meta.b_ridge = b_ridge
    X_meta.enet = enet
    X_meta.lars = lars
    X_meta.lasso = lasso
    X_meta.ridge = ridge
    #X_meta.sgd = sgd
    X_meta.svr_rbf = svr_rbf['pred_{}'.format(target)]
    X_meta.svr_linear = svr_linear['pred_{}'.format(target)]
    #X_meta.xgb = xgb['pred_{}'.format(target)]
    #X_meta.lgb = lgb['pred_{}'.format(target)]
    X_meta.cat = cat['pred_{}'.format(target)]
    
    X_meta[target] = train[target]
    X_meta = X_meta.dropna()
    print('\n', target,'\n')
    print('Printing individual scores...')
    for i, col in enumerate(cols):
        score = metric(X_meta[col], X_meta[target])
        print('{}.{} = {:.5f}'.format(i+1, col, score))
        
    #b_ridge_test = pd.read_csv('preds/test_BayesianRidge_{}.csv'.format(target))
    enet_test = pd.read_csv('preds/test_ElasticNet_{}.csv'.format(target))
    lars_test = pd.read_csv('preds/test_Lars_{}.csv'.format(target))
    lasso_test = pd.read_csv('preds/test_Lasso_{}.csv'.format(target))
    ridge_test = pd.read_csv('preds/test_Ridge_{}.csv'.format(target))
    #sgd_test = pd.read_csv('preds/test_SGDRegressor_{}.csv'.format(target))
    svr_rbf_test = pd.read_csv('preds/test_svr_rbf.csv')
    svr_linear_test = pd.read_csv('preds/test_svr_linear.csv')
    #xgb_test = pd.read_csv('preds/xgb_test.csv')
    #lgb_test = pd.read_csv('preds/lgb_test.csv')
    cat_test = pd.read_csv('preds/cat_test.csv')
    
    X_test = pd.DataFrame(columns=cols, index=train.index)

    #X_test.b_ridge = b_ridge_test
    X_test.enet = enet_test
    X_test.lars = lars_test
    X_test.lasso = lasso_test
    X_test.ridge = ridge_test
    #X_test.sgd = sgd_test
    X_test.svr_rbf = svr_rbf_test
    X_test.svr_linear = svr_linear_test
    #X_test.xgb = xgb_test
    #X_test.lgb = lgb_test 
    X_test.cat = cat_test 
    
    kf = KFold(n_splits=7, shuffle=True, random_state=0)
    y_oof = np.zeros(X_meta.shape[0])
    y_test = np.zeros((X_test.shape[0], 7))

    for f, (train_ind, val_ind) in enumerate(kf.split(X_meta)):
        train_df, val_df = X_meta.iloc[train_ind], X_meta.iloc[val_ind]
        
        model = LinearRegression()
        model.fit(train_df.iloc[:,:-1].values, train_df.iloc[:,-1].values)

        y_oof[val_ind] = model.predict(val_df.iloc[:,:-1].values)
        y_test[:, f] = model.predict(X_test.values)

    X_meta["pred_{}".format(target)] = y_oof
    X_test[target] = y_test.mean(axis=1)
    X_meta_test[target] = X_test[target]
    
    score = metric(X_meta["{}".format(target)], X_meta["pred_{}".format(target)])
    print('And final score after stacking = {:.5f}'.format(score))
    final_score += w*score
print('\n','Final Score is = ',final_score)


 age 

Printing individual scores...
1.enet = 0.14277
2.lars = 0.14950
3.lasso = 0.14355
4.ridge = 0.14277
5.svr_rbf = 0.14340
6.svr_linear = 0.14362
7.cat = 0.15096
And final score after stacking = 0.14222

 domain1_var1 

Printing individual scores...
1.enet = 0.15184
2.lars = 0.15218
3.lasso = 0.15185
4.ridge = 0.15085
5.svr_rbf = 0.15054
6.svr_linear = 0.15069
7.cat = 0.15281
And final score after stacking = 0.15070

 domain1_var2 

Printing individual scores...
1.enet = 0.15124
2.lars = 0.15124
3.lasso = 0.15129
4.ridge = 0.15122
5.svr_rbf = 0.14874
6.svr_linear = 0.14867
7.cat = 0.15452
And final score after stacking = 0.15119

 domain2_var1 

Printing individual scores...
1.enet = 0.18165
2.lars = 0.18261
3.lasso = 0.18190
4.ridge = 0.18165
5.svr_rbf = 0.17973
6.svr_linear = 0.17995
7.cat = 0.18721
And final score after stacking = 0.18167

 domain2_var2 

Printing individual scores...
1.enet = 0.17616
2.lars = 0.17720
3.lasso = 0.17773
4.ridge = 0.17612
5.svr_rbf = 0.17340
6.sv

In [7]:
sample.Predicted = ((X_meta_test.iloc[:,1:]).stack()).values

In [8]:
sample.to_csv('subs/stacked_2.csv', index=False)

In [9]:
sample

Unnamed: 0,Id,Predicted
0,10003_age,57.202069
1,10003_domain1_var1,47.549793
2,10003_domain1_var2,60.788338
3,10003_domain2_var1,48.419616
4,10003_domain2_var2,58.367350
...,...,...
29380,21753_age,42.301548
29381,21753_domain1_var1,43.539201
29382,21753_domain1_var2,46.643737
29383,21753_domain2_var1,47.644114
