In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
from sklearn.linear_model import ElasticNetCV, ElasticNet

%matplotlib inline

In [None]:
# Load the full data table from CSV.
df = pd.read_csv('Table_XX_all_data_ElasticNet.csv')

# Feature matrix: every column except 'target'.
# (Dropping along axis=1 / `columns=` removes a COLUMN, not a row.)
X = df.drop(columns='target')

# Response vector: the 'target' column on its own.
y = df['target']

In [None]:
# Quick visual check of the feature matrix (first five rows).
X.head(n=5)

In [None]:
# ElasticNetCrossValidation (→GridSearch)
cv_model = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, .995, 1], eps=0.001, n_alphas=100, fit_intercept=True, 
                        normalize=True, precompute='auto', max_iter=2000, tol=0.0001, cv=10, 
                        copy_X=True, verbose=0, n_jobs=-1, positive=False, random_state=None, selection='cyclic')

In [None]:
cv_model.fit(X, y)

In [None]:
#最適なパラメータ出力
print('Optimal alpha: %.8f'%cv_model.alpha_)
print('Optimal l1_ratio: %.3f'%cv_model.l1_ratio_)
print('Number of iterations %d'%cv_model.n_iter_)

In [None]:
#CrossValidation parameterを利用
model = ElasticNet(l1_ratio=cv_model.l1_ratio_, alpha = cv_model.alpha_, max_iter=cv_model.n_iter_, fit_intercept=True, normalize = True)
model.fit(X, y)

In [None]:
print(r2_score(y, model.predict(X)))

In [None]:
# Feature importance: absolute value of each fitted coefficient, labelled by column.
# NOTE(review): |coef| is only comparable across features when the features are on
# a common scale -- confirm how X was scaled before reading the ranking.
abs_coefficients = np.abs(model.coef_)
feature_importance = pd.Series(abs_coefficients, index=X.columns)

# A feature counts as "selected" when its coefficient is not exactly zero.
n_selected_features = (feature_importance > 0).sum()
reduction_pct = (1 - n_selected_features / len(feature_importance)) * 100
print('{0:d} features, reduction of {1:2.2f}%'.format(n_selected_features, reduction_pct))

# Bar chart of the 30 largest importances, in ascending order (largest on the right).
top_features = feature_importance.sort_values().tail(30)
top_features.plot.bar(figsize=(18, 6))