## Feature selection
Elastic net regularisation 
*"Elastic net indicates a regularization procedure used to reduce the number of features included in the model"*
- Feature reduction
- 5-fold cross-validation (CV)?

In [None]:
from sklearn.linear_model import ElasticNet

# Fit an Elastic Net with fixed hyperparameters on the training data.
elastic_net = ElasticNet(
    alpha=0.1,
    l1_ratio=0.5,
    max_iter=10000,
    random_state=42,
)
elastic_net.fit(X_train, y_train)

# Pair every fitted coefficient with the name of its feature column.
coef = pd.Series(elastic_net.coef_, index=X_train.columns)

# Features whose coefficient is exactly zero count as eliminated.
n_kept = (coef != 0).sum()
n_dropped = (coef == 0).sum()
print("Elastic Net picked " + str(n_kept) + " variables and eliminated the other " +  str(n_dropped) + " variables")



In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Plot the 10 most negative and 10 most positive coefficients.
# Sort once and reuse the result. With fewer than 20 coefficients the head
# and tail slices overlap and the same feature would be drawn twice, so in
# that case plot every coefficient instead.
N_EXTREMES = 10
coef_sorted = coef.sort_values()
if len(coef_sorted) > 2 * N_EXTREMES:
    imp_coef = pd.concat([coef_sorted.head(N_EXTREMES),
                          coef_sorted.tail(N_EXTREMES)])
else:
    imp_coef = coef_sorted

# Figure size is set through rcParams, so it also applies to later figures.
matplotlib.rcParams['figure.figsize'] = (8.0, 10.0)
imp_coef.plot(kind = "barh")
plt.xlabel("Coefficient value")
plt.title("Coefficients in the Elastic Net Model")

### Hyperparameters

*"Tuning hyperparameters for ElasticNet Regression Model"* 

https://dataaspirant.com/elasticnet-regression-python/ 


In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values to try for the two Elastic Net hyperparameters.
param_grid = dict(
    alpha=[0.1, 1.0, 10.0],
    l1_ratio=[0.2, 0.5, 0.8],
)

# Try every alpha / l1_ratio combination using 5-fold cross-validation.
grid_search = GridSearchCV(elastic_net, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Report the hyperparameters of the best-scoring estimator.
best_model = grid_search.best_estimator_
print('Best alpha:', best_model.alpha)
print('Best l1_ratio:', best_model.l1_ratio)

## Model performance

In [None]:
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Evaluate the tuned model rather than the initial fixed-parameter
# `elastic_net`, so the hyperparameter search above is actually reflected in
# the reported score. With the default refit=True, GridSearchCV exposes the
# best parameter combination, refitted on the training data, as
# `best_estimator_`.
tuned_model = grid_search.best_estimator_
y_pred = tuned_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print('Mean squared error:', mse)



