In [17]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import cross_val_score

df = pd.read_csv('../Data/housing.csv')

In [37]:
X.mean()

CRIM       8.326673e-17
ZN         3.466704e-16
INDUS     -3.016965e-15
CHAS       3.999875e-16
NOX        3.167427e-15
RM        -1.258809e-14
AGE       -1.158274e-15
DIS        7.308603e-16
RAD       -1.068535e-15
TAX        6.534079e-16
PTRATIO   -1.084420e-14
B          8.117354e-15
LSTAT     -6.494585e-16
dtype: float64

In [3]:
X = df.iloc[:, :-1]
y = df['PRICE']

In [8]:
X = (X - X.mean()) / X.std()

In [9]:
lasso = Lasso()

In [10]:
lasso.fit(X, y)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [11]:
lasso.coef_

array([-0.        ,  0.        , -0.        ,  0.        , -0.        ,
        2.71542789, -0.        , -0.        , -0.        , -0.        ,
       -1.34428304,  0.18036988, -3.54677609])

In [14]:
alpha = 10

In [15]:
l1_term = alpha*np.sum(np.abs(lasso.coef_))

In [16]:
l1_term

77.86856908757778

In [19]:
alphas = np.logspace(-4, 4, 9)
cv_scores = []

for alpha in alphas:
    lasso.set_params(alpha=alpha)
    scores = cross_val_score(estimator=lasso, X=X, y=y, cv=10)
    cv_scores.append((np.mean(scores), alpha))

In [20]:
cv_scores

[(0.20262012278999347, 0.0001),
 (0.20343535169466956, 0.001),
 (0.21144430759385185, 0.01),
 (0.2407890782467927, 0.1),
 (0.1807548507575551, 1.0),
 (-1.2860830508551744, 10.0),
 (-1.2860830508551744, 100.0),
 (-1.2860830508551744, 1000.0),
 (-1.2860830508551744, 10000.0)]

In [22]:
lasso.get_params()

{'alpha': 10000.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': False,
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [23]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor()

In [24]:
rf.fit(X, y)



RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

In [27]:
rf.feature_importances_

array([0.04471742, 0.00084582, 0.00709617, 0.00046739, 0.01935113,
       0.36536017, 0.0151697 , 0.07306192, 0.00729555, 0.00865126,
       0.0182723 , 0.0086015 , 0.43110968])

In [30]:
feats = pd.DataFrame({
    'Features': X.columns,
    'Importance': rf.feature_importances_
}).sort_values(by='Importance', ascending=False)

In [34]:
feats['Importance'].cumsum()

12    0.431110
5     0.796470
7     0.869532
0     0.914249
4     0.933600
10    0.951873
6     0.967042
9     0.975694
11    0.984295
8     0.991591
2     0.998687
1     0.999533
3     1.000000
Name: Importance, dtype: float64

In [32]:
rf.score(X, y)

0.9819648943684121

In [39]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [40]:
sigmoid(0)

0.5

In [41]:
sigmoid(7)

0.9990889488055994

In [42]:
sigmoid(100)

1.0

In [43]:
sigmoid(500)

1.0

In [44]:
sigmoid(-500)

7.124576406741285e-218