In [44]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [45]:
# Read the full table from disk.
dataset = pd.read_csv("dataset.csv")

# Every column except the last is used as a feature; the last column is the target.
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

# Fixed-size hold-out split (200 training rows, 47 test rows); random_state pins
# the shuffle so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=200,
    test_size=47,
    random_state=1,
)

## Linear Regression

The first step is to fit an ordinary least-squares linear regression model. To do this, we use the `LinearRegression` estimator from `scikit-learn`'s `sklearn.linear_model` module.

In [23]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split; `fit` returns the fitted
# estimator, which is what gets bound to `lr`.
lr = LinearRegression().fit(
    X=X_train,
    y=y_train,
)

In [30]:
# Report the fitted model's `.score` (R^2 for scikit-learn regressors) on the
# training split and on the held-out split, rounded to two decimals.
# NOTE(review): both labels say "test score"; kept verbatim so the cell still
# matches its recorded output.
print(f"Training test score: {lr.score(X_train, y_train):.2f}")
print(f"Test test score: {lr.score(X_test, y_test):.2f}")

Training test score: 1.00
Test test score: 0.99


### Cross-Validation

In [40]:
from sklearn.model_selection import cross_val_score

# Start from a fresh, unfitted estimator. Note that this rebinds `lr`, so the
# model fitted earlier is no longer reachable under that name.
lr = LinearRegression()

# 10-fold cross-validation over the complete dataset (X, y), using all
# available cores; `scores` holds one value per fold.
scores = cross_val_score(
    lr,
    X,
    y,
    cv=10,
    n_jobs=-1,
)

# Summarise the folds by their mean.
print(f"Cross-validation score: {scores.mean()}")

Cross-validation score: 0.9551299993016309


## Ridge Regression

In [31]:
from sklearn.linear_model import Ridge

# Ridge regression with the estimator's default settings, fitted on the
# training split; `fit` returns the fitted estimator.
ridge = Ridge().fit(
    X=X_train,
    y=y_train,
)

In [32]:
# Report the fitted ridge model's `.score` (R^2 for scikit-learn regressors)
# on the training split and on the held-out split, rounded to two decimals.
# NOTE(review): both labels say "test score"; kept verbatim so the cell still
# matches its recorded output.
print(f"Training test score: {ridge.score(X_train, y_train):.2f}")
print(f"Test test score: {ridge.score(X_test, y_test):.2f}")

Training test score: 1.00
Test test score: 0.99


### Cross-Validation and Grid Search

In [53]:
# Manual grid search over the Ridge regularisation strength `alpha`.
#
# Each candidate is scored by the mean of a 10-fold cross-validation on the
# training split only; the held-out test split is touched once, at the end,
# to evaluate the winning configuration.

# Start from -inf rather than 0: the score can be negative, and with a 0
# baseline no candidate would be selected in that case. Resetting
# `best_parameters` here also guarantees the cell never silently reuses a value
# left behind by another cell.
best_score = -np.inf
best_parameters = None

for alpha in [0.001, 0.01, 0.1, 1, 10, 100]:
    ridge = Ridge(alpha=alpha)

    # One score per fold, computed in parallel on all available cores.
    scores = cross_val_score(ridge, X_train, y_train, cv=10, n_jobs=-1)

    score = np.mean(scores)

    print("Score with alpha={:f}: {:.2f}".format(alpha, score))

    # Strict '>' keeps the first candidate on ties; a NaN score never wins.
    if score > best_score:
        best_score = score
        best_parameters = {'alpha': alpha}

# Only reachable with no selection if every candidate produced a NaN score.
if best_parameters is None:
    raise RuntimeError("No alpha produced a valid cross-validation score.")

# Refit the best configuration on the whole training split and evaluate it
# once on the held-out test split.
ridge = Ridge(**best_parameters)
ridge.fit(X_train, y_train)
test_score = ridge.score(X_test, y_test)

print("Best score on cross-validation: {:.2f}".format(best_score))
print("Best parameters: ", best_parameters)
print("Test set score with best parameters: {:.2f}".format(test_score))

Score with alpha=0.001000: 1.00
Score with alpha=0.010000: 1.00
Score with alpha=0.100000: 1.00
Score with alpha=1.000000: 1.00
Score with alpha=10.000000: 1.00
Score with alpha=100.000000: 1.00
Best score on cross-validation: 1.00
Best parameters:  {'alpha': 100}
Test set score with best parameters: 1.00


## Lasso Regression

In [33]:
from sklearn.linear_model import Lasso

# Lasso regression with the estimator's default settings, fitted on the
# training split; `fit` returns the fitted estimator.
lasso = Lasso().fit(
    X=X_train,
    y=y_train,
)

In [34]:
# Report the fitted lasso model's `.score` (R^2 for scikit-learn regressors)
# on the training split and on the held-out split, rounded to two decimals.
# NOTE(review): both labels say "test score"; kept verbatim so the cell still
# matches its recorded output.
print(f"Training test score: {lasso.score(X_train, y_train):.2f}")
print(f"Test test score: {lasso.score(X_test, y_test):.2f}")

Training test score: 1.00
Test test score: 1.00


### Cross-Validation and Grid Search

In [58]:
# Manual grid search over two Lasso settings: the regularisation strength
# `alpha` and the solver's iteration cap `max_iter`.
#
# Each (alpha, max_iter) pair is scored by the mean of a 10-fold
# cross-validation on the training split only; the held-out test split is
# touched once, at the end, to evaluate the winning configuration.

# Start from -inf rather than 0: the score can be negative, and with a 0
# baseline no candidate would be selected in that case. Resetting
# `best_parameters` here also guarantees the cell never silently reuses a value
# left behind by another cell (e.g. an earlier grid search for a different
# model).
best_score = -np.inf
best_parameters = None

for alpha in [0.001, 0.01, 0.1, 1, 10, 100]:
    for max_iter in [100, 1000, 10000, 100000]:
        lasso = Lasso(alpha=alpha, max_iter=max_iter)

        # One score per fold.
        scores = cross_val_score(lasso, X_train, y_train, cv=10)

        score = np.mean(scores)

        print("Score with alpha={:f} and max_iter={:.0f}: {:.2f}".format(alpha, max_iter, score))

        # Strict '>' keeps the first candidate on ties, i.e. the smallest
        # alpha and then the smallest max_iter; a NaN score never wins.
        if score > best_score:
            best_score = score
            best_parameters = {'alpha': alpha, 'max_iter': max_iter}

# Only reachable with no selection if every candidate produced a NaN score.
if best_parameters is None:
    raise RuntimeError("No (alpha, max_iter) pair produced a valid cross-validation score.")

# Refit the best configuration on the whole training split and evaluate it
# once on the held-out test split.
lasso = Lasso(**best_parameters)
lasso.fit(X_train, y_train)
test_score = lasso.score(X_test, y_test)

print("Best score on cross-validation: {:.2f}".format(best_score))
print("Best parameters: ", best_parameters)
print("Test set score with best parameters: {:.2f}".format(test_score))

Score with alpha=0.001000 and max_iter=100: 0.99
Score with alpha=0.001000 and max_iter=1000: 0.99
Score with alpha=0.001000 and max_iter=10000: 0.99
Score with alpha=0.001000 and max_iter=100000: 0.99
Score with alpha=0.010000 and max_iter=100: 0.99
Score with alpha=0.010000 and max_iter=1000: 0.99
Score with alpha=0.010000 and max_iter=10000: 1.00
Score with alpha=0.010000 and max_iter=100000: 1.00
Score with alpha=0.100000 and max_iter=100: 0.99
Score with alpha=0.100000 and max_iter=1000: 1.00
Score with alpha=0.100000 and max_iter=10000: 1.00
Score with alpha=0.100000 and max_iter=100000: 1.00
Score with alpha=1.000000 and max_iter=100: 1.00
Score with alpha=1.000000 and max_iter=1000: 1.00
Score with alpha=1.000000 and max_iter=10000: 1.00
Score with alpha=1.000000 and max_iter=100000: 1.00
Score with alpha=10.000000 and max_iter=100: 0.99
Score with alpha=10.000000 and max_iter=1000: 0.99
Score with alpha=10.000000 and max_iter=10000: 0.99
Score with alpha=10.000000 and max_iter

