In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Fetch the California housing dataset and separate features from the target.
data = fetch_california_housing()
X = data.data
y = data.target

In [None]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Define parameter grid for ridge regression

In [None]:
# Candidate regularization strengths for Ridge, spaced a factor of ten apart.
params = {
    'alpha': [0.1, 1.0, 10.0, 100.0],
}

## Setup GridSearchCV

In [None]:
# 5-fold cross-validation over the alpha grid. Scores are maximised by the
# search, so the MSE is passed in its negated form.
grid = GridSearchCV(
    estimator=Ridge(),
    param_grid=params,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid.fit(X_train, y_train)

## Ridge (L2 penalty) shrinks all coefficients toward zero without removing any feature; a larger alpha means stronger shrinkage

## Print the best hyperparameters

In [None]:
# Show the alpha value that achieved the best cross-validated score.
print("Best Hyperparameters:", grid.best_params_)

Best Hyperparameters: {'alpha': 10.0}


## Train final model with the best alpha

In [None]:
# Take the alpha selected by the grid search and fit a fresh Ridge model
# with it on the whole training split.
best_alpha = grid.best_params_['alpha']
final_model = Ridge(alpha = best_alpha)
final_model.fit(X_train, y_train)

## Evaluate on test data

In [None]:
# Score the tuned Ridge model on the held-out test split (lower MSE is better).
y_pred_ridge = final_model.predict(X_test)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
print("MSE_test_data (Ridge):", mse_ridge)

MSE_test_data (Ridge): 0.5550405537342994


In [None]:
# Baseline for comparison: unregularized linear regression fitted on the same
# training split and scored on the same test split as the Ridge model.
linear_model = LinearRegression().fit(X_train, y_train)

y_pred_linear = linear_model.predict(X_test)
mse_linear = mean_squared_error(y_test, y_pred_linear)

print("MSE_test_data (Linear):", mse_linear)

MSE_test_data (Linear): 0.5558915986952422


## Logistic Regression with GridSearchCV

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

## Loading the data

In [None]:
# Load iris and turn the task into a binary one: class 0 (setosa) versus the rest.
data = load_iris()
X = data.data
y = (data.target == 0).astype(int)  # 1 = setosa, 0 = not setosa

## Train test split

In [None]:
# Hold out 20% of the samples for the final evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Setup the model

In [None]:
# liblinear is chosen because it supports both the 'l1' and 'l2' penalties that
# the parameter grid searches over. random_state fixes the solver's data
# shuffling so repeated runs give the same result.
# The estimator is deliberately not fitted here: GridSearchCV clones it and
# fits the clones itself, so a fit at this point would be discarded work.
model = LogisticRegression(solver = 'liblinear', random_state = 42)

## List params

In [None]:
# Search grid for logistic regression: every (C, penalty) pair is evaluated.
params = {
    'C': [0.01, 0.1, 1, 10],
    'penalty': ['l1', 'l2'],  # l1 = lasso-style, l2 = ridge-style
}
## GridSearchCV tries each combination of the values listed above
## C is the inverse of the regularization strength: a smaller C means stronger regularization, not automatically a better model

## Lasso (L1 penalty) can shrink the coefficients of less useful features exactly to zero, so it effectively performs feature selection

## Setup grid search

In [None]:
# 5-fold cross-validated search over the logistic-regression grid, ranked by accuracy.
grid = GridSearchCV(
    estimator=model,
    param_grid=params,
    cv=5,
    scoring='accuracy',
)
grid.fit(X_train, y_train)

## Best param


In [None]:
# Show the (C, penalty) combination with the best cross-validated accuracy.
print("best params:", grid.best_params_)

best params: {'C': 0.1, 'penalty': 'l1'}


## Evaluate the model

In [None]:
# Take the estimator exposed by the grid search for its best parameter
# combination and predict labels for the held-out test split.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

## Model performance

In [None]:
# Accuracy of the best logistic model found by the grid search, measured on the test split
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [None]:
# Accuracy of the baseline logistic model (default hyperparameters, no grid search)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred1 = model.predict(X_test)

# Score the baseline's own predictions (y_pred1). Scoring y_pred here would
# just repeat the accuracy of the grid-search model.
accuracy1 = accuracy_score(y_test, y_pred1)
print("Accuracy:", accuracy1)

Accuracy: 1.0
