In [1]:
import sklearn

In [6]:
from sklearn.datasets import  fetch_california_housing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.linear_model import SGDRegressor
from sklearn.feature_selection import SelectKBest,f_regression 
from sklearn.metrics import mean_squared_error,r2_score
import numpy as np

In [23]:
X,y=fetch_california_housing(return_X_y= True)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

#pipeline
p_l=Pipeline([("scaler",StandardScaler()),
             ("select_feature",SelectKBest(score_func=f_regression)),
             ("model",SGDRegressor(random_state=42))])
#Hyperparameter grid

param_grid={
    "select_feature__k": [4, 6,7, 8],
    "model__penalty": ["l2", "l1"],
    "model__learning_rate": ["invscaling", "adaptive"],
    "model__eta0": [1,0.1, 0.01, 0.001],
    "model__max_iter": [1000, 2000]
}

grid=GridSearchCV(
    p_l,
    param_grid,
    cv=5,
    scoring="r2",
    n_jobs=-1
)

grid.fit(X_train,y_train)

best_model=grid.best_estimator_

y_pred=best_model.predict(X_test)

print("Best Parameters:", grid.best_params_)
print("Test R2 Score:", r2_score(y_test, y_pred))
print("Test RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

# 8️⃣ Cross-validation score of best model
cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring="r2")
print("CV Mean R2:", cv_scores.mean())

Best Parameters: {'model__eta0': 1, 'model__learning_rate': 'adaptive', 'model__max_iter': 1000, 'model__penalty': 'l2', 'select_feature__k': 7}
Test R2 Score: 0.5755249347541551
Test RMSE: 0.7458122662825726
CV Mean R2: 0.611019656317968


In [12]:
X[1]

array([ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,  9.71880492e-01,
        2.40100000e+03,  2.10984183e+00,  3.78600000e+01, -1.22220000e+02])

In [22]:
best_model.predict(X_test[1:5])

array([1.75035167, 2.72934653, 2.82246304, 2.54989046])

In [6]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score,mean_squared_error
X, y = fetch_california_housing(return_X_y=True)
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
scaler=StandardScaler()
x_norm=scaler.fit_transform(x_train)

In [7]:
sgd=SGDRegressor(max_iter=1000)
result=sgd.fit(x_norm,y_train)

In [10]:
print(result.intercept_,"\n",
result.coef_)

[2.06434187] 
 [ 8.66936739e-01  1.19790249e-01 -3.08001549e-01  3.14707727e-01
 -3.07043171e-04 -8.79479986e-03 -8.94050459e-01 -8.79943790e-01]


In [11]:
result.predict(scaler.fit_transform(x_test))

array([0.70542304, 1.73018283, 2.69750686, ..., 4.63228382, 1.21803455,
       2.00888669], shape=(4128,))