In [None]:
!pip install -U scikit-learn
!pip install streamlit
!pip install shap


Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Downloading scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.6.1
    Uninstalling scikit-learn-1.6.1:
      Successfully uninstalled scikit-learn-1.6.1
Successfully installed scikit-learn-1.7.2
Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m114.9 MB/s[0m eta [36m0:00:00[0m
[?25hD

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib


In [None]:
# Fetch the California Housing data as one DataFrame (features + target)
california = fetch_california_housing(as_frame=True)
df = california.frame

# Separate the target column from the predictors
y = df['MedHouseVal']
X = df.drop(columns='MedHouseVal')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Function to add new features
def add_features(X):
    """Return a copy of ``X`` with two derived ratio columns appended.

    The input frame is left untouched. Both new columns divide by
    ``HouseAge + 1``:

    * ``RoomsPerHousehold``      = ``AveRooms``   / (``HouseAge`` + 1)
    * ``PopulationPerHousehold`` = ``Population`` / (``HouseAge`` + 1)

    NOTE(review): despite the "PerHousehold" names, the denominator is the
    house age, not a household count -- confirm this is the intended feature.
    """
    age_plus_one = X['HouseAge'] + 1
    return X.assign(
        RoomsPerHousehold=X['AveRooms'] / age_plus_one,
        PopulationPerHousehold=X['Population'] / age_plus_one,
    )

# Wrap add_features so it can be used as the first step of a Pipeline.
feature_engineering = FunctionTransformer(func=add_features)


In [None]:
original_features = X.columns.tolist()

# Columns appended by add_features(). They must be listed explicitly:
# ColumnTransformer drops every column that is not named in a transformer
# (remainder='drop' is the default), so selecting only the original columns
# would silently discard the engineered features before the regressor.
engineered_features = ['RoomsPerHousehold', 'PopulationPerHousehold']
model_features = original_features + engineered_features

# All model inputs are numeric, so a single standardisation step suffices.
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, model_features)
])


In [None]:
def _regression_pipeline(regressor):
    """Chain feature engineering, preprocessing and the given regressor."""
    return Pipeline(steps=[
        ('feature_engineering', feature_engineering),
        ('preprocessor', preprocessor),
        ('regressor', regressor),
    ])


# Ordinary least squares: nothing to tune, so the bare pipeline is used
lr_pipeline = _regression_pipeline(LinearRegression())

# Ridge: 5-fold grid search over the L2 penalty strength
ridge_pipeline = _regression_pipeline(Ridge())
ridge_params = {'regressor__alpha': [0.1, 1.0, 10.0, 50.0]}
ridge_search = GridSearchCV(
    ridge_pipeline,
    ridge_params,
    cv=5,
    scoring='neg_mean_squared_error',
)

# Lasso: 5-fold grid search over the L1 penalty strength
lasso_pipeline = _regression_pipeline(Lasso(max_iter=10000))
lasso_params = {'regressor__alpha': [0.001, 0.01, 0.1, 1.0]}
lasso_search = GridSearchCV(
    lasso_pipeline,
    lasso_params,
    cv=5,
    scoring='neg_mean_squared_error',
)


In [None]:
def evaluate_model(model, X_test, y_test):
    """Print MAE, RMSE and R2 of ``model`` on the given hold-out data.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X_test : features to predict on.
    y_test : true target values matching ``X_test``.

    Nothing is returned; the three metrics and a separator line are printed.
    """
    predictions = model.predict(X_test)
    # Each metric is computed lazily, right before it is printed.
    report = (
        ("MAE:", lambda: mean_absolute_error(y_test, predictions)),
        ("RMSE:", lambda: np.sqrt(mean_squared_error(y_test, predictions))),
        ("R2:", lambda: r2_score(y_test, predictions)),
    )
    for label, compute in report:
        print(label, compute())
    print("-" * 40)

# Baseline: plain linear regression, fitted directly on the training split
print("=== Linear Regression ===")
lr_pipeline.fit(X_train, y_train)
evaluate_model(lr_pipeline, X_test, y_test)

# Regularised models: run each grid search, report the chosen alpha, then
# score the refitted best estimator on the hold-out split
tuned_runs = (
    ("=== Ridge Regression ===", "Best Ridge alpha:", ridge_search),
    ("=== Lasso Regression ===", "Best Lasso alpha:", lasso_search),
)
for banner, best_label, search in tuned_runs:
    print(banner)
    search.fit(X_train, y_train)
    print(best_label, search.best_params_)
    evaluate_model(search.best_estimator_, X_test, y_test)


=== Linear Regression ===
MAE: 0.5332001304956565
RMSE: 0.7455813830127763
R2: 0.575787706032451
----------------------------------------
=== Ridge Regression ===
Best Ridge alpha: {'regressor__alpha': 0.1}
MAE: 0.5331994146968931
RMSE: 0.7455789118982769
R2: 0.5757905180002312
----------------------------------------
=== Lasso Regression ===
Best Lasso alpha: {'regressor__alpha': 0.001}
MAE: 0.5331447750392391
RMSE: 0.7446417662764214
R2: 0.5768562568705682
----------------------------------------


In [None]:
# Pick the model to ship programmatically instead of hard-coding one.
# Both searches use the same 5-fold CV and the same scorer
# ('neg_mean_squared_error'), so their best_score_ values are directly
# comparable (higher is better). Selecting on the cross-validated score,
# not the test-set metrics, keeps the hold-out split an honest estimate.
# The untuned LinearRegression pipeline has no CV score and is not a candidate.
tuned_searches = {'Ridge': ridge_search, 'Lasso': lasso_search}
best_name = max(tuned_searches, key=lambda name: tuned_searches[name].best_score_)
best_model = tuned_searches[best_name].best_estimator_
print(f"Selected {best_name} (mean CV neg-MSE: {tuned_searches[best_name].best_score_:.4f})")

joblib.dump(best_model, 'best_housing_model.pkl')
print("Saved best model as 'best_housing_model.pkl'")


Saved best model as 'best_housing_model.pkl'


In [None]:
# Save the app as app.py
#
# The saved pipeline contains FunctionTransformer(add_features). pickle/joblib
# store plain functions by reference (module name + function name), not by
# value, and add_features was defined in this notebook's __main__. The process
# that loads the model must therefore expose a top-level add_features in its
# own __main__ before joblib.load runs, otherwise unpickling fails with
# "Can't get attribute 'add_features'". The generated script defines it first.
streamlit_code = """
import streamlit as st
import pandas as pd
import joblib


# Keep this identical to add_features() in the training notebook: the pickled
# pipeline resolves the function by name and calls this definition at
# prediction time.
def add_features(X):
    X = X.copy()
    X['RoomsPerHousehold'] = X['AveRooms'] / (X['HouseAge'] + 1)
    X['PopulationPerHousehold'] = X['Population'] / (X['HouseAge'] + 1)
    return X


model = joblib.load('best_housing_model.pkl')

st.title("California Housing Price Predictor")
st.write("Enter the details of the house:")

MedInc = st.number_input("Median Income (MedInc)", min_value=0.0, value=3.0)
HouseAge = st.number_input("House Age (HouseAge)", min_value=0.0, value=20.0)
AveRooms = st.number_input("Average Rooms (AveRooms)", min_value=0.0, value=5.0)
AveBedrms = st.number_input("Average Bedrooms (AveBedrms)", min_value=0.0, value=1.0)
Population = st.number_input("Population", min_value=0.0, value=1000.0)
AveOccup = st.number_input("Average Occupancy (AveOccup)", min_value=0.0, value=3.0)
Latitude = st.number_input("Latitude", min_value=-90.0, max_value=90.0, value=34.0)
Longitude = st.number_input("Longitude", min_value=-180.0, max_value=180.0, value=-118.0)

input_df = pd.DataFrame({
    'MedInc': [MedInc],
    'HouseAge': [HouseAge],
    'AveRooms': [AveRooms],
    'AveBedrms': [AveBedrms],
    'Population': [Population],
    'AveOccup': [AveOccup],
    'Latitude': [Latitude],
    'Longitude': [Longitude]
})

if st.button("Predict"):
    prediction = model.predict(input_df)
    st.success(f"Predicted Median House Value: ${prediction[0]*100000:.2f}")
"""

# Explicit encoding so the written file does not depend on the platform default
with open("app.py", "w", encoding="utf-8") as f:
    f.write(streamlit_code)

print("Streamlit app saved as 'app.py'")


Streamlit app saved as 'app.py'


In [None]:
!nohup streamlit run app.py --server.port 8501 --server.address=0.0.0.0 >/dev/null 2>&1 &


In [None]:
# Colab-only: ask the Colab front end for the proxied URL of local port 8501
# (the port the Streamlit server was started on) and print it.
from google.colab.output import eval_js

proxy_url = eval_js("google.colab.kernel.proxyPort(8501)")
print(proxy_url)


https://8501-m-s-3czqwc2nxwpmg-a.asia-east1-0.prod.colab.dev
