### Import Necessary Libraries

In [48]:
import numpy as np
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error



In [49]:
# Load the prepared dataset from CSV and preview its first rows.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs where that exact location exists.
dataset_csv = r"D:\Melbin\SELF\House-Price-Prediction\data\final_dataset.csv"
df = pd.read_csv(dataset_csv)
df.head()

Unnamed: 0,Amount in rupees,Price (in rupees),location,Carpet Area in sqft,Floor,Transaction,Furnishing,facing,overlooking,Bathroom,Balcony,Ownership
0,-0.202606,-0.06108617,1.881061,-0.333482,-1.415544,0.547069,1.07663,1.063822,1.057353,-1.653261,-0.000319,1.341873
1,-0.056796,0.2397217,1.881061,-0.345214,0.378898,0.547069,-0.458287,-1.146945,-1.327164,-0.492814,-0.000319,-0.743276
2,0.05256,0.3824695,1.881061,-0.212259,-1.352335,0.547069,1.07663,-1.146945,-1.327164,-0.492814,-0.000319,-0.743276
3,-0.246869,1.052378e-16,1.881061,-0.320448,-1.47173,0.547069,1.07663,1.063822,1.057353,-1.653261,-1.22302,1.341873
4,0.104635,0.4335362,1.881061,-0.274826,-0.218078,0.547069,1.07663,1.379645,-1.201663,-0.492814,-0.000319,-1.438326


### Split features and target

In [50]:
# Separate the regression target from the remaining columns, which are all
# used as predictors.
target_column = 'Amount in rupees'
y = df[target_column]
X = df.drop(columns=[target_column])

### Train test split

In [51]:
from urllib.parse import urlparse

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
x_train, x_test, y_train, y_test = split_parts

In [None]:
## Hyperparameter Tuning using Grid Search

def hyperparameter_tuning(x_train, y_train, param_grid=None, cv=5,
                          scoring='neg_mean_squared_error'):
    """Run a cross-validated grid search over LinearRegression settings.

    Parameters
    ----------
    x_train
        Training features, passed unchanged to ``GridSearchCV.fit``.
    y_train
        Training targets, passed unchanged to ``GridSearchCV.fit``.
    param_grid : dict, optional
        Hyperparameter grid to search. When omitted, both values of
        ``fit_intercept`` and ``positive`` are tried (the original
        hard-coded grid).
    cv : int, default 5
        Cross-validation setting forwarded to ``GridSearchCV``.
    scoring : str, default 'neg_mean_squared_error'
        Scoring setting forwarded to ``GridSearchCV``.

    Returns
    -------
    GridSearchCV
        The search object after ``fit`` has been called on the training data.
    """
    # Build the default grid per call instead of using a mutable default
    # argument, so callers can never share or mutate it by accident.
    if param_grid is None:
        param_grid = {
            'fit_intercept': [True, False],
            'positive': [True, False],
        }

    grid_search = GridSearchCV(
        LinearRegression(),
        param_grid,
        cv=cv,
        scoring=scoring,
    )
    grid_search.fit(x_train, y_train)

    return grid_search


In [53]:
from mlflow.models import infer_signature

# Tracking / registry settings used by this cell.
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"
EXPERIMENT_NAME = "House_Price_Prediction"
REGISTERED_MODEL_NAME = "Best_Linear_Regression_Model"

# The tracking URI must be set BEFORE the experiment is selected and the run
# is started. Setting it inside the run (as this cell previously did) creates
# the experiment and run against whatever URI was active at that moment and
# then switches stores mid-run.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

# Schema stored alongside the logged model.
# NOTE(review): the output side is inferred from y_train (the labels), not
# from model predictions - confirm this is the intended output schema.
signature = infer_signature(x_train, y_train)

## Start MLflow experiment
with mlflow.start_run():

    # Hyperparameter tuning
    grid_search = hyperparameter_tuning(x_train, y_train)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Log the grid that was actually searched, read back from the search
    # object, so the logged values cannot drift from a separately maintained
    # copy of the grid.
    params = grid_search.param_grid
    mlflow.log_params({f"param_grid_{name}": values for name, values in params.items()})

    # Record the selected hyperparameters on the run as well (previously they
    # were only printed).
    mlflow.log_params({f"best_{name}": value for name, value in grid_search.best_params_.items()})

    # Make predictions on the held-out split
    predictions = best_model.predict(x_test)

    # Calculate and log metrics
    mse = mean_squared_error(y_test, predictions)
    mlflow.log_metric("mse", mse)

    # Print key results
    print("Best parameters:", grid_search.best_params_)
    print(f"Mean Squared Error: {mse}")
    print(f"Best Model: {best_model}")

    # Log the model exactly once, always with its signature. It is registered
    # in the model registry only when the tracking store is not a local
    # "file" store; registered_model_name=None means "log without registering".
    tracking_uri_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    registered_model_name = REGISTERED_MODEL_NAME if tracking_uri_type_store != "file" else None
    mlflow.sklearn.log_model(
        best_model,
        "model",
        signature=signature,
        registered_model_name=registered_model_name,
    )

2025/06/02 17:17:05 INFO mlflow.tracking.fluent: Experiment with name 'House_Price_Prediction' does not exist. Creating a new experiment.


Best parameters: {'fit_intercept': False, 'positive': False}
Mean Squared Error: 0.05652426166888758
Best Model: LinearRegression(fit_intercept=False)


Successfully registered model 'Best_Linear_Regression_Model'.
2025/06/02 17:17:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Best_Linear_Regression_Model, version 1


🏃 View run hilarious-fox-586 at: http://127.0.0.1:5000/#/experiments/841662411988765565/runs/e0de9d0a7c644867b522b137404d2cb5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/841662411988765565


Created version '1' of model 'Best_Linear_Regression_Model'.
