In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def load_and_encode_data(path="data/house2.csv", cat_cols=("location",)):
    """Load the housing CSV, impute numeric nulls and label-encode categoricals.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.
    cat_cols : str or iterable of str
        Categorical column(s) to encode. Each one gets a sibling
        ``<col>_encoded`` column holding the integer codes. Defaults to the
        single ``"location"`` column.

    Returns
    -------
    tuple (pandas.DataFrame, dict)
        The prepared frame and a ``{column name: fitted LabelEncoder}``
        mapping, so callers can encode new values consistently.

    Raises
    ------
    KeyError
        If a name in ``cat_cols`` is not a column of the CSV.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(cat_cols, str):
        cat_cols = [cat_cols]

    df = pd.read_csv(path)

    # Fill gaps in numeric columns with that column's median. Non-numeric
    # columns are left untouched, so missing categorical values are NOT
    # imputed here and reach the encoder as-is.
    df = df.fillna(df.median(numeric_only=True))

    # Fit one encoder per categorical column and keep it for later reuse.
    encoders = {}
    for col in cat_cols:
        le = LabelEncoder()
        df[col + "_encoded"] = le.fit_transform(df[col])
        encoders[col] = le

    return df, encoders

In [5]:
import pickle
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Train & Save Models
def _save_model(model, path):
    """Pickle ``model`` to ``path``, closing the file even if dumping fails."""
    with open(path, "wb") as fh:
        pickle.dump(model, fh)


def train_models(df, model_dir="models"):
    """Fit the three price models on ``df`` and pickle each one to disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``sqft``, ``bedrooms``, ``bathrooms``,
        ``location_encoded``, ``price`` and ``date``.
    model_dir : str
        Directory the ``*.pkl`` files are written to. It must already exist;
        this function does not create it.

    Returns
    -------
    None
        The fitted models are only persisted, not returned.
    """
    X = df[["sqft", "bedrooms", "bathrooms", "location_encoded"]]
    y = df["price"]

    # Linear Regression
    _save_model(LinearRegression().fit(X, y), f"{model_dir}/linear_model.pkl")

    # XGBoost
    _save_model(XGBRegressor().fit(X, y), f"{model_dir}/xgb_model.pkl")

    # SARIMA / SARIMAX (time series) on the mean price per distinct date.
    # NOTE(review): groupby sorts by the raw "date" values; if they are
    # strings the order is lexicographic -- confirm the column is parsed
    # as datetimes before relying on the seasonal period of 12.
    ts = df.groupby("date")["price"].mean()
    sarimax = SARIMAX(ts, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)).fit()
    _save_model(sarimax, f"{model_dir}/sarimax_model.pkl")

def load_models(model_dir="models"):
    """Load the pickled models, mapping any that cannot be loaded to ``None``.

    Parameters
    ----------
    model_dir : str
        Directory containing ``linear_model.pkl``, ``xgb_model.pkl`` and
        ``sarimax_model.pkl``.

    Returns
    -------
    dict
        ``{file name: unpickled object or None}`` with one entry per expected
        file. Loading is best-effort: a missing or unreadable file yields
        ``None`` instead of raising.
    """
    models = {}
    for m in ["linear_model.pkl", "xgb_model.pkl", "sarimax_model.pkl"]:
        try:
            # SECURITY: pickle.load can execute arbitrary code embedded in the
            # file. Only point model_dir at files this project wrote itself.
            with open(f"{model_dir}/{m}", "rb") as fh:
                models[m] = pickle.load(fh)
        except Exception:
            # Deliberately broad (unpickling can fail in many ways), but unlike
            # a bare ``except`` it lets KeyboardInterrupt/SystemExit propagate.
            models[m] = None
    return models

def predict(models, new_house):
    """Average the price estimates of every available model.

    Parameters
    ----------
    models : dict
        Mapping as returned by ``load_models``. Entries that are missing or
        ``None`` are skipped.
    new_house : dict
        Feature name -> value for a single house; it becomes a one-row
        DataFrame passed to the regressors' ``predict``.

    Returns
    -------
    numpy.float64
        Mean of the individual predictions, or NaN when no model is available.
    """
    X = pd.DataFrame([new_house])

    preds = []

    # Feature-based regressors. Compare against None explicitly rather than
    # relying on the truthiness of an arbitrary model object.
    for key in ("linear_model.pkl", "xgb_model.pkl"):
        regressor = models.get(key)
        if regressor is not None:
            preds.append(regressor.predict(X)[0])

    # Time-series model: a one-step-ahead forecast that ignores new_house.
    ts_model = models.get("sarimax_model.pkl")
    if ts_model is not None:
        forecast = ts_model.forecast(steps=1)
        # The forecast may be a Series indexed by the next period rather than
        # by 0, so take the first value by position, not by label.
        preds.append(np.asarray(forecast).ravel()[0])

    if not preds:
        # Same result np.mean([]) would give, without the RuntimeWarning.
        return np.float64("nan")
    return np.mean(preds)

In [6]:
from sklearn.neighbors import NearestNeighbors

def comparative_market_analysis(new_house, df, k=10):
    """Summarise the prices of the ``k`` houses most similar to ``new_house``.

    Similarity is nearest-neighbour distance over the raw (unscaled) columns
    ``sqft``, ``bedrooms``, ``bathrooms`` and ``location_encoded``.

    Parameters
    ----------
    new_house : dict
        Feature name -> value for the house being valued. Keys other than the
        four feature columns are ignored.
    df : pandas.DataFrame
        Reference listings; must contain the feature columns and ``price``.
    k : int
        Number of comparables requested. Capped at ``len(df)`` so a small
        dataset does not make the neighbour query fail.

    Returns
    -------
    dict
        ``low`` / ``median`` / ``high`` (25th, 50th and 75th price percentiles
        of the comparables) and ``similar`` (the comparable rows themselves).
    """
    features = ["sqft", "bedrooms", "bathrooms", "location_encoded"]

    # kneighbors rejects n_neighbors larger than the number of fitted rows.
    k = min(k, len(df))
    knn = NearestNeighbors(n_neighbors=k).fit(df[features])

    # Align the query with the fitted column order regardless of the order
    # (or extra keys) of the incoming dict.
    new_df = pd.DataFrame([new_house]).reindex(columns=features)
    _, indices = knn.kneighbors(new_df)
    similar = df.iloc[indices[0]]

    return {
        "low": similar["price"].quantile(0.25),
        "median": similar["price"].median(),
        "high": similar["price"].quantile(0.75),
        "similar": similar
    }

In [8]:
from rasa_sdk import Action
from app.models import load_models, predict
from app.utils import load_and_encode_data, comparative_market_analysis

class ActionPredictPrice(Action):
    """Rasa custom action that replies with a price estimate and a CMA range.

    Reads the ``sqft``, ``bedrooms`` and ``location`` slots, runs the model
    ensemble and the comparative market analysis, and sends one text message.
    """

    # Bathroom count assumed for every request; no bathrooms slot is read.
    DEFAULT_BATHROOMS = 2

    def name(self):
        """Return the action name this class is registered under."""
        return "action_predict_price"

    def run(self, dispatcher, tracker, domain):
        """Validate the slots, compute the estimate and utter the result.

        Always returns an empty event list. When the slots are unusable or the
        location is unknown, an explanatory message is sent instead of raising.
        """
        location = tracker.get_slot("location")

        # Slots can be unset (None) or non-numeric text, so coerce defensively
        # before anything reaches the models.
        try:
            sqft = float(tracker.get_slot("sqft"))
            bedrooms = float(tracker.get_slot("bedrooms"))
        except (TypeError, ValueError):
            dispatcher.utter_message(
                text="I need the area (sqft) and the number of bedrooms as numbers to estimate a price."
            )
            return []

        if not location:
            dispatcher.utter_message(text="Please tell me the location of the house.")
            return []

        # NOTE(review): the CSV and the pickles are reloaded on every call;
        # consider caching them if this action turns out to be slow.
        df, encoders = load_and_encode_data()
        models = load_models()

        try:
            loc_encoded = encoders["location"].transform([location])[0]
        except ValueError:
            # LabelEncoder raises ValueError for labels it was not fitted on.
            dispatcher.utter_message(text=f"Sorry, I don't have any data for '{location}' yet.")
            return []

        house = {
            "sqft": sqft,
            "bedrooms": bedrooms,
            "bathrooms": self.DEFAULT_BATHROOMS,
            "location_encoded": loc_encoded,
        }
        price = predict(models, house)

        cma = comparative_market_analysis(house, df)

        # predict() yields NaN when no model could be loaded; NaN != NaN.
        if price != price:
            predicted_line = "🏠 Predicted: unavailable (no trained models found)\n"
        else:
            predicted_line = f"🏠 Predicted: ₹{price:,.0f}\n"

        dispatcher.utter_message(text=predicted_line +
                                      f"📊 CMA Range: ₹{cma['low']:,.0f} - ₹{cma['high']:,.0f}\n"
                                      f"💡 Median: ₹{cma['median']:,.0f}")
        return []

ModuleNotFoundError: No module named 'rasa_sdk'