In [1]:
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Read the raw records from the CSV file.
df = pd.read_csv("chennai_land_data.csv")

# One LabelEncoder per categorical column. Each encoder keeps its own name
# because the fitted objects are pickled next to the model later in this cell.
le_locality = LabelEncoder()
le_land = LabelEncoder()
le_road = LabelEncoder()

# Overwrite each text column with the integer codes learned by its encoder.
for column, encoder in (
    ("locality", le_locality),
    ("land_type", le_land),
    ("road_access", le_road),
):
    df[column] = encoder.fit_transform(df[column])

# Model inputs (column order matters to the fitted model) and the target.
feature_columns = ["area", "locality", "distance", "road_access", "land_type"]
X = df[feature_columns]
y = df["price"]

# Fixed seed shared by the split and the forest so that the held-out rows,
# the fitted model, and the reported score are identical on every
# Restart & Run All.
RANDOM_STATE = 42

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Fit a 300-tree random forest regressor on the training rows.
model = RandomForestRegressor(n_estimators=300, random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Score the model on the held-out rows.
# NOTE: r2_score is the coefficient of determination (a regression metric),
# not a classification accuracy. The `accuracy` name and the printed label
# are kept unchanged because the value is stored under that key when the
# model bundle is saved.
pred = model.predict(X_test)
accuracy = r2_score(y_test, pred)

print("Model Accuracy:", accuracy)

# Save model + encoders.
# The fitted model, the three fitted label encoders, and the held-out R^2
# score are bundled into a single dict and pickled to one file.
artifacts = {
    "model": model,
    "le_locality": le_locality,
    "le_land": le_land,
    "le_road": le_road,
    "accuracy": accuracy,
}

# The context manager guarantees the file is flushed and closed even if
# pickling raises; the previous inline open() left the handle to the
# garbage collector.
with open("model.pkl", "wb") as model_file:
    pickle.dump(artifacts, model_file)


Model Accuracy: 0.8059019443477948
