In [None]:
pip install streamlit



## Importing all necessary Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import streamlit as st

## Loading Dataset & Getting a Preview

In [None]:
# Read the rental listings from the CSV file in the working directory.
housing = pd.read_csv(filepath_or_buffer='property_rent_data.csv')


Dataset Preview

In [None]:
# Show the first five rows as the cell's output.
housing.head(n=5)

Unnamed: 0,Type,Number of Rooms,Number of Bathrooms,Area (sqft),Amenities Distance Avg (km),Parking Spaces,Age (years),Location,Crime Rate,Lower Status Population,Avg People in Building,Rent Price
0,Flat,4,1,922.441,2.0858,0,22.3,Saket,1.29,0.24,43.8702,42641.971016
1,PG,1,1,156.777,14.2736,0,8.7,Mayur Vihar,0.84,8.53,2.3291,8924.301837
2,Flat,4,2,994.201,14.8662,2,14.2,Mayur Vihar,1.43,6.4,49.8276,47633.595382
3,Flat,3,2,618.653,8.5457,2,15.6,Vasant Kunj,0.72,1.98,46.9214,38008.168384
4,Flat,3,1,799.278,3.1601,1,16.3,New Delhi,1.37,1.82,27.9711,43123.397382



## Display information about the dataset

In [None]:
# Write the page heading, a sub-heading, and the first five rows, in that order.
for element in ("## Flat/PG Rent Prediction", "### Sample Dataset", housing.head()):
    st.write(element)

## Check for missing values

In [None]:
# Per-column null counts; the report is only rendered when some column has gaps.
missing_values = housing.isna().sum()
if missing_values.gt(0).any():
    st.write("### Missing Values in Dataset")
    st.write(missing_values)
    st.warning("Dataset contains missing values. Please preprocess accordingly.")


## Separate features and target variable

In [None]:
# Target: the rent column. Features: every remaining column of the frame.
y = housing['Rent Price']
X = housing.drop(columns=['Rent Price'])

## Preprocess categorical variables (one-hot encoding)

In [None]:
# One-hot encode the categorical columns. drop_first=True omits the first
# category of each column, which then acts as the all-zeros baseline.
X = pd.get_dummies(
    data=X,
    drop_first=True,
)

## Train-Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Train multiple models

In [123]:

# Candidate regressors keyed by the display name used in the reports.
# Estimators are seeded (same seed as the train/test split) so that the
# reported metrics are reproducible across kernel restarts.
# The first two candidates are currently disabled.
models = {
    #"Linear Regression": LinearRegression(),
    #"Random Forest": RandomForestRegressor(random_state=42),
    "Gradient Booster": GradientBoostingRegressor(random_state=42)
}

## Evaluation Metrics - R-squared, MSE, RMSE, MAE

In [124]:
# Fit every candidate model and collect its scores and residual summary.
model_performance = {}  # model name -> dict of R2 / MSE / RMSE / MAE scores
model_statistics = {}   # model name -> describe() table of predictions vs. targets

for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    train_r2 = r2_score(y_train, y_pred_train)
    test_r2 = r2_score(y_test, y_pred_test)
    test_mse = mean_squared_error(y_test, y_pred_test)
    test_rmse = np.sqrt(test_mse)
    test_mae = mean_absolute_error(y_test, y_pred_test)

    # Store the results while still inside the loop so that every model keeps
    # its own entry; recording after the loop would only capture the last model.
    model_performance[model_name] = {
        "Train R2": train_r2,
        "Test R2": test_r2,
        "Test MSE": test_mse,
        "Test RMSE": test_rmse,
        "Test MAE": test_mae,
    }

    # Residual = target - prediction; Difference% expresses it relative to the target.
    residuals = y_test - y_pred_test
    difference_percent = (residuals / y_test) * 100
    model_statistics[model_name] = pd.DataFrame({
        "Prediction": y_pred_test,
        "Target": y_test,
        "Residual": residuals,
        "Difference%": difference_percent,
    }).describe()

In [125]:
 model_performance[model_name] = {
        "Train R2": train_r2,
        "Test R2": test_r2,
        "Test MSE": test_mse,
        "Test RMSE": test_rmse,
        "Test MAE": test_mae
    }

In [126]:
# Print the performance metrics to the terminal, one per line, followed by a blank line.
print(
    f"Model: {model_name}",
    f"Train R2: {train_r2}",
    f"Test R2: {test_r2}",
    f"Test MSE: {test_mse}",
    f"Test RMSE: {test_rmse}",
    f"Test MAE: {test_mae}",
    "",
    sep="\n",
)

Model: Gradient Booster
Train R2: 0.9969405038749669
Test R2: 0.9957105411882591
Test MSE: 715034.8020481498
Test RMSE: 845.5973048964559
Test MAE: 679.8078337152535



In [127]:
# One row per model, one column per metric.
model_performance_df = pd.DataFrame(model_performance).transpose()

# The best model is the one with the highest R2 on the held-out test set.
best_model_name = model_performance_df.loc[:, 'Test R2'].idxmax()
best_model = models[best_model_name]


## Model Statistics

In [128]:
# Residual = target - prediction, then the same quantity as a percentage of the target.
residuals = y_test.sub(y_pred_test)
difference_percent = residuals.div(y_test).mul(100)

# Side-by-side table of predictions, targets and errors for the test rows.
statistics = {
    "Prediction": y_pred_test,
    "Target": y_test,
    "Residual": residuals,
    "Difference%": difference_percent,
}
statistics_df = pd.DataFrame(data=statistics)

# Keep only the summary (count, mean, std, quartiles, extremes) under the model's name.
model_statistics[model_name] = statistics_df.describe()

In [129]:
# Rebuild the one-row-per-model score table and print it under its heading.
model_performance_df = pd.DataFrame(model_performance).transpose()
print("## Model Performance Metrics", model_performance_df, sep="\n")

# Display detailed statistics for each model
for model_name, stats in model_statistics.items():
    print(f"### {model_name}:", stats, sep="\n")

## Model Performance Metrics
                    Test MAE       Test MSE   Test R2   Test RMSE  Train R2
Gradient Booster  679.807834  715034.802048  0.995711  845.597305  0.996941
### Gradient Booster:
         Prediction        Target     Residual  Difference%
count   1176.000000   1176.000000  1176.000000  1176.000000
mean   25684.129206  25671.325434   -12.803772    -0.556522
std    12726.545250  12916.565978   845.860075     4.295016
min     8213.524109   7032.954254 -2959.730405   -21.947391
25%    14601.069989  14457.304053  -621.231950    -2.755498
50%    22652.859011  22550.039252    -1.532981    -0.010881
75%    36294.182109  35816.072695   575.566925     2.226874
max    63900.427646  65404.474401  3209.211187    11.711150


## Function to predict rent based on user input

In [130]:
def predict_rent(input_data, model, feature_columns=None):
    """Predict the rent for a single property.

    Parameters
    ----------
    input_data : dict or pandas.DataFrame
        Raw, un-encoded feature values for one property (categorical values
        given as their original labels). For a DataFrame only the first row
        is used.
    model : object
        Fitted estimator exposing ``predict(frame)``.
    feature_columns : sequence of str, optional
        Column layout the model was trained on. Defaults to the columns of
        the notebook-level training matrix ``X``.

    Returns
    -------
    The first element of ``model.predict(...)`` for the encoded row.
    """
    if feature_columns is None:
        feature_columns = X.columns

    # Normalise the input to a one-row frame indexed at 0.
    if isinstance(input_data, pd.DataFrame):
        row = input_data.iloc[[0]].reset_index(drop=True)
    else:
        row = pd.DataFrame(input_data, index=[0])

    # Encode WITHOUT drop_first: a single row holds exactly one category per
    # categorical column, so drop_first=True would discard that category and
    # the user's selection would never reach the model.
    encoded = pd.get_dummies(row)

    # Align with the training layout: dummy columns the row does not have are
    # filled with 0, columns the model never saw (including the baseline
    # category dropped during training) are discarded, and the column order
    # follows the training matrix.
    encoded = encoded.reindex(columns=feature_columns, fill_value=0)

    # Predict rent price
    predicted_rent = model.predict(encoded)
    return predicted_rent[0]

## Streamlit app for user input and prediction

In [131]:
# Title the sidebar. As the cell's last expression, the call's return value is echoed as the cell output.
st.sidebar.title("House Rent Prediction App")

DeltaGenerator(_root_container=1, _parent=DeltaGenerator())

In [132]:
# Input fields for user to enter property details.
# `input_features` maps each ORIGINAL (un-encoded) feature name to the value
# the user picked in the sidebar.
st.sidebar.header("Enter Property Details")
input_features = {}
for col in X.columns:
    if col.startswith(('Type', 'Location')):
        # One-hot columns are named "<original>_<category>". Offer a single
        # selectbox per original column, listing every category in the raw data.
        original_col = col.split('_')[0]
        if original_col not in input_features:
            input_features[original_col] = st.sidebar.selectbox(
                original_col, sorted(housing[original_col].unique())
            )
        continue

    # Whole-number features (integer dtype) get integer bounds and an integer
    # default so the widget steps in whole units instead of offering a
    # fractional mean; every other numeric feature stays a float field.
    # min_value, max_value and value are all cast the same way because
    # number_input expects them to share one numeric type.
    cast = (lambda v: int(round(v))) if pd.api.types.is_integer_dtype(X[col]) else float
    col_min, col_max, col_mean = (
        cast(0) if pd.isnull(stat) else cast(stat)  # all-missing column -> 0
        for stat in (X[col].min(), X[col].max(), X[col].mean())
    )
    input_features[col] = st.sidebar.number_input(
        col, min_value=col_min, max_value=col_max, value=col_mean
    )

# Predict button: on click, wrap the collected inputs in a one-row frame,
# score it with the selected model, and show the result in the sidebar.
if st.sidebar.button("Predict Rent"):
    input_data = pd.DataFrame(data=[input_features])
    predicted_price = predict_rent(input_data=input_data, model=best_model)
    st.sidebar.success(f"Predicted Rent Price: Rs.{predicted_price:.2f}")