In [1]:
import pandas as pd

# Read the rental listings CSV into a DataFrame (relative path, resolved
# against the current working directory)
house_data = pd.read_csv(filepath_or_buffer="House_for_rent_islamabad_pk.csv")

In [2]:
# Bare expression: the notebook renders the loaded DataFrame as this cell's output
house_data

Unnamed: 0.1,Unnamed: 0,Location,Area,Bedrooms,Baths,Price
0,0,F-8,26.0,4.0,5.0,420000
1,1,DHA,11.0,3.0,4.0,100000
2,2,F-8,26.0,4.0,5.0,650000
3,3,F-6,20.0,6.0,6.0,450000
4,4,D-17,10.0,9.0,6.0,120000
...,...,...,...,...,...,...
3894,3894,F-11,20.0,5.0,6.0,500000
3895,3895,F-8,26.0,7.0,,1000000
3896,3896,E-11,20.0,7.0,6.0,600000
3897,3897,E-7,40.0,6.0,6.0,3500000


In [22]:
# For every column: print its name, then its distinct values, then a blank
# separator line
for column in house_data.columns:
    print(column, house_data[column].unique(), "", sep="\n")

Unnamed: 0
[   0    1    2 ... 3896 3897 3898]

Location
['F-8' 'DHA' 'F-6' 'D-17' 'Bahria' 'F-11' 'F-7' 'I-8' 'E-7' 'F-10' 'Kuri'
 'G-13' 'G-6' 'G-9' 'Shah' 'Zaraj' 'E-11' 'I-10' 'Soan' 'H-13' 'Bani'
 'D-12' 'G-11' 'G-10' 'Gulberg' 'B-17' 'Naval' 'I-14' 'Mumtaz' 'Pakistan'
 'Top' 'G-15' 'Emaar' 'F-15' 'PWD' 'I-9' 'National' 'G-8' 'Shehzad'
 'FECHS' 'Korang' 'Margalla' 'Faisal' 'E-16' 'Ghauri' 'G-7' 'G-14' 'G-12'
 'Park' 'CBR' 'I-11' 'Green' 'Chatha' 'G-16' 'Taramrri'
 'Gulshan-e-Khudadad' 'F-17' 'E-17' 'I-13' 'Tarnol' 'Khanna' 'Chak'
 'Bhara' 'Capital' 'Meherban' 'University' 'E-18' 'Constitution' 'Tarlai'
 'Police' 'Lehtarar']

Area
[2.60e+01 1.10e+01 2.00e+01 1.00e+01 5.00e+00 4.00e+01 8.00e+00 4.20e+01
 3.60e+01 2.40e+01 1.24e+01 1.40e+01 1.20e+01 2.20e+01 4.00e+00 4.80e+01
 3.20e+01 8.00e+01 6.40e+01 6.00e+01 7.20e+01 6.00e+00 4.40e+01 1.78e+01
 2.80e+01 5.00e+01 1.42e+01 1.10e+00 1.70e+01 3.80e+01 7.00e+00 6.60e+00
 1.09e+01 8.90e+00 5.60e+01 3.00e+01 1.01e+01 1.60e+01 1.95e+01 9

In [3]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score

# Remove the 'Unnamed: 0' column, which is not used as a feature
house_data_cleaned = house_data.drop('Unnamed: 0', axis=1)

# 'Price' is the prediction target; every remaining column is a predictor
y = house_data_cleaned['Price']
X = house_data_cleaned.drop(columns=['Price'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


# Numeric columns: fill missing entries with the column median
numeric_features = ['Area', 'Bedrooms', 'Baths']
numeric_transformer = SimpleImputer(strategy='median')

# Categorical column: one-hot encode; categories not seen during fit are ignored
categorical_features = ['Location']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Route each column group through its own transformer
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Full model: preprocessing followed by a 100-estimator gradient boosting regressor
gb_model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', GradientBoostingRegressor(n_estimators=100, random_state=0)),
])

# Fit the preprocessing + gradient boosting pipeline on the training split
gb_model.fit(X_train, y_train)

# Score the held-out split
y_gb_pred = gb_model.predict(X_test)

# RMSE is taken as the square root of the plain MSE. The `squared=False`
# keyword of mean_squared_error was deprecated in scikit-learn 1.4 and removed
# in 1.6, so this form keeps the cell runnable on both old and new versions.
gb_rmse = mean_squared_error(y_test, y_gb_pred) ** 0.5
gb_mae = mean_absolute_error(y_test, y_gb_pred)
gb_r2 = r2_score(y_test, y_gb_pred)

# Cell output: (RMSE, MAE, R^2) on the test split
gb_rmse, gb_mae, gb_r2




(np.float64(272016.59177088446),
 np.float64(146925.3702485639),
 0.7731465106263966)

In [4]:
# Bare expression: the notebook renders the cleaned DataFrame as this cell's output
house_data_cleaned

Unnamed: 0,Location,Area,Bedrooms,Baths,Price
0,F-8,26.0,4.0,5.0,420000
1,DHA,11.0,3.0,4.0,100000
2,F-8,26.0,4.0,5.0,650000
3,F-6,20.0,6.0,6.0,450000
4,D-17,10.0,9.0,6.0,120000
...,...,...,...,...,...
3894,F-11,20.0,5.0,6.0,500000
3895,F-8,26.0,7.0,,1000000
3896,E-11,20.0,7.0,6.0,600000
3897,E-7,40.0,6.0,6.0,3500000


In [19]:
import joblib

# Persist the fitted pipeline to disk so it can be reloaded for later predictions
joblib.dump(value=gb_model, filename='house_price_model.pkl')


['house_price_model.pkl']

In [23]:
def predict_price(location, area, bedrooms, baths, model=None,
                  model_path='house_price_model.pkl'):
    """Predict the price for a single listing.

    Parameters
    ----------
    location : value placed in the 'Location' column of the model input.
    area : value placed in the 'Area' column (units are assumed to match the
        training data -- TODO confirm).
    bedrooms : value placed in the 'Bedrooms' column.
    baths : value placed in the 'Baths' column.
    model : optional object exposing ``predict(DataFrame)``. When given, it is
        used directly and nothing is read from disk, which avoids re-loading
        the model on every call.
    model_path : file to load with ``joblib.load`` when ``model`` is None.
        Defaults to the path the original implementation hard-coded.

    Returns
    -------
    The first (and only) element of ``model.predict(...)`` for the one-row input.
    """
    if model is None:
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code -- only point model_path at files you trust.
        model = joblib.load(model_path)

    # One-row frame whose column names match the features the model was trained on
    input_data = pd.DataFrame({
        'Location': [location],
        'Area': [area],
        'Bedrooms': [bedrooms],
        'Baths': [baths],
    })

    # Use the model to predict the price
    predicted_price = model.predict(input_data)
    return predicted_price[0]


# Example usage: location, area (in the dataset's units), bedrooms, bathrooms
location, area, bedrooms, baths = "DHA", 11, 3, 4

predicted_price = predict_price(
    location=location,
    area=area,
    bedrooms=bedrooms,
    baths=baths,
)
print(f"Predicted Price: PKR {predicted_price:.2f}")


Predicted Price: PKR 94506.76
