In [None]:
import numpy as np
import pandas as pd

import warnings
# Silence every warning for the rest of the session. This keeps cell output short,
# but it also hides deprecation warnings raised by the libraries used in later cells.
warnings.filterwarnings('ignore')

In [None]:
# Load the raw listings; the path is relative to the notebook's working directory.
df = pd.read_csv('house_prices.csv')
# Preview the first five rows.
df.head()

Unnamed: 0.1,Unnamed: 0,property_type,price,location,city,baths,purpose,bedrooms,Area_in_Marla
0,0,Flat,10000000,G-10,Islamabad,2,For Sale,2,4.0
1,1,Flat,6900000,E-11,Islamabad,3,For Sale,3,5.6
2,2,House,16500000,G-15,Islamabad,6,For Sale,5,8.0
3,3,House,43500000,Bani Gala,Islamabad,4,For Sale,4,40.0
4,4,House,7000000,DHA Defence,Islamabad,3,For Sale,3,8.0


In [None]:
# Drop the 'Unnamed: 0' column (it appears to be a row index that was saved into the CSV).
# Reassigning instead of using inplace=True, together with errors='ignore', makes this
# cell safe to re-run: once the column is gone a second run is a no-op, not a KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.head()

Unnamed: 0,property_type,price,location,city,baths,purpose,bedrooms,Area_in_Marla
0,Flat,10000000,G-10,Islamabad,2,For Sale,2,4.0
1,Flat,6900000,E-11,Islamabad,3,For Sale,3,5.6
2,House,16500000,G-15,Islamabad,6,For Sale,5,8.0
3,House,43500000,Bani Gala,Islamabad,4,For Sale,4,40.0
4,House,7000000,DHA Defence,Islamabad,3,For Sale,3,8.0


In [None]:
# Rich display instead of print(): same truncated first/last-rows view, rendered as a table.
df

      property_type     price             location       city  baths  \
0              Flat  10000000                 G-10  Islamabad      2   
1              Flat   6900000                 E-11  Islamabad      3   
2             House  16500000                 G-15  Islamabad      6   
3             House  43500000            Bani Gala  Islamabad      4   
4             House   7000000          DHA Defence  Islamabad      3   
...             ...       ...                  ...        ...    ...   
99494          Flat   7500000  Bahria Town Karachi    Karachi      3   
99495         House   8800000  Bahria Town Karachi    Karachi      4   
99496         House  14000000  Bahria Town Karachi    Karachi      3   
99497         House  14000000  Bahria Town Karachi    Karachi      4   
99498         House   9000000  Bahria Town Karachi    Karachi      3   

        purpose  bedrooms  Area_in_Marla  
0      For Sale         2            4.0  
1      For Sale         3            5.6  
2     

In [None]:
# (rows, columns) of the frame before de-duplication.
df.shape

(99499, 8)

In [None]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99499 entries, 0 to 99498
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   property_type  99499 non-null  object 
 1   price          99499 non-null  int64  
 2   location       99499 non-null  object 
 3   city           99499 non-null  object 
 4   baths          99499 non-null  int64  
 5   purpose        99499 non-null  object 
 6   bedrooms       99499 non-null  int64  
 7   Area_in_Marla  99499 non-null  float64
dtypes: float64(1), int64(3), object(4)
memory usage: 6.1+ MB


In [None]:
# Summary statistics for the numeric columns.
# NOTE(review): the recorded output shows a minimum of 0 for bedrooms and 0.0 for
# Area_in_Marla - possibly placeholder values; verify before modelling.
df.describe()

Unnamed: 0,price,baths,bedrooms,Area_in_Marla
count,99499.0,99499.0,99499.0,99499.0
mean,10375920.0,3.530106,3.350737,8.757479
std,10660000.0,1.474217,1.255913,7.466471
min,15500.0,1.0,0.0,0.0
25%,150000.0,2.0,2.0,4.8
50%,7500000.0,3.0,3.0,6.7
75%,15500000.0,5.0,4.0,10.0
max,44900000.0,7.0,6.0,194.0


In [None]:
# Number of missing values per column.
df.isnull().sum()

Unnamed: 0,0
property_type,0
price,0
location,0
city,0
baths,0
purpose,0
bedrooms,0
Area_in_Marla,0


In [None]:
# Remove exact duplicate rows and renumber the index 0..n-1 in a single call.
df = df.drop_duplicates(ignore_index=True)
df.shape

(61641, 8)

In [None]:
# Express the plot size in square feet: pop() removes the marla column and returns it,
# and the scaled values (272.25 sq ft per marla) are stored as the new last column 'area'.
df['area'] = df.pop('Area_in_Marla') * 272.25

df.head()

Unnamed: 0,property_type,price,location,city,baths,purpose,bedrooms,area
0,Flat,10000000,G-10,Islamabad,2,For Sale,2,1089.0
1,Flat,6900000,E-11,Islamabad,3,For Sale,3,1524.6
2,House,16500000,G-15,Islamabad,6,For Sale,5,2178.0
3,House,43500000,Bani Gala,Islamabad,4,For Sale,4,10890.0
4,House,7000000,DHA Defence,Islamabad,3,For Sale,3,2178.0


In [None]:
# Shorten two feature names (property_type -> type, bedrooms -> beds) and
# order the columns so that the target 'price' comes last.
df = df.rename(columns={"property_type": "type", "bedrooms": "beds"})[
    ["type", "location", "city", "purpose", "baths", "beds", "area", "price"]
]

df.head()

Unnamed: 0,type,location,city,purpose,baths,beds,area,price
0,Flat,G-10,Islamabad,For Sale,2,2,1089.0,10000000
1,Flat,E-11,Islamabad,For Sale,3,3,1524.6,6900000
2,House,G-15,Islamabad,For Sale,6,5,2178.0,16500000
3,House,Bani Gala,Islamabad,For Sale,4,4,10890.0,43500000
4,House,DHA Defence,Islamabad,For Sale,3,3,2178.0,7000000


In [None]:
# Separate the target vector from the feature matrix.
y = df['price']
X = df.drop(columns='price')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

In [None]:
# Hold out 33% of the rows for evaluation; random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
# Row/column counts of the training and test feature matrices, one per line.
print(X_train.shape, X_test.shape, sep='\n')

(41299, 7)
(20342, 7)


In [None]:
# One-hot encode the four categorical columns; every other column passes through unchanged.
# handle_unknown='ignore' encodes a category unseen during fit as all zeros
# instead of raising at predict time.
column_trans = make_column_transformer(
    (
        OneHotEncoder(handle_unknown='ignore', sparse_output=False),
        ['type', 'location', 'city', 'purpose'],
    ),
    remainder='passthrough',
)

In [None]:
# Standardises every column produced by the column transformer (one-hot and numeric alike).
scaler = StandardScaler()

In [None]:
# Fix the seed so the fitted forest, and therefore the reported score and the
# served predictions, are reproducible across kernel restarts.
rfr = RandomForestRegressor(random_state=42)

In [None]:
# Chain preprocessing and model so the same steps run at fit and at predict time:
# encode categoricals -> scale -> random forest.
pipe = make_pipeline(column_trans, scaler, rfr)

In [None]:
# Fit the encoder, the scaler and the forest on the training split only.
pipe.fit(X_train, y_train)

In [None]:
# Predict prices for the held-out rows.
y_pred_rfr = pipe.predict(X_test)

In [None]:
# Coefficient of determination (R^2) on the held-out test split.
# NOTE(review): the target appears to mix sale and rent prices, which differ by orders
# of magnitude, so a single R^2 may overstate per-segment accuracy - consider also
# reporting the imported mean_absolute_error per 'purpose'.
r2_score(y_test, y_pred_rfr)

0.8760064358180989

In [None]:
!pip install anvil-uplink

Collecting anvil-uplink
  Downloading anvil_uplink-0.5.1-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting argparse (from anvil-uplink)
  Downloading argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting ws4py-sslupdate (from anvil-uplink)
  Downloading ws4py_sslupdate-0.5.1b0-py2.py3-none-any.whl.metadata (1.7 kB)
Downloading anvil_uplink-0.5.1-py2.py3-none-any.whl (95 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.9/95.9 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Downloading ws4py_sslupdate-0.5.1b0-py2.py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ws4py-sslupdate, argparse, anvil-uplink
Successfully installed anvil-uplink-0.5.1 argparse-1.4.0 ws4py-sslupdate-0.5.1b0


In [None]:
import os
from getpass import getpass

import anvil.server

# The uplink key is a credential, so it must not be committed with the notebook.
# Read it from the ANVIL_UPLINK_KEY environment variable, or prompt for it when unset.
# Any key that was ever stored in this notebook should be treated as leaked and
# regenerated in the Anvil app's Uplink settings.
_uplink_key = os.environ.get("ANVIL_UPLINK_KEY") or getpass("Anvil uplink key: ")
anvil.server.connect(_uplink_key)

Connecting to wss://anvil.works/uplink
Anvil websocket open
Connected to "Default Environment" as SERVER


In [None]:
@anvil.server.callable
def predict_price(type, location, city, purpose, baths, beds, area):
    """Predict the price of a single listing with the fitted ``pipe`` pipeline.

    Registered through ``@anvil.server.callable`` so it can be invoked remotely
    over the Anvil uplink connection opened earlier in the notebook.

    Args:
        type: Property type, e.g. ``'Flat'`` or ``'House'``. The name shadows the
            builtin inside this function; it is kept so that existing callers
            passing it by keyword keep working.
        location: Neighbourhood / sector name.
        city: City name.
        purpose: Listing purpose, e.g. ``'For Sale'``.
        baths: Number of bathrooms (number or numeric string).
        beds: Number of bedrooms (number or numeric string).
        area: Area in the same unit as the training column ``area``
            (number or numeric string).

    Returns:
        float: The predicted price for the one-row input.

    Raises:
        TypeError, ValueError: If ``baths``, ``beds`` or ``area`` cannot be
            converted to a number.
    """
    # Check if function is triggered
    print(f"Inputs received: {type}, {location}, {city}, {purpose}, {baths}, {beds}, {area}")

    # Build a one-row frame whose column names and order match the training features.
    # Numeric fields are coerced explicitly so numeric strings from a client form
    # are accepted and a non-numeric value fails here with a clear error.
    input_data = pd.DataFrame({
        'type': [type],
        'location': [location],
        'city': [city],
        'purpose': [purpose],
        'baths': [float(baths)],
        'beds': [float(beds)],
        'area': [float(area)],
    })

    # pipe.predict returns an array with one element; convert it to a plain Python
    # float so the value sent back over the uplink is not a NumPy scalar.
    predicted_price = float(pipe.predict(input_data)[0])

    # Log the predicted price for debugging
    print(f"Predicted Price: {predicted_price}")

    # Return the predicted price
    return predicted_price

In [None]:
# Block this cell so the kernel stays connected and keeps serving predict_price calls;
# interrupt the kernel to stop serving.
anvil.server.wait_forever()

Inputs received: Upper portion, G-10, Islamabad, For Rent, 1, 2, 843.954
Predicted Price: 24206.38095238095
Inputs received: Upper portion, G-10, Islamabad, For Rent, 1, 2, 843.954
Predicted Price: 24206.38095238095
Inputs received: Upper portion, G-10, Islamabad, For Rent, 1, 2, 843.945
Predicted Price: 24206.38095238095
