In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import category_encoders as ce
import joblib
from sklearn.preprocessing import StandardScaler

In [14]:
# Read the rental listings from disk and preview the first five rows.
df_rent = pd.read_csv(filepath_or_buffer='df-rent.csv')
df_rent.head(n=5)

Unnamed: 0,price,area,num_bath_rooms,num_bed_rooms,zone,price_log
0,70000.0,1520.0,2.819781,0.0,Chattogram City,11.156251
1,110000.0,2625.0,2.819781,0.0,Mirpur,11.608236
2,110000.0,2800.0,2.819781,0.0,Mirpur,11.608236
3,110000.0,2625.0,2.819781,0.0,Mirpur,11.608236
4,60000.0,2000.0,2.819781,0.0,Dakshin Khan,11.0021


In [15]:
# Read the sale listings and discard the 'Unnamed: 0' column that comes with
# the CSV (presumably an index saved alongside the data -- confirm).
# Chaining the drop keeps the cell a single, re-runnable expression.
df_sale = pd.read_csv('df-sale.csv').drop(columns=['Unnamed: 0'])
df_sale.head()

Unnamed: 0,price,area,num_bath_rooms,num_bed_rooms,zone,price_log
0,6100000.0,1185.0,2.865087,3.0,Khilgaon,15.623799
1,28900000.0,2464.0,4.0,3.0,Dhanmondi,17.179352
2,7500000.0,1140.0,2.865087,3.0,Mirpur,15.830414
3,20000000.0,1920.0,3.0,3.0,Bashundhara R/A,16.811243
4,10800000.0,1445.0,3.0,3.0,Khilgaon,16.195057


### Splitting Features and Target

In [16]:
# Target: the log-price column. Features: everything except the two price columns.

# sale listings
df_sale_y = df_sale['price_log']
df_sale_x = df_sale.drop(columns=['price', 'price_log'])

# rent listings
df_rent_y = df_rent['price_log']
df_rent_x = df_rent.drop(columns=['price', 'price_log'])

### Dividing and Selecting Features

In [17]:
# Numeric feature columns (scaled later).
num_col = [
    'area',
    'num_bed_rooms',
    'num_bath_rooms',
]
# High-cardinality categorical columns (target-encoded later).
large_cat = ['zone']

(num_col, large_cat)

(['area', 'num_bed_rooms', 'num_bath_rooms'], ['zone'])

### Custom Transformers

In [18]:
from sklearn.base import BaseEstimator, TransformerMixin

class CatBoostEncoderTransformer(BaseEstimator, TransformerMixin):
    """Encode selected columns with one ``ce.CatBoostEncoder`` per column.

    Parameters
    ----------
    columns : list of str or str, optional
        Column name(s) to encode. A single name may be given as a plain
        string. When ``None``, the object/category dtype columns of the frame
        passed to ``fit`` are used.

    Attributes
    ----------
    encoders : dict
        Maps each encoded column name to its fitted ``ce.CatBoostEncoder``.
        Rebuilt from scratch on every call to ``fit``.
    """

    def __init__(self, columns=None):
        self.columns = columns
        self.encoders = {}

    def _resolve_columns(self, X):
        """Return the list of column names to encode for the frame ``X``."""
        if self.columns is None:
            return X.select_dtypes(include=['object', 'category']).columns.tolist()
        if isinstance(self.columns, str):
            # A bare string would otherwise be iterated character by character.
            return [self.columns]
        return list(self.columns)

    def fit(self, X, y=None):
        """Fit a fresh encoder on every selected column of ``X`` against ``y``.

        Returns
        -------
        self
        """
        # Start from an empty mapping so that a re-fit never keeps encoders
        # for columns that are no longer requested.
        self.encoders = {}

        for col in self._resolve_columns(X):
            encoder = ce.CatBoostEncoder()
            encoder.fit(X[col], y)
            self.encoders[col] = encoder

        return self

    def transform(self, X):
        """Return a copy of ``X`` whose fitted columns hold the encoded values.

        ``X`` itself is not modified; columns without a fitted encoder are
        copied through unchanged.
        """
        transformed_X = X.copy()

        for col, encoder in self.encoders.items():
            transformed_X[col] = encoder.transform(X[col])

        return transformed_X

class OneHotEncoderTransformer(BaseEstimator, TransformerMixin):
    """One-hot encode selected DataFrame columns and return a DataFrame.

    Parameters
    ----------
    columns : list of str, optional
        Columns to encode. When ``None``, ``fit`` falls back to a
        notebook-level variable named ``small_cat`` (kept for backward
        compatibility) and raises ``ValueError`` if it is not defined.
    drop_original : bool, default True
        If True, ``transform`` returns ``X`` without the source columns,
        joined with the one-hot columns. If False, only the one-hot columns
        are returned.

    Attributes
    ----------
    encoder : OneHotEncoder
        Dense-output encoder created with ``handle_unknown='ignore'``.
    new_columns : array of str or None
        Names of the generated columns; set by ``fit``.
    """

    def __init__(self, columns=None, drop_original=True):
        self.columns = columns
        self.drop_original = drop_original
        try:
            # scikit-learn >= 1.2 names the dense-output switch `sparse_output`;
            # the old `sparse` keyword was removed in 1.4.
            self.encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        except TypeError:
            # Older scikit-learn releases only know the `sparse` keyword.
            self.encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
        self.new_columns = None

    def fit(self, X, y=None):
        """Fit the encoder on ``X[self.columns]`` and record the output names.

        Raises
        ------
        ValueError
            If ``columns`` was not given and no global ``small_cat`` exists.
        """
        if self.columns is None:
            # Legacy fallback: look the column list up in the notebook globals
            # instead of failing with a bare NameError when it is missing.
            fallback = globals().get('small_cat')
            if fallback is None:
                raise ValueError(
                    "OneHotEncoderTransformer needs `columns` "
                    "(or a global `small_cat` listing the columns to encode)"
                )
            self.columns = list(fallback)

        self.encoder.fit(X[self.columns])
        self.new_columns = self.encoder.get_feature_names_out(self.columns)

        return self

    def transform(self, X):
        """Return the one-hot encoded frame, aligned on the index of ``X``."""
        transformed_X = pd.DataFrame(
            self.encoder.transform(X[self.columns]),
            columns=self.new_columns,
            index=X.index,
        )

        if self.drop_original:
            transformed_X = X.drop(columns=self.columns).join(transformed_X)

        return transformed_X
    
class NumberColsStandardScaler(BaseEstimator, TransformerMixin):
    """Apply a ``StandardScaler`` to a subset of DataFrame columns.

    ``number_cols`` names the columns to scale. If it is ``None`` when ``fit``
    runs, it is replaced by the numeric-dtype columns of the fitted frame.
    """

    def __init__(self, number_cols=None):
        self.scaler = StandardScaler()
        self.number_cols = number_cols

    def fit(self, X, y=None):
        """Fit the wrapped scaler on ``X[self.number_cols]`` and return self."""
        if self.number_cols is None:
            numeric_part = X.select_dtypes(include='number')
            self.number_cols = numeric_part.columns.tolist()

        self.scaler.fit(X[self.number_cols])
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the selected columns replaced by their scaled values."""
        scaled_values = self.scaler.transform(X[self.number_cols])

        result = X.copy()
        result[self.number_cols] = scaled_values
        return result
        
class PassAmenityColumns(BaseEstimator, TransformerMixin):
    """Identity transformer: ``transform`` hands its input back untouched."""

    def __init__(self):
        # Nothing to configure.
        pass

    def fit(self, X=None, y=None):
        """No-op fit; returns the transformer itself."""
        return self

    def transform(self, df):
        """Return ``df`` exactly as received (same object, no copy)."""
        return df

### Column Transformer

In [19]:
from sklearn.compose import ColumnTransformer

# Target-encode `zone` separately for each dataset.
# The encoders are always fitted on the raw `zone` strings held in
# df_rent / df_sale, never on df_*_x (whose `zone` this cell overwrites).
# That keeps the cell idempotent: re-running it cannot fit the encoders on
# values that were already encoded.

# for rent_df
large_cat_transformer_rent = CatBoostEncoderTransformer(columns=large_cat)
Transformed_large_cat_rent = large_cat_transformer_rent.fit_transform(df_rent[large_cat], df_rent_y)
df_rent_x['zone'] = Transformed_large_cat_rent['zone']

# for sale_df
large_cat_transformer_sale = CatBoostEncoderTransformer(columns=large_cat)
Transformed_large_cat_sale = large_cat_transformer_sale.fit_transform(df_sale[large_cat], df_sale_y)
df_sale_x['zone'] = Transformed_large_cat_sale['zone']

# Once encoded, `zone` is numeric and is scaled with the other features.
# Guarded so that re-running the cell cannot list 'zone' twice.
if 'zone' not in num_col:
    num_col.append('zone')
scaler_transformer = NumberColsStandardScaler(number_cols=num_col)

# Only the columns named in `num_col` reach the model; the transformer is
# applied to exactly those columns.
preprocessor = ColumnTransformer(transformers=[
    ('scaling', scaler_transformer, num_col)
])

In [20]:
prepared_sale_x = preprocessor.fit_transform(df_sale_x,df_sale_y)
prepared_rent_x = preprocessor.fit_transform(df_rent_x, df_rent_y)

In [21]:
def prepare_input_for_model(x,purpose):
    if purpose == 'Rent' or purpose == 'rent':
        x_large_preapered = large_cat_transformer_rent.transform(x)
    if purpose == 'Sale' or purpose == 'sale':
        x_large_preapered = large_cat_transformer_sale.transform(x)
    x['zone'] = x_large_preapered['zone']
    x = preprocessor.transform(x)
    return x

In [22]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
# Fixed seed so that Restart & Run All rebuilds the same forests.
rf_reg_sale = RandomForestRegressor(random_state=42)
rf_reg_rent = RandomForestRegressor(random_state=42)

# for sale
rf_reg_sale.fit(prepared_sale_x, df_sale_y)
joblib.dump(rf_reg_sale, 'RandomForrest_sale')
# for rent: persist the RENT model (the sale model used to be written here)
rf_reg_rent.fit(prepared_rent_x, df_rent_y)
joblib.dump(rf_reg_rent, 'RandomForrest_rent')


['RandomForrest_rent']

In [23]:
# RMSE of each forest on prepared_sale_x / prepared_rent_x, i.e. the same
# matrices the models were fitted on. These are in-sample errors, in log-price units.
predictions_sale, predictions_rent = (
    rf_reg_sale.predict(prepared_sale_x),
    rf_reg_rent.predict(prepared_rent_x),
)

rf_reg_mse_sale = mean_squared_error(y_true=df_sale_y, y_pred=predictions_sale)
rf_reg_mse_rent = mean_squared_error(y_true=df_rent_y, y_pred=predictions_rent)

rf_reg_rmse_sale, rf_reg_rmse_rent = np.sqrt(rf_reg_mse_sale), np.sqrt(rf_reg_mse_rent)

(rf_reg_rmse_sale, rf_reg_rmse_rent)


(0.1336771507747974, 0.482169697458572)

In [24]:
def make_prediction(input, purpose):
    """Predict the price of a single listing.

    Parameters
    ----------
    input : dict
        Feature name -> scalar value for one listing; it is turned into a
        one-row DataFrame.
    purpose : str
        'rent' or 'sale' (case-insensitive); selects the preprocessing and
        the saved model.

    Returns
    -------
    int
        ``exp`` of the model output, truncated to an integer. The models are
        trained on the `price_log` column, so this maps the prediction back
        to the price scale.

    Raises
    ------
    ValueError
        If `purpose` is neither 'rent' nor 'sale'.
    """
    model_files = {'rent': 'RandomForrest_rent', 'sale': 'RandomForrest_sale'}

    # Validate first, so a bad purpose fails with a clear message instead of
    # an UnboundLocalError further down.
    key = str(purpose).strip().lower()
    if key not in model_files:
        raise ValueError(f"purpose must be 'rent' or 'sale', got {purpose!r}")

    features = pd.DataFrame(input, index=[0])
    prepared_input = prepare_input_for_model(x=features, purpose=key)

    # NOTE: joblib.load unpickles the file; only load model files written by
    # this notebook.
    model = joblib.load(model_files[key])

    prediction = model.predict(prepared_input)
    antilog_price = np.exp(prediction)

    # Take the single element explicitly: int() on a 1-element array is
    # deprecated in recent NumPy releases.
    return int(antilog_price[0])