In [4]:
import pandas as pd
rent_price = pd.read_csv('rent_price.csv')
print(rent_price.shape)
rent_price.head()

(740, 7)


Unnamed: 0,RegionName,State,CountyName,SizeRank,Bedrooms,Price,index
0,288.0,34.0,173.0,1,1,2356.598901,0
1,236.0,4.0,115.0,2,1,1602.959302,1
2,76.0,14.0,47.0,3,1,1459.912791,2
3,185.0,43.0,85.0,4,1,1182.826531,3
4,330.0,38.0,158.0,5,1,1180.203488,4


# Using Linear regression

In [5]:
#describing price feature
rent_price['Price'].describe()

count     740.000000
mean     1284.677264
std       583.675017
min       414.433333
25%       846.304798
50%      1163.244917
75%      1544.138393
max      4024.022989
Name: Price, dtype: float64

In [6]:
# after looking at the price feature
mean_baseline = 1284.677264

In [7]:
# calculating error now
errors = mean_baseline - rent_price['Price']
errors

0     -1071.921637
1      -318.282038
2      -175.235527
3       101.850733
4       104.473776
          ...     
735    -260.788253
736    -759.202254
737   -1369.949242
738    -604.270104
739     287.781160
Name: Price, Length: 740, dtype: float64

In [8]:
#calculating mean absolute error
mean_absolute_error = errors.abs().mean()
mean_absolute_error

442.1808671404665

In [10]:
#Split train into train and val
from sklearn.model_selection import train_test_split
train,test = train_test_split(rent_price, train_size = 0.80, test_size = 0.20,stratify=rent_price['Bedrooms'],random_state=45)

In [11]:
train.shape

(592, 7)

In [12]:
test.shape

(148, 7)

In [13]:
# The Price column is the target
target = 'Price'

X_train = train.drop(columns=target)
y_train = train[target]
X_test = test.drop(columns=target)
y_test = test[target]


In [14]:
# Train Error
from sklearn.metrics import mean_absolute_error
guess = mean_baseline
y_pred = [guess] * len(y_train)
mae = mean_absolute_error(y_train, y_pred)
print(f'Train Error : {mae:.2f} percentage points')

Train Error : 437.18 percentage points


In [15]:
# Test Error
y_pred = [guess] * len(y_test)
mae = mean_absolute_error(y_test, y_pred)
print(f'Test Error : {mae:.2f} percentage points')

Test Error : 462.19 percentage points


In [16]:
(y_test - y_pred).abs().mean()

462.1871855521598

In [None]:
#Use scikit-learn to fit a multiple regression with three features.

In [17]:
# 1. Import the appropriate estimator class from Scikit-Learn
from sklearn.linear_model import LinearRegression

In [18]:
# 2. Instantiate this class
model = LinearRegression()
model

LinearRegression()

In [19]:

# Re-arrange X features matrices
features = ['State','CountyName', 
            'Bedrooms']
print(f'Linear Regression, dependent on: {features}')

train = X_train[features]
train

Linear Regression, dependent on: ['State', 'CountyName', 'Bedrooms']


Unnamed: 0,State,CountyName,Bedrooms
734,4.0,115.0,2
482,4.0,187.0,2
178,9.0,29.0,1
322,23.0,87.0,2
285,43.0,52.0,2
...,...,...,...
55,10.0,43.0,1
349,43.0,118.0,2
171,38.0,39.0,1
39,9.0,128.0,1


In [20]:
test = X_test[features]
test

Unnamed: 0,State,CountyName,Bedrooms
677,10.0,55.0,2
186,4.0,115.0,1
77,9.0,108.0,1
86,45.0,176.0,1
274,9.0,128.0,1
...,...,...,...
356,15.0,8.0,2
676,9.0,29.0,2
622,38.0,104.0,2
321,11.0,91.0,2


In [21]:
# Fit the model
model.fit(train, y_train)

LinearRegression()

In [22]:
#  Apply the model to new data
y_pred_train = model.predict(train)
mean_absolute_error(y_pred_train, y_train)

420.37895849708644

In [23]:
y_pred = model.predict(test)
mean_absolute_error(y_pred, y_test)

435.49005198001583

In [24]:
model.intercept_, model.coef_

(1212.8578728754192, array([-10.07313898,   0.51786101, 137.07322514]))

In [25]:

# This is easier to read
print('Intercept', model.intercept_)
coefficients = pd.Series(model.coef_, features)
print(coefficients.to_string())

Intercept 1212.8578728754192
State         -10.073139
CountyName      0.517861
Bedrooms      137.073225


In [None]:
# # save the model to disk
# import pickle
# filename = 'rent_recommendation_model.sav'
# pickle.dump(model, open(filename, 'wb'))
            

# testing the model with a testcase

In [26]:
from sklearn.preprocessing import OrdinalEncoder
import numpy as np
def predict(State, CountyName, Bedrooms):
    ordinal_encoder = OrdinalEncoder()
    z = ordinal_encoder.fit_transform([['State','CountyName']])
    z = z.flatten()
    z = z.tolist()
    z.append(Bedrooms)
    result = model.predict(np.array([z]))
    return result


In [27]:
predict('IL','Dupage',2)

array([1487.00432316])

## testing with pickled model and dataset

In [28]:

import pandas as pd
import pickle
from pickle import load
filename = 'rent_recommendation_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))
#rentdf_pkl = pd.read_pickle("rent_price_dataset.pkl")

In [29]:
from sklearn.preprocessing import OrdinalEncoder
import numpy as np
import pickle
from pickle import load
def predict_rent(State, CountyName, Bedrooms):
    ordinal_encoder = OrdinalEncoder()
    z = ordinal_encoder.fit_transform([['State','CountyName']])
    z = z.flatten()
    z = z.tolist()
    z.append(Bedrooms)
    result = loaded_model.predict(np.array([z]))
    return result

In [30]:
predict_rent('IL','Dupage',2)

array([1487.00432316])

## Code with Pydantic

In [None]:
import pandas as pd
from pydantic import BaseModel
from sklearn.preprocessing import OrdinalEncoder
import pandas as pd
import pickle
from pickle import load
filename = 'rent_recommendation_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))

In [None]:
class Rent_Predict(BaseModel):
    State:str
    CountyName:str
    Bedrooms:int
    
    def dict_df(self):
        """Convert to pandas dataframe with 1 row"""
        y=pd.DataFrame([dict(self)])
        ordinal_encoder = OrdinalEncoder()
        y[['State','CountyName']] = ordinal_encoder.fit_transform(y[['State','CountyName']])
        return y     
        

In [None]:
#@router.post("/api/rental_priceprediction")
def rental_price_prediction(rent_predict:Rent_Predict):
   
    
    """ Machine Learning model predicts the rental price of the target city and county
    

    args:
        State: Provide the abbreviation of the target state. For eg NY for New York
        CountyName: Provide the county name.
        Bedrooms: Number of bedrroms required.

    returns:
        Dictionary that contains the requested data, which is converted
        by fastAPI to a json object.
    """
    
        
    result = loaded_model.predict(rent_predict.dict_df()) 
    return {'predicted_price': result[0]}
    
 