In [1]:
# Import Libraries

import numpy as np
import pandas as pd
import os
import pickle

In [47]:
class RentPredictor:
    """Batch rent predictor: load a pickled regressor, prepare a CSV, predict.

    Call order used by this notebook:
    ``load_test_csv`` -> ``data_preprocessing`` -> ``init_result_df`` ->
    ``get_predictions``.

    Attributes:
        xgb_model: The unpickled model object, or ``None`` if loading failed.
        test_df: ``None`` until ``load_test_csv`` runs; the raw CSV frame
            afterwards; the engineered feature frame (still including ``id``)
            once ``data_preprocessing`` has run.
        result_df: ``None`` until initialised; then a frame with ``id`` and,
            after ``get_predictions``, ``rent``.
    """

    DEFAULT_MODEL_PATH = "../models/XGBoost_Regressor_tunned.pickle"
    # Year the "age" feature is measured from.
    # NOTE(review): presumably this must equal the value used when the model
    # was trained, so it is deliberately NOT the current year -- confirm.
    REFERENCE_YEAR = 2021

    def __init__(self, model_path=DEFAULT_MODEL_PATH):
        """Load the pickled model from ``model_path``.

        A load failure is reported with ``print`` and does not raise (same
        best-effort behaviour as before); ``xgb_model`` is then ``None`` so the
        attribute always exists.

        Args:
            model_path: Path of the pickled model. Defaults to the path this
                notebook has always used.
        """
        self.xgb_model = None
        try:
            # NOTE(security): unpickling executes arbitrary code; only load
            # model files from a trusted source.
            with open(model_path, "rb") as model_file:  # closes the handle
                self.xgb_model = pickle.load(model_file)
        except Exception as e:
            print("Error loading XGBoost Model\n", e)
        self.test_df = None
        self.result_df = None

    def load_test_csv(self, data_path):
        """Read the UTF-8 encoded CSV at ``data_path`` into ``self.test_df``."""
        self.test_df = pd.read_csv(data_path, encoding="UTF-8")

    @staticmethod
    def _premium_mask(x):
        """Premium-house condition; works on one row (mapping) or a whole frame.

        ``&`` is used instead of ``and`` so the same expression evaluates
        element-wise on DataFrame columns and as a plain boolean on a row.
        """
        return (
            (x["garage"] == 1)
            & (x["pool"] == 1)
            & (x["fireplace"] == 1)
            & (x["bed"] > 3)
            & (x["bathroom_combined"] > 3)
        )

    def create_premium_house_feature(self, x):
        """Return 1 if row ``x`` is a premium house, else 0.

        Premium means: garage, pool and fireplace all equal 1, more than 3
        bedrooms and ``bathroom_combined`` greater than 3.

        Args:
            x: A row (Series or mapping) with the keys ``garage``, ``pool``,
                ``fireplace``, ``bed`` and ``bathroom_combined``.
        """
        return int(self._premium_mask(x))

    def data_preprocessing(self):
        """Turn the raw frame in ``self.test_df`` into the feature frame.

        Steps, in the original column-producing order: drop
        ``address``/``state``; one-hot encode ``property_type``; drop
        ``county``/``city``/``zipcode`` and the ``property_type_Condo`` dummy;
        replace ``yearbuilt`` with ``age``; add ``sqft_per_bed``; replace
        ``bath``/``halfbath`` with ``bathroom_combined``; add
        ``is_premium_house``. The ``id`` column is kept.

        The new frame is built on the side and assigned at the end, so a
        failure part-way through leaves ``self.test_df`` untouched.

        Raises:
            KeyError: If a required input column is missing (this includes
                calling the method a second time on an already processed
                frame).
        """
        frame = self.test_df.drop(columns=["address", "state"])
        frame = pd.get_dummies(frame, columns=["property_type"])
        frame = frame.drop(columns=["county", "city", "zipcode"])
        # The Condo dummy only exists when the file contains at least one
        # condo; its absence must not abort preprocessing.
        frame = frame.drop(columns=["property_type_Condo"], errors="ignore")

        frame["age"] = self.REFERENCE_YEAR - frame["yearbuilt"]
        frame = frame.drop(columns=["yearbuilt"])

        # NOTE(review): rows with bed == 0 produce inf/NaN here; left as-is so
        # the feature matches the original computation -- confirm how the
        # training data handled this.
        frame["sqft_per_bed"] = frame["sqft"] / frame["bed"]

        frame["bathroom_combined"] = frame["bath"] + (0.5 * frame["halfbath"])
        frame = frame.drop(columns=["bath", "halfbath"])

        # Vectorised equivalent of applying create_premium_house_feature to
        # every row; also well-defined for an empty frame.
        frame["is_premium_house"] = self._premium_mask(frame).astype("int64")

        self.test_df = frame

    def init_result_df(self):
        """Start ``self.result_df`` as a copy of the ``id`` column of ``test_df``."""
        self.result_df = self.test_df[["id"]].copy()

    def get_predictions(self):
        """Predict rent for every row and return the result frame.

        Every column of ``self.test_df`` except ``id`` is passed to the
        model's ``predict``. ``self.test_df`` itself is not modified, so the
        method can be called repeatedly. If ``init_result_df`` has not been
        called yet, it is called here.

        Returns:
            ``self.result_df`` with the columns ``id`` and ``rent``.

        Raises:
            AttributeError: If the model failed to load in ``__init__``.
        """
        if self.xgb_model is None:
            # AttributeError keeps the failure type callers saw before, when
            # the attribute was simply missing after a failed load.
            raise AttributeError(
                "xgb_model is not loaded; see the error printed by __init__"
            )
        if not isinstance(self.result_df, pd.DataFrame):
            self.init_result_df()

        features = self.test_df.drop(columns=["id"])
        self.result_df["rent"] = self.xgb_model.predict(features)

        return self.result_df


In [49]:
# End-to-end run: load the test CSV, engineer features, snapshot the ids into
# the result frame, then predict.
TEST_CSV_PATH = "../data/test.csv"

rent_predictor = RentPredictor()
rent_predictor.load_test_csv(data_path=TEST_CSV_PATH)
rent_predictor.data_preprocessing()
rent_predictor.init_result_df()  # ids are captured before predicting
res = rent_predictor.get_predictions()

In [50]:
# Preview the first rows of the prediction frame (columns: id, rent).
res.head()

Unnamed: 0,id,rent
0,test_0,1070.832886
1,test_1,1218.61731
2,test_2,935.412964
3,test_3,891.063293
4,test_4,842.345093


In [51]:
# Write the predictions to disk without the positional index column.
SUBMISSION_CSV_PATH = "../data/result_submission.csv"
res.to_csv(SUBMISSION_CSV_PATH, index=False)