In [1]:
import os
import platform
import pandas as pd
import numpy as np
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt

# Resolve the project root as the parent of the notebook's working directory.
# os.path.dirname handles the platform's path separator itself and, unlike
# manually splitting and re-joining the path, keeps the root intact when the
# working directory sits directly under it (e.g. '/work' -> '/' rather than ''),
# so the paths derived below stay absolute.
PROJECT_PATH = os.path.dirname(os.getcwd())

# Data directory and the test CSV, both located relative to the project root.
DATA_PATH = os.path.join(PROJECT_PATH, 'data')
TEST_DATA_PATH = os.path.join(DATA_PATH, 'test.csv')

# Read the test CSV into a DataFrame and display its (rows, columns) shape.
X = pd.read_csv(filepath_or_buffer=TEST_DATA_PATH)
X.shape

(1459, 80)

## Loading the model

Version 1 seems to perform best on the test split of the training data, so we will try it first on the submission dataset to establish a baseline.

In [2]:
from sklearn.base import TransformerMixin

class IndicesExtractor(TransformerMixin):
    """Transformer that keeps only the configured columns of a 2-D indexable.

    ``fit`` learns nothing; ``transform`` selects ``col_indices`` along the
    second axis of its input.

    NOTE(review): presumably defined in this notebook so that joblib can
    resolve the class when unpickling the saved model - confirm against the
    notebook that trained it, and keep the class and attribute names unchanged.

    Parameters
    ----------
    col_indices
        Column selector used as the second index in ``transform``.
    """

    def __init__(self, col_indices):
        self.col_indices = col_indices

    def fit(self, X, y=None):
        """Do nothing and return the transformer itself; ``X`` and ``y`` are unused."""
        return self

    def transform(self, X):
        """Return ``X[:, col_indices]``: every row, only the selected columns."""
        selected_columns = self.col_indices
        return X[:, selected_columns]

In [3]:
import joblib

# Build the path to the serialized version-1 model inside the project's
# 'models' directory, then deserialize it with joblib.
models_dir = os.path.join(PROJECT_PATH, 'models')
model = os.path.join(models_dir, 'house_prices_model_v1_23_08_2020.joblib')

house_prices_model = joblib.load(model)

In [4]:
# Split the identifier column off the features exactly once. Guarding on the
# column's presence keeps this cell re-runnable: without the guard, a second
# run fails with KeyError because X has already been reassigned without 'Id'.
if 'Id' in X.columns:
    house_id = X['Id']
    X = X.drop(columns='Id')

# Predict on the remaining feature columns.
y_pred = house_prices_model.predict(X)

# Each Id needs exactly one prediction.
assert len(y_pred) == len(house_id), "prediction count does not match number of Ids"

In [6]:
# Pair each house Id with its predicted price; the keyword names become the
# column names of the resulting frame.
output = dict(Id=house_id, SalePrice=y_pred)

submission_df = pd.DataFrame.from_dict(output)

# Preview the first rows.
submission_df.head()

Unnamed: 0,Id,SalePrice
0,1461,121552.5
1,1462,165604.375
2,1463,183590.0525
3,1464,179193.7575
4,1465,197827.075


In [11]:
# Write the submission frame to a CSV in the data directory, omitting the
# DataFrame index so that only the frame's columns are written.
submission_filename = os.path.join(DATA_PATH, 'submission_v1.csv')
submission_df.to_csv(path_or_buf=submission_filename, index=False)