In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#### Load the cleaned test data

In [2]:
# Path of the cleaned test split (a gzip parquet file, going by its name).
test_data_path = 'data/data_test.parquet.gzip'
data_test = pd.read_parquet(test_data_path)

In [3]:
# Names of the two target columns; one model is evaluated for each of them.
target_1, target_2 = 'baseRentSquareMeter', 'price_costs_square_meter'

In [4]:
# Every column except the two targets has its missing values replaced by the
# sentinel 99999; the target columns are left untouched.
target_columns = {target_1, target_2}
all_but_targets = [column for column in data_test.columns if column not in target_columns]

data_test[all_but_targets] = data_test[all_but_targets].fillna(99999)

In [5]:
import json

# Load the value mapping that is applied to the test frame with
# DataFrame.replace. The file is decoded explicitly as UTF-8 (the encoding
# JSON is specified to use) so that the result does not depend on the
# platform's locale default.
with open('data/back_mapping.json', 'r', encoding='utf-8') as fp:
    back_mapping = json.load(fp)

In [None]:
# Substitute values in the test frame according to the mapping loaded from
# data/back_mapping.json (presumably reverting an encoding applied during
# cleaning -- TODO confirm).
data_test = data_test.replace(to_replace=back_mapping)

### Load models

In [None]:
from modelling_utils import load_model,load_model_config

In [None]:
# Load the model for each target first, then the matching configurations.
model_1, model_2 = (load_model(target) for target in (target_1, target_2))

model_config_1, model_config_2 = (
    load_model_config(target) for target in (target_1, target_2)
)

### Predict

In [None]:
# Feature columns of each model, read from the 'feature_cols' entry of its config.
features_1, features_2 = (
    config['feature_cols'] for config in (model_config_1, model_config_2)
)

In [None]:
# Data checker and converter. (This should be done more extensively, and with
# tests, to ensure no production crash.)
# streetPlain is forced to numeric: entries that cannot be parsed become NaN
# and are then filled with the sentinel 99999.
street_plain_numeric = pd.to_numeric(data_test['streetPlain'], errors='coerce')
data_test['streetPlain'] = street_plain_numeric.fillna(99999)

In [None]:
# Show the loaded model for target_1 as this cell's output (inspection only).
model_1

In [None]:
# Predict target_1 from the feature columns listed in its model config.
inputs_model_1 = data_test[features_1]
data_test['prediction_model_1'] = model_1.predict(inputs_model_1)

In [None]:
# Predict target_2 from the feature columns listed in its model config.
inputs_model_2 = data_test[features_2]
data_test['prediction_model_2'] = model_2.predict(inputs_model_2)

In [None]:
# The total prediction is the element-wise sum of the two per-target predictions.
data_test['prediction_total'] = data_test['prediction_model_1'].add(
    data_test['prediction_model_2']
)

### Evaluate the error where totalRent data is available

In [None]:
# Per-model residuals: actual value minus prediction.
data_test['error_1'] = data_test[target_1].sub(data_test['prediction_model_1'])
data_test['error_2'] = data_test[target_2].sub(data_test['prediction_model_2'])

# Combined residual: total prediction minus the sum of both actual targets.
# NOTE(review): this is prediction - actual, the opposite sign of error_1 and
# error_2. Absolute-value metrics are unaffected, but signed plots are not --
# confirm the intended convention.
actual_total = data_test[target_1].add(data_test[target_2])
data_test['total_error'] = data_test['prediction_total'].sub(actual_total)

In [None]:
# Mean absolute error of model 1; mean() skips rows whose error is NaN.
mae_model_1 = data_test['error_1'].abs().mean()
mae_model_1

In [None]:
# Mean absolute error of model 2; mean() skips rows whose error is NaN.
mae_model_2 = data_test['error_2'].abs().mean()
mae_model_2

In [None]:
# Mean absolute error of the combined prediction; mean() skips NaN rows.
mae_total = data_test['total_error'].abs().mean()
mae_total

The error is a bit higher than in validation. Maybe I should try other validation techniques, for example a different CV scheme.

In [None]:
from modelling_utils import visualize_errors_features, visualize_samples_error 

In [None]:
# Union of both models' feature columns, de-duplicated while keeping
# first-seen order. Iterating a set of strings can yield a different order
# after every kernel restart; dict.fromkeys keeps features_plot identical
# from run to run.
features_plot = list(dict.fromkeys([*features_1, *features_2]))

# Inspect the samples whose total error exceeds the absolute threshold of 5
# (exact selection semantics live in modelling_utils -- TODO confirm).
samples_error = visualize_samples_error(data = data_test,
                  error_column = 'total_error',
                  absolute_error_threshold = 5)

# Visualise the total error against every feature used by either model.
visualize_errors_features(data = data_test,
                    error_column = 'total_error',
                    features_visualize = features_plot)

### Evaluate the error of the predicted error

In [None]:
target_error = 'target_error'

# The error model's target is the magnitude of the total prediction error.
absolute_total_error = data_test['total_error'].abs()
data_test[target_error] = absolute_total_error

# Model and config are both looked up under the name 'target_error'
# (model first, then config).
model_error, model_config_error = load_model(target_error), load_model_config(target_error)

In [None]:
# Feature columns of the error model, read from the 'feature_cols' entry of its config.
features_error = model_config_error['feature_cols']

In [None]:
# Predict the absolute total error from the error model's feature columns.
inputs_model_error = data_test[features_error]
data_test['prediction_model_error'] = model_error.predict(inputs_model_error)

In [None]:
# Residual of the error model: actual absolute error minus predicted error.
data_test['error_of_error'] = data_test[target_error].sub(
    data_test['prediction_model_error']
)

In [None]:
# Mean absolute error of the error model; mean() skips NaN rows.
mae_error_model = data_test['error_of_error'].abs().mean()
mae_error_model

In [None]:
# Plot against exactly the features the error model was given.
features_plot = features_error

# Same inspection as for the total error, now on the error model's residual,
# again with an absolute threshold of 5.
samples_error = visualize_samples_error(
    data=data_test,
    error_column='error_of_error',
    absolute_error_threshold=5,
)

visualize_errors_features(
    data=data_test,
    error_column='error_of_error',
    features_visualize=features_plot,
)