In [144]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import confusion_matrix, classification_report, mean_absolute_error, mean_absolute_percentage_error, f1_score
from scipy.stats import randint
import time

In [94]:
#Load the per-segment prediction files (low / mid / high), in that order
pred_low, pred_mid, pred_high = [
    pd.read_csv(csv_path)
    for csv_path in (
        "08_results_models/Prediction_Low.csv",
        "08_results_models/Prediction_Mid.csv",
        "08_results_models/Prediction_High.csv",
    )
]

In [96]:
#Stack the three segment prediction frames, order them by row_id and renumber
#the index. Each frame comes from its own read_csv call and therefore carries
#its own 0..n-1 index; without the reset the stacked frame would keep duplicate
#index labels, which breaks any later label-based alignment or reindexing.
combine_2stage = (
    pd.concat([pred_low, pred_mid, pred_high], axis=0)
    .sort_values(by='row_id')
    .reset_index(drop=True)
)
#Displayed as the cell output: total number of stacked rows
len(combine_2stage)

52368

In [97]:
#Columns removed from every prediction frame further down (labelled the
#"no price" columns in this notebook). Order is kept as originally listed.
cols_to_drop = (
    # log_-prefixed columns
    ['log_odometer', 'log_car_age', 'log_miles_age', 'log_state_freq', 'log_model_freq']
    # remaining columns
    + ['manufacturer_marketcap', 'condition_grouped', 'cylinders_cat', 'fuel']
    + ['title_status', 'transmission', 'drive', 'type_grouped']
)

In [98]:
#Combine all segment predictions into the 2-stage prediction
def final_pred(row):
    """Return the prediction pair belonging to the row's predicted segment.

    Parameters
    ----------
    row : mapping-like (for example one DataFrame row)
        Must provide 'segment_pred' and the two prediction entries of the
        selected segment ('low_pred_log'/'low_pred', 'mid_pred_log'/'mid_pred'
        or 'high_pred_log'/'high_pred').

    Returns
    -------
    tuple
        (log prediction, prediction) read from the low, mid or high columns
        when 'segment_pred' equals 1, 2 or 3 respectively. Any other value
        (including NaN) yields (nan, nan) instead of an implicit None, so
        every row produces a pair of the same length.
    """
    segment = row['segment_pred']
    for code, log_col, price_col in (
        (1, 'low_pred_log', 'low_pred'),
        (2, 'mid_pred_log', 'mid_pred'),
        (3, 'high_pred_log', 'high_pred'),
    ):
        if segment == code:
            return row[log_col], row[price_col]
    # Unknown or missing segment: keep the result shape stable for the caller.
    return float('nan'), float('nan')
    
# Vectorised equivalent of applying final_pred to every row: for each row take
# the low / mid / high prediction when segment_pred is 1 / 2 / 3, and NaN for
# any other value. np.select returns plain arrays, so the assignment is
# positional and does not depend on index alignment.
_segment_masks = [combine_2stage['segment_pred'] == code for code in (1, 2, 3)]
combine_2stage['2stage_pred_log'] = np.select(
    _segment_masks,
    [combine_2stage['low_pred_log'], combine_2stage['mid_pred_log'], combine_2stage['high_pred_log']],
    default=np.nan,
)
combine_2stage['2stage_pred'] = np.select(
    _segment_masks,
    [combine_2stage['low_pred'], combine_2stage['mid_pred'], combine_2stage['high_pred']],
    default=np.nan,
)

In [99]:
#Drop the separate per-segment prediction columns (combine_2stage is modified in place)
combine_2stage.drop(
    [
        'low_pred_log', 'low_pred',
        'mid_pred_log', 'mid_pred',
        'high_pred_log', 'high_pred',
    ],
    axis='columns',
    inplace=True,
)

In [100]:
#Drop the columns listed in cols_to_drop (combine_2stage is modified in place)
combine_2stage.drop(cols_to_drop, axis='columns', inplace=True)

Combine with the 1-stage predictions

In [102]:
#Load the 1-stage predictions and discard the columns listed in cols_to_drop
pred_1stage = (
    pd.read_csv("08_results_models/Prediction_1stage.csv")
    .drop(columns=cols_to_drop)
)

In [104]:
#Left-join the 1-stage predictions onto the 2-stage frame via row_id, then
#order by row_id and renumber the index
merged_results = (
    combine_2stage
    .merge(pred_1stage, how='left', on='row_id')
    .sort_values(by='row_id')
    .reset_index(drop=True)
)

Combine with the Dummy predictions

In [105]:
#Load the Dummy predictions and discard the columns listed in cols_to_drop
# NOTE(review): unlike the other prediction files, this one is read from the
# working directory rather than 08_results_models/ - confirm the location.
pred_dummy = (
    pd.read_csv("Prediction_Dummy.csv")
    .drop(columns=cols_to_drop)
)

In [106]:
#Left-join the Dummy predictions onto the merged frame via row_id, then order
#by row_id and renumber the index
merged_results = (
    merged_results
    .merge(pred_dummy, how='left', on='row_id')
    .sort_values(by='row_id')
    .reset_index(drop=True)
)

In [112]:
#Write the combined predictions of all models to disk (index column omitted)
merged_results.to_csv(
    path_or_buf="08_results_models/Results_All_Models.csv",
    index=False,
)

Combine Feature Importance

In [159]:
#Load the feature-importance tables: classification, 1-stage regression and
#the low / mid / high segment files
(
    importance_class,
    importance_1stage,
    importance_low,
    importance_mid,
    importance_high,
) = map(
    pd.read_csv,
    (
        "08_feature_importance/Feature_Importance_Classification.csv",
        "08_feature_importance/Feature_Importance_Regression.csv",
        "08_feature_importance/Feature_Importance_Low.csv",
        "08_feature_importance/Feature_Importance_Mid.csv",
        "08_feature_importance/Feature_Importance_High.csv",
    ),
)

In [162]:
#Build one table with every feature-importance column, ranked by mean_importance.
#The tables are outer-joined on 'feature', so a feature missing from one table
#gets NaN there; mean(axis=1) skips NaN and averages only the values present.
#mean_importance covers the four importance_* columns named below.
feature_importance = (
    importance_class
    .merge(importance_1stage, how='outer', on='feature')
    .merge(importance_low, how='outer', on='feature')
    .merge(importance_mid, how='outer', on='feature')
    .merge(importance_high, how='outer', on='feature')
    .assign(
        mean_importance=lambda merged: merged[
            ['importance_1stage', 'importance_low', 'importance_mid', 'importance_high']
        ].mean(axis=1)
    )
    .sort_values(by='mean_importance', ascending=False)
)
feature_importance.to_csv('08_feature_importance/Feature_Importance_Combined.csv', index=False)