### Model scoring function tested with X_test

Original dataset, split into train and test, X_test has been sent to the scoring function for testing

In [1]:
import pandas as pd
Prices = pd.read_csv('Car_prices_project_2.csv')   

In [2]:
from sklearn.model_selection import train_test_split
X = Prices.drop(columns=['price'])
Y = Prices.price

In [3]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=True,random_state=1301)

In [4]:
def project_2_scoring(Prices):
    from sklearn.preprocessing import LabelEncoder,OneHotEncoder
    import category_encoders as ce
    from copy import deepcopy
    from sklearn.preprocessing import MinMaxScaler
    import pickle
    from lightgbm import LGBMRegressor 
    
    X = Prices.copy()
    
    loaded_scalers = pickle.load(open('scalers.sav', 'rb'))
    loaded_encoders = pickle.load(open('categorical_encoders.sav', 'rb'))
    loaded_model = pickle.load(open('lgbm_model.sav', 'rb'))
    
    cols_to_drop = []
    cat_encoders = {}
    cat_enc_columns = []
    
    for col in Prices.columns:
      if Prices[col].dtype == 'object':
        if Prices[col].nunique() < 10:
            print("One-hot encoding of ", col)
            enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
            enc = loaded_encoders[col][0]
            result = enc.transform(Prices[[col]])
            ohe_columns = [col+"_"+str(x) for x in enc.categories_[0]]
            cat_enc_columns = cat_enc_columns + ohe_columns 
            result_df = pd.DataFrame(result, columns=ohe_columns)
            Prices = pd.concat([Prices, result_df], axis=1)
            cat_encoders[col] = [deepcopy(enc),"ohe"]
        else:
            print("Target Encode scaling of ", col)
            enc = ce.TargetEncoder()
            enc = loaded_encoders[col][0]
            new_col_name = col+"_trg"
            Prices[new_col_name] = enc.transform(Prices[[col]])
            cat_encoders[col] = [deepcopy(enc),"trg"]
            cat_enc_columns.append(new_col_name)

        
        cols_to_drop.append(col)
            
    num_scalers = {}
    '''Scale only original numerical columns'''
    for col in Prices.drop(columns=cat_enc_columns):
      if pd.api.types.is_numeric_dtype(Prices[col].dtype):
        print("MinMax scale of ", col)
        scaler = MinMaxScaler()
        scaler.fit(Prices[[col]])
        Prices[col+"_sc"] = scaler.transform(Prices[[col]])
    
        num_scalers[col] = [deepcopy(scaler),"MinMax"]
        cols_to_drop.append(col)
        
    X = Prices.drop(columns=cols_to_drop)
    
    
    y_predicted = loaded_model.predict(X)

    return list(y_predicted)

In [5]:
project_2_scoring(X_test)

Target Encode scaling of  mark
Target Encode scaling of  model
Target Encode scaling of  generation_name
One-hot encoding of  fuel
Target Encode scaling of  city
Target Encode scaling of  province
MinMax scale of  year
MinMax scale of  mileage
MinMax scale of  vol_engine


[33811.592757768485,
 33811.592757768485,
 37640.22418021815,
 17018.882344322978,
 21889.216760211017,
 37640.22418021815,
 70056.6215787934,
 34538.57659842917,
 33811.592757768485,
 33811.592757768485,
 37640.22418021815,
 33811.592757768485,
 37640.22418021815,
 33811.592757768485,
 33811.592757768485,
 37640.22418021815,
 37640.22418021815,
 37640.22418021815,
 33811.592757768485,
 33811.592757768485,
 37640.22418021815,
 33811.592757768485,
 51243.1379183344,
 33811.592757768485,
 34538.57659842917,
 33811.592757768485,
 33811.592757768485,
 45921.98636073476,
 33811.592757768485,
 37640.22418021815,
 37640.22418021815,
 40329.40133967234,
 37640.22418021815,
 37640.22418021815,
 33811.592757768485,
 37640.22418021815,
 33811.592757768485,
 33811.592757768485,
 38599.966218975416,
 33811.592757768485,
 37640.22418021815,
 37640.22418021815,
 28353.62389437978,
 37640.22418021815,
 33811.592757768485,
 37640.22418021815,
 37640.22418021815,
 33811.592757768485,
 33811.592757768485