In [None]:
from src.exception import ProjectException
from src.logger import logging
from src.predictor import ModelResolver
import pandas as pd
from src.utils import load_object
import os,sys
from datetime import datetime
from src.config import important_features ,TARGET_COLUMN
from src.utils import parse_date_DOB , parse_date_for_tenure , calculate_age , calculate_tenure
# Directory that start_batch_prediction creates (if missing) before it runs.
PREDICTION_DIR = "prediction"
import numpy as np
 
# Default input CSV, resolved against the current working directory of the kernel.
file_path = os.path.join(
    os.getcwd(),
    "dataset/HRDataset_v14.csv",
)

def start_batch_prediction(input_file_path):
    """Run the latest saved model over a CSV file and return the scored rows.

    Steps, in order:
      1. Read the CSV and treat the literal string "na" as missing.
      2. Keep only the columns listed in ``important_features``.
      3. Derive ``Age`` from ``DOB`` and ``Tenure`` from the hire/termination
         dates, then drop the raw date columns.
      4. Transform the columns the saved transformer was fitted on and
         combine them with the remaining columns.
      5. Predict with the latest saved model.
      6. Write the inverse-transformed values back into the frame and attach
         the model output as a ``prediction`` column.

    Parameters
    ----------
    input_file_path : str or path-like
        Location of the CSV file to score.

    Returns
    -------
    pandas.DataFrame
        The prepared frame with one extra ``prediction`` column. Earlier
        versions returned ``None``, which discarded the model output.

    Raises
    ------
    ProjectException
        Wraps any exception raised while reading, transforming or predicting.
    """
    # Ages above this are replaced by the mean of the remaining ages.
    # NOTE(review): 50 is carried over unchanged; presumably an outlier cutoff - confirm.
    age_cap = 50
    date_columns = ["DOB", "DateofHire", "DateofTermination"]

    try:
        # NOTE(review): the directory is created but nothing is written into it
        # here; presumably a CSV export was planned - confirm before removing.
        os.makedirs(PREDICTION_DIR, exist_ok=True)

        logging.info("Creating model resolver object")
        model_resolver = ModelResolver(model_registry="saved_models")

        logging.info(f"Reading file :{input_file_path}")
        df = pd.read_csv(input_file_path)
        # np.nan is the canonical spelling; the upper-case alias np.NAN is not
        # available in NumPy 2.x.
        df.replace({"na": np.nan}, inplace=True)

        # Keep only the columns the pipeline is configured to use.
        not_important_features = [
            column for column in df.columns if column not in important_features
        ]
        df.drop(not_important_features, axis=1, inplace=True)

        # Age: parse DOB, compute the age, then cap values above age_cap.
        df["DOB"] = df["DOB"].apply(parse_date_DOB)
        df["Age"] = df["DOB"].apply(calculate_age)
        mean_age = df.loc[df["Age"] <= age_cap, "Age"].mean()
        df["Age"] = df["Age"].apply(lambda x: mean_age if x > age_cap else x)

        # Tenure: calculate_tenure receives each row with both parsed dates.
        df["DateofHire"] = df["DateofHire"].apply(parse_date_for_tenure)
        df["DateofTermination"] = df["DateofTermination"].apply(parse_date_for_tenure)
        df["Tenure"] = df.apply(calculate_tenure, axis=1)
        df.drop(date_columns, axis=1, inplace=True)

        print(df.shape)
        print(df.columns)

        # The transformer records the column names it was fitted on; reusing
        # them keeps the column order identical to training.
        transformer = load_object(file_path=model_resolver.get_latest_transformer_path())
        input_feature_names = list(transformer.feature_names_in_)
        feature_encoded_to_encoded = transformer.transform(df[input_feature_names])
        feature_encoded_df = pd.DataFrame(
            feature_encoded_to_encoded,
            columns=transformer.get_feature_names_out(input_feature_names),
        )

        # Untransformed columns on the left, transformed columns on the right.
        # Both indexes are reset so the concat aligns rows by position.
        df_encoded = pd.concat(
            [
                df.drop(columns=input_feature_names).reset_index(drop=True),
                feature_encoded_df.reset_index(drop=True),
            ],
            axis=1,
        )

        # The target is not a model input. errors="ignore" lets unlabeled
        # files (no target column) be scored too.
        input_df = df_encoded.drop(columns=TARGET_COLUMN, errors="ignore")

        model = load_object(file_path=model_resolver.get_latest_model_path())
        prediction = model.predict(input_df)

        # Write the values recovered by inverse_transform back into df so the
        # returned frame carries them next to the prediction.
        original_categorical_before_encoded = transformer.inverse_transform(
            feature_encoded_to_encoded
        )
        for i, feature in enumerate(transformer.feature_names_in_):
            df[feature] = original_categorical_before_encoded[:, i]

        # Expose the model output instead of discarding it.
        df["prediction"] = prediction
        return df

    except Exception as e:
        # Chain explicitly so the original traceback stays attached.
        raise ProjectException(e, sys) from e

In [73]:
# Score the default dataset configured in `file_path`.
start_batch_prediction(input_file_path=file_path)

(311, 18)
Index(['Employee_Name', 'MarriedID', 'GenderID', 'Salary', 'Termd', 'Position',
       'State', 'Zip', 'HispanicLatino', 'Department', 'ManagerName',
       'PerformanceScore', 'EngagementSurvey', 'EmpSatisfaction',
       'SpecialProjectsCount', 'Absences', 'Age', 'Tenure'],
      dtype='object')
           Employee_Name  MarriedID  GenderID  Salary  Termd  \
0    Adinolfi, Wilson  K          0         1   62506      0   
1                   None          1         1  104437      1   
2      Akinkuolie, Sarah          1         0   64955      1   
3           Alagbe,Trina          1         0   64991      0   
4       Anderson, Carol           0         0   50825      1   
..                   ...        ...       ...     ...    ...   
306       Woodson, Jason          0         1   65893      0   
307   Ybarra, Catherine           0         0   48513      1   
308     Zamora, Jennifer          0         0  220450      0   
309                 None          0         0   892

In [24]:
# NOTE(review): in the cells visible here, `prediction` is only bound as a local
# variable inside start_batch_prediction, so this cell would raise NameError on a
# fresh kernel unless the name is defined in a cell not shown - confirm.
prediction
