In [1]:
import os

In [2]:
os.chdir("../")

In [3]:
%pwd

'c:\\Users\\Kshitij\\Downloads\\DPDZero\\DPDZero'

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataFeatureConfig:
    root_dir: Path
    data_dir: Path
    output_dir: Path

In [7]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml,create_directories

In [12]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_feature_config(self) -> DataFeatureConfig:
        config = self.config.feature_engineering

        create_directories([config.root_dir])

        data_feature_config = DataFeatureConfig(
            root_dir = config.root_dir,
            data_dir = config.data_dir,
            output_dir = config.output_dir
        )
        return data_feature_config

In [13]:
import pandas as pd
import os
import urllib.request as request
import zipfile
from mlProject import logger
from mlProject.utils.common import get_size

In [None]:
class FeatureEngineering:
    def __init__(self, config: DataFeatureConfig):
        self.config = config

    def transform(self):
        merged_df = pd.read_csv(self.config.data_dir)
        agent_performance = merged_df.groupby(['agent_id', 'users_first_name', 'users_last_name', 
                                        'users_office_location', 'org_id', 'call_date']).apply(self.calculate_metrics).reset_index()
        agent_performance = agent_performance[[
            'agent_id', 'users_first_name', 'users_last_name', 'users_office_location', 'org_id',
            'call_date', 'login_time', 'presence', 'total_calls', 'unique_loans_contacted',
            'connect_rate', 'avg_call_duration'
        ]]
        
        agent_performance['connect_rate'] = agent_performance['connect_rate'].apply(lambda x: f"{x:.2%}")
        agent_performance.to_csv(self.config.output_dir, index=False)
            
    
    def calculate_metrics(self,group):
        total_calls = group['call_id'].nunique()
        unique_loans = group['installment_id'].nunique()
        
        completed_calls = group[group['status'] == 'completed']['call_id'].nunique()
        connect_rate = completed_calls / total_calls if total_calls > 0 else 0
        
        avg_duration = group['duration'].mean() if total_calls > 0 else 0
        presence = 1 if pd.notna(group['login_time'].iloc[0]) else 0
        
        return pd.Series({
            'total_calls': total_calls,
            'unique_loans_contacted': unique_loans,
            'connect_rate': connect_rate,
            'avg_call_duration': avg_duration,
            'presence': presence,
            'login_time': group['login_time'].iloc[0] if pd.notna(group['login_time'].iloc[0]) else 'Not Logged In'
        })

In [20]:
try:
    config = ConfigurationManager()
    data_feature_config = config.get_feature_config()
    data_feature = FeatureEngineering(config=data_feature_config)
    data_feature.transform()
except Exception as e:
    raise e

[2025-05-03 20:06:23,047: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-03 20:06:23,051: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-03 20:06:23,056: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-05-03 20:06:23,058: INFO: common: created directory at: artifacts]
[2025-05-03 20:06:23,061: INFO: common: created directory at: artifacts/feature_engineering]


  'users_office_location', 'org_id', 'call_date']).apply(self.calculate_metrics).reset_index()
