# In [2]:
# src/feature_engineering.py

import pandas as pd
from sklearn.preprocessing import LabelEncoder

def load_data(filepath):
    """Read the CSV at *filepath* and return it as a pandas DataFrame.

    Args:
        filepath: Anything ``pandas.read_csv`` accepts (a path or a
            file-like object).

    Returns:
        The parsed DataFrame.
    """
    return pd.read_csv(filepath)

def handle_missing_values(df):
    """Impute missing values: mean for numeric columns, mode for object columns.

    The frame is modified in place and also returned, so both
    ``handle_missing_values(df)`` and ``df = handle_missing_values(df)`` work.

    Args:
        df: DataFrame to impute.

    Returns:
        The same DataFrame with missing values filled where a fill value
        exists. Object columns that are entirely missing are left untouched.
    """
    numerical_cols = df.select_dtypes(include=['number']).columns
    for name in numerical_cols:
        df[name] = df[name].fillna(df[name].mean())

    categorical_cols = df.select_dtypes(include=['object']).columns
    for name in categorical_cols:
        modes = df[name].mode()
        # An all-missing column has no mode (mode() drops NaN and returns an
        # empty Series); indexing it would raise, so leave the column as is.
        if modes.empty:
            continue
        df[name] = df[name].fillna(modes.iloc[0])

    return df

def create_age_groups(df, bins=None, labels=None):
    """Drop rows without an age and add a categorical ``age_range`` column.

    Args:
        df: DataFrame containing an ``age`` column.
        bins: Bin edges passed to ``pandas.cut``. Defaults to
            ``[17, 30, 45, 60, 100]``.
        labels: One label per bin. Defaults to
            ``['young', 'adult', 'middle-aged', 'senior']``.

    Returns:
        A new DataFrame (the input is not modified) without the rows whose
        ``age`` is missing, plus an ``age_range`` column. With ``pandas.cut``
        defaults the bins are right-closed, so ages that fall outside the
        edges (e.g. exactly 17 with the default bins) get a missing
        ``age_range``.
    """
    if bins is None:
        bins = [17, 30, 45, 60, 100]  # Age ranges
    if labels is None:
        labels = ['young', 'adult', 'middle-aged', 'senior']  # Labels for age groups

    # A list for `subset` works on every pandas version; a bare string does not.
    with_age = df.dropna(subset=['age'])
    # assign() builds a new frame instead of writing into the dropna() result,
    # which avoids chained-assignment warnings.
    return with_age.assign(age_range=pd.cut(with_age['age'], bins, labels=labels))

def encode_categorical_features(df):
    """Replace the categorical columns with integer codes from LabelEncoder.

    The columns ``country``, ``gender``, ``credit_card`` and ``age_range``
    are each overwritten in place with the result of
    ``LabelEncoder.fit_transform``. The encoder is refit for every column and
    is not returned, so the label-to-code mappings are not kept.

    Args:
        df: DataFrame containing the four columns listed above.

    Returns:
        The same DataFrame with those columns label-encoded.
    """
    encoder = LabelEncoder()
    for column_name in ('country', 'gender', 'credit_card', 'age_range'):
        encoded_values = encoder.fit_transform(df[column_name])
        df[column_name] = encoded_values
    return df

def save_processed_data(df, filepath):
    """Write *df* as CSV to *filepath*, omitting the index column.

    Args:
        df: DataFrame to persist.
        filepath: Destination accepted by ``DataFrame.to_csv`` (a path or a
            writable text buffer).
    """
    df.to_csv(path_or_buf=filepath, index=False)

def main(
    input_path="/workspaces/ts-aiml-datascience-challenge-vinayaks372/data/bank_churn.csv",
    output_path="/workspaces/ts-aiml-datascience-challenge-vinayaks372/data/processed_bank_churn.csv",
):
    """Run the feature-engineering pipeline from raw CSV to processed CSV.

    Steps, in order: load the data, impute missing values, derive age
    groups, label-encode the categorical columns, and save the result.

    Args:
        input_path: Location of the raw CSV. Defaults to the original
            workspace path so calling ``main()`` with no arguments behaves
            as before.
        output_path: Where the processed CSV is written. Defaults to the
            original workspace path.
    """
    df = load_data(input_path)

    df = handle_missing_values(df)

    df = create_age_groups(df)

    df = encode_categorical_features(df)

    save_processed_data(df, output_path)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()