In [None]:
# src/feature_engineering.py

import pandas as pd
from sklearn.preprocessing import LabelEncoder

def load_data(filepath):
    """Read the CSV file at ``filepath`` and return it as a pandas DataFrame.

    Args:
        filepath: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file to read.

    Returns:
        The parsed DataFrame, using ``pd.read_csv`` defaults.
    """
    return pd.read_csv(filepath)

def handle_missing_values(df):
    """Fill missing values in the numeric columns with each column's mean.

    The columns ``age``, ``balance``, ``credit_score`` and
    ``estimated_salary`` are imputed in that order. Other columns are left
    untouched. The per-column count of remaining nulls is printed as a
    quick sanity check.

    Args:
        df: DataFrame containing the four numeric columns listed above.
            It is modified in place.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: If one of the four columns is absent from ``df``.
    """
    mean_imputed_columns = ('age', 'balance', 'credit_score', 'estimated_salary')
    for name in mean_imputed_columns:
        column = df[name]
        df[name] = column.fillna(column.mean())

    # Report how many nulls remain in every column after imputation.
    print(df.isnull().sum())
    return df

def create_age_groups(df):
    """Add a categorical ``age_groups`` column derived from ``age``.

    Ages are bucketed into left-closed, right-open intervals:
    [17, 30) -> 'young', [30, 45) -> 'adult', [45, 60) -> 'middle-aged',
    [60, 100) -> 'senior'. Ages outside [17, 100) get no group (NaN).

    Args:
        df: DataFrame with an ``age`` column. The ``age_groups`` column is
            added to it in place.

    Returns:
        A new DataFrame without any row that has a missing value in *any*
        column. This removes rows whose age fell outside the bins, and
        also rows with NaN in unrelated columns.
    """
    # (lower bound, label) for each bracket; the last bracket ends at upper_limit.
    brackets = ((17, 'young'), (30, 'adult'), (45, 'middle-aged'), (60, 'senior'))
    upper_limit = 100

    edges = [lower for lower, _ in brackets] + [upper_limit]
    names = [label for _, label in brackets]

    # right=False makes each interval include its lower edge and exclude its upper edge.
    df['age_groups'] = pd.cut(df['age'], bins=edges, labels=names, right=False)
    return df.dropna()

def encode_categorical_features(df):
    """Replace the categorical columns with integer label codes.

    The columns ``country``, ``gender`` and ``credit_card`` are each
    label-encoded independently with scikit-learn's ``LabelEncoder``.

    Args:
        df: DataFrame containing the three columns above. It is modified
            in place.

    Returns:
        The same DataFrame object, with the three columns overwritten by
        their encoded values.
    """
    encoder = LabelEncoder()
    for name in ('country', 'gender', 'credit_card'):
        # fit_transform refits on every call, so one encoder instance can be
        # reused; the per-column mapping is not kept after the loop.
        codes = encoder.fit_transform(df[name])
        df[name] = codes
    return df

def save_processed_data(df, filepath):
    """Write ``df`` to ``filepath`` as CSV, omitting the index column.

    Args:
        df: The processed DataFrame to persist.
        filepath: Destination path of the CSV file.
    """
    df.to_csv(path_or_buf=filepath, index=False)

def main():
    """Run the feature-engineering pipeline end to end.

    Loads ``data/bank_churn.csv``, imputes missing numeric values, adds
    age groups, label-encodes the categorical columns, and writes the
    result to ``data/processed_bank_churn.csv``.
    """
    frame = load_data("data/bank_churn.csv")

    # Each stage takes the frame and returns the frame to hand to the next one.
    stages = (
        handle_missing_values,
        create_age_groups,
        encode_categorical_features,
    )
    for stage in stages:
        frame = stage(frame)

    save_processed_data(frame, 'data/processed_bank_churn.csv')

# Run the pipeline only when this module is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


customer_id         0
credit_score        0
country             0
gender              0
age                 0
tenure              0
balance             0
products_number     0
credit_card         0
active_member       0
estimated_salary    0
churn               0
dtype: int64
