In [None]:
# src/feature_engineering.py

import pandas as pd
from sklearn.preprocessing import LabelEncoder

def load_data(filepath):
    """Read the CSV file at ``filepath`` and return it as a pandas DataFrame.

    Args:
        filepath: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file to read.

    Returns:
        The parsed ``DataFrame``, using ``pd.read_csv`` defaults.
    """
    return pd.read_csv(filepath)

def handle_missing_values(df):
    """Fill missing values: column mean for numbers, column mode for objects.

    Numeric columns have their missing entries replaced by the column mean.
    Object-dtype columns have their missing entries replaced by the most
    frequent value (the first one when several values tie).

    Columns that contain no observed value at all are left unchanged: a
    numeric column's mean is NaN, and an object column has no mode to fill
    with.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same ``df`` object, with missing values filled.
    """
    def _fill_with_mode(col):
        # mode() ignores missing values, so it is empty for an all-missing
        # column; indexing it would raise, so return such columns untouched.
        modes = col.mode()
        if modes.empty:
            return col
        return col.fillna(modes.iloc[0])

    numerical_cols = df.select_dtypes(include=['number']).columns
    df[numerical_cols] = df[numerical_cols].apply(lambda col: col.fillna(col.mean()))

    categorical_cols = df.select_dtypes(include=['object']).columns
    df[categorical_cols] = df[categorical_cols].apply(_fill_with_mode)

    return df

def create_age_groups(df):
    """Add an ``age_range`` column that buckets ``age`` into labelled groups.

    Rows whose ``age`` is missing are dropped first. The remaining ages are
    binned with right-closed intervals:

        (17, 30]  -> 'young'
        (30, 45]  -> 'adult'
        (45, 60]  -> 'middle-aged'
        (60, 100] -> 'senior'

    Ages outside (17, 100] fall in no bin and get a missing ``age_range``.

    Args:
        df: DataFrame containing an ``age`` column. It is not modified.

    Returns:
        A new DataFrame without the missing-age rows and with the
        categorical ``age_range`` column added.
    """
    bins = [17, 30, 45, 60, 100]  # Bin edges; each interval is right-closed
    labels = ['young', 'adult', 'middle-aged', 'senior']  # One label per interval

    # A list-valued subset is accepted by every pandas version.
    with_age = df.dropna(subset=['age'])

    # assign() builds a new frame instead of writing into the dropna result,
    # which avoids SettingWithCopyWarning on pandas without copy-on-write.
    return with_age.assign(age_range=pd.cut(with_age['age'], bins, labels=labels))

def encode_categorical_features(df):
    """Label-encode the ``country``, ``gender`` and ``credit_card`` columns.

    Each of the three columns is replaced by the integer codes produced by
    ``LabelEncoder.fit_transform``. One encoder instance is re-fitted for
    every column, so the fitted mapping of earlier columns is not kept.

    Args:
        df: DataFrame holding the three columns. It is modified in place.

    Returns:
        The same ``df`` object with the three columns encoded.
    """
    encoder = LabelEncoder()
    for name in ('country', 'gender', 'credit_card'):
        df[name] = encoder.fit_transform(df[name])
    return df

def save_processed_data(df, filepath):
    """Write ``df`` to ``filepath`` as CSV, leaving out the index column.

    Args:
        df: DataFrame to persist.
        filepath: Destination path of the CSV file.
    """
    df.to_csv(path_or_buf=filepath, index=False)

def main(
    input_path="/workspaces/ts-aiml-datascience-challenge-kartikeya01/data/bank_churn.csv",
    output_path='/workspaces/ts-aiml-datascience-challenge-kartikeya01/data/processed_bank_churn.csv',
):
    """Run the feature-engineering pipeline from raw CSV to processed CSV.

    Steps, in order: load the data, fill missing values, add age groups,
    label-encode the categorical columns, and save the result.

    Args:
        input_path: CSV file to read. Defaults to the original workspace
            location of ``bank_churn.csv``.
        output_path: CSV file to write. Defaults to the original workspace
            location of ``processed_bank_churn.csv``.
    """
    # Load data
    df = load_data(input_path)

    # Handle missing values
    df = handle_missing_values(df)

    # Create age groups
    df = create_age_groups(df)

    # Encode categorical features
    df = encode_categorical_features(df)

    # Save processed data
    save_processed_data(df, output_path)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


NameError: name 'src' is not defined