In [None]:
# src/feature_engineering.py

import pandas as pd
from sklearn.preprocessing import LabelEncoder

def load_data(filepath):
    """
    Read the CSV file at ``filepath`` and return its contents as a DataFrame.

    Args:
        filepath: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(filepath)

def handle_missing_values(df):
    """
    Fill missing values in the known feature columns of ``df``.

    Numeric columns are filled with their median. Non-numeric columns
    (for example a string-valued ``gender``) are filled with their most
    frequent value, because a median cannot be computed for text and
    raises ``TypeError``.

    Columns from the list that are not present in ``df`` are skipped.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object, with the listed columns imputed.
    """
    # NOTE(review): 'active_number' may be a misspelling of the real column
    # name -- confirm against the dataset header.
    columns_to_fill = (
        'credit_score',
        'age',
        'tenure',
        'estimated_salary',
        'balance',
        'products_number',
        'gender',
        'credit_card',
        'active_number',
    )

    for col in columns_to_fill:
        # Same guard as encode_categorical_features: tolerate absent columns.
        if col not in df.columns:
            continue

        series = df[col]
        if not series.isna().any():
            continue

        if pd.api.types.is_numeric_dtype(series):
            fill_value = series.median()
        else:
            # Median is undefined for text; fall back to the most common value.
            modes = series.mode(dropna=True)
            if modes.empty:
                continue
            fill_value = modes.iloc[0]

        # An all-missing column has no usable statistic; leave it untouched.
        if pd.isna(fill_value):
            continue

        # Assign back instead of chained ``fillna(..., inplace=True)``, which
        # operates on a temporary copy and stops working in pandas 3.0.
        df[col] = series.fillna(fill_value)

    return df

def create_age_groups(df):
    """
    Add an ``age_group`` column derived from ``age`` and drop incomplete rows.

    Ages are bucketed into left-closed intervals [17, 30), [30, 45),
    [45, 60) and [60, 100), labelled 'young', 'adult', 'middle-aged' and
    'senior'. Ages outside those intervals get a missing ``age_group``.
    Every row containing any missing value is then removed.

    Args:
        df: DataFrame with an ``age`` column. The ``age_group`` column is
            added to this object.

    Returns:
        A DataFrame without the rows that contained missing values.
    """
    age_edges = [17, 30, 45, 60, 100]
    group_names = ['young', 'adult', 'middle-aged', 'senior']

    # right=False makes each interval include its lower edge only.
    df['age_group'] = pd.cut(
        df['age'],
        bins=age_edges,
        labels=group_names,
        right=False,
    )
    return df.dropna()

def encode_categorical_features(df):
    """
    Replace categorical columns with integer codes from a LabelEncoder.

    The columns considered are 'country', 'gender', 'credit_card' and
    'age_group'; any of them missing from ``df`` is skipped. The encoder is
    refit for each column, so codes are independent between columns.

    Args:
        df: DataFrame whose categorical columns are overwritten.

    Returns:
        The same DataFrame with the present columns label-encoded.
    """
    encoder = LabelEncoder()
    candidates = ('country', 'gender', 'credit_card', 'age_group')
    present = [name for name in candidates if name in df.columns]

    for name in present:
        df[name] = encoder.fit_transform(df[name])

    return df

def save_processed_data(df, filepath):
    """
    Write ``df`` to ``filepath`` as CSV, omitting the index column.

    Args:
        df: DataFrame to persist.
        filepath: Destination path handed to ``DataFrame.to_csv``.
    """
    df.to_csv(path_or_buf=filepath, index=False)

def main(
    input_path="/workspaces/ts-aiml-datascience-challenge-Indhu2626/data/bank_churn.csv",
    output_path="/workspaces/ts-aiml-datascience-challenge-Indhu2626/data/processed_bank_churn.csv",
):
    """
    Run the feature-engineering pipeline from raw CSV to processed CSV.

    Steps, in order: load the data, impute missing values, derive age
    groups (which also drops rows with remaining missing values), label
    encode the categorical columns, and save the result.

    Args:
        input_path: CSV file to read. Defaults to the original workspace
            location so calling ``main()`` with no arguments is unchanged.
        output_path: CSV file to write. Defaults to the original workspace
            location.
    """
    # Load data
    df = load_data(input_path)

    # Handle missing values
    df = handle_missing_values(df)

    # Create age groups
    df = create_age_groups(df)

    # Encode categorical features
    df = encode_categorical_features(df)

    # Save processed data
    save_processed_data(df, output_path)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['credit_score'].fillna(df['credit_score'].median(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

TypeError: Cannot convert ['Female' 'Female' 'Female' ... 'Female' 'Male' 'Female'] to numeric