# In [2]:
# if code is running on IBM Cloud Pak, uncomment
# %%writefile FieldAssignment.py

import pandas as pd
import numpy as np
from loguru import logger
from timeit import default_timer as timer

class FieldAssignment:
    """Split the columns of a DataFrame into numerical, flag and categorical groups.

    Two kinds of columns are recognised as flags and rewritten as 1/0 integers:

    * columns of ``bool`` dtype;
    * text columns whose distinct non-missing values are exactly one of the
      keyword pairs in ``_FLAG_KEYWORD_PAIRS`` (compared case-insensitively).
    """

    # (true keyword, false keyword) pairs, English and Turkish:
    # Yes / No, Yes / None, Evet / Hayır, Var / Yok.
    # NOTE: matching relies on ``str.lower()``, which is not Turkish-aware
    # ("HAYIR".lower() gives "hayir", not "hayır"), so fully upper-cased
    # Turkish keywords containing "I" are not recognised.
    _FLAG_KEYWORD_PAIRS = (
        ("yes", "no"),
        ("yes", "none"),
        ("evet", "hayır"),
        ("var", "yok"),
    )

    def handle(self, df):
        """Classify the columns of ``df`` and encode flag columns as 1/0.

        Args:
            df: pandas DataFrame to analyse. Flag columns are overwritten
                in place with integer 1/0 values.

        Returns:
            A ``(df, fields)`` tuple where ``fields`` is a dict with the keys
            ``"cols_num"``, ``"cols_flag"`` and ``"cols_categ"``, each holding
            a list of column names. ``cols_num`` and ``cols_categ`` follow the
            column order of ``df``. ``cols_num`` is computed before flags are
            encoded, so converted flag columns are not added to it.
        """
        logger.info('Started fields assignments...')
        start = timer()

        numeric = set(df.select_dtypes(include=np.number).columns)
        # Iterate df.columns (instead of going through a set) so the
        # resulting lists have a deterministic, DataFrame-ordered layout.
        cols_num = [col for col in df.columns if col in numeric]
        cols_categ = [col for col in df.columns if col not in numeric]
        cols_flag = list(df.select_dtypes(include=["bool"]).columns)

        df, cols_categ, cols_flag = self._create_flags_and_categ(df, cols_categ, cols_flag)

        logger.info('Numerical columns: {} ', cols_num)
        logger.info('Flag columns: {} ', cols_flag)
        logger.info('Categorical columns: {} ', cols_categ)

        end = timer()
        logger.info('Fields assignment finished in {:.3f} seconds', end - start)

        return df, {
            "cols_num": cols_num,
            "cols_flag": cols_flag,
            "cols_categ": cols_categ,
        }

    @staticmethod
    def _lowercased_strings(series):
        """Return the set of lower-cased distinct strings in ``series``.

        Missing values (None / NaN / NaT / pd.NA) and the literal string
        ``'nan'`` are ignored. Returns ``None`` as soon as any other
        non-string value is found, because such a column cannot be a
        keyword flag.
        """
        lowered = set()
        for value in series:
            if isinstance(value, str):
                if value != 'nan':
                    lowered.add(value.lower())
            elif pd.api.types.is_scalar(value) and pd.isna(value):
                continue
            else:
                return None
        return lowered

    def _create_flags_and_categ(self, df, cols_categ, cols_flag):
        """Encode flag columns as 1/0 and separate them from categorical ones.

        Args:
            df: DataFrame whose flag columns are rewritten in place.
            cols_categ: names of the non-numerical columns of ``df``.
            cols_flag: names of the ``bool`` dtype columns of ``df``.

        Returns:
            ``(df, new_cols_categ, new_cols_flag)``: the updated frame, the
            categorical columns with every flag column removed, and the flag
            columns (keyword-detected ones first, then the boolean ones).
        """
        logger.info('Turning certain flag fields into ones and zeros...')

        new_cols_flag = []
        bool_cols = set(cols_flag)

        for col in cols_categ:
            if col in bool_cols:
                # Boolean columns hold no strings; they are converted below.
                continue

            lowered = self._lowercased_strings(df[col])
            if lowered is None or len(lowered) != 2:
                continue

            for true_kw, false_kw in self._FLAG_KEYWORD_PAIRS:
                if lowered == {true_kw, false_kw}:
                    logger.info(f"{col} changed...")
                    # Compare case-insensitively, exactly like the detection
                    # above; missing values are encoded as 0.
                    df[col] = [
                        int(isinstance(value, str) and value.lower() == true_kw)
                        for value in df[col]
                    ]
                    new_cols_flag.append(col)
                    break

        # True / False -> 1 / 0 for genuine boolean columns.
        for col in cols_flag:
            df[col] = df[col].astype(int)
            new_cols_flag.append(col)

        # Remove every flag column from the categorical ones, keeping order.
        flagged = set(new_cols_flag)
        new_cols_categ = [col for col in cols_categ if col not in flagged]

        return df, new_cols_categ, new_cols_flag
        