# FEATURE ENGINEERING
## FEATURE CREATION

In [43]:
from zipfile import ZipFile
from src.utils.helpers import convert_to_category, deplacer_colonnes, iqr
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

In [2]:
df_client = pd.read_csv('../data/processed/df_client_train_processed.csv')

In [3]:
zip_path = "../data/processed/df_transactions_train_processed.zip"

with ZipFile(zip_path, 'r') as zip_ref:
    csv_filename = zip_ref.namelist()[0]
    with zip_ref.open(csv_filename) as csv_file:
        df_transactions = pd.read_csv(csv_file)

In [4]:
df_client_1 = df_client.copy()
df_transactions_1 = df_transactions.copy()

In [5]:
df_client_1 = convert_to_category(df_client_1, ['district', 'client_catg', 'region'])
df_transactions_1 = convert_to_category(df_transactions_1, ['tarif_type','counter_statue', 'counter_coefficient'])

In [6]:
df_client_1['creation_date'] = pd.to_datetime(df_client_1['creation_date'], format= 'mixed', dayfirst=True)
df_transactions_1['invoice_date'] = pd.to_datetime(df_transactions_1['invoice_date'])

In [7]:
df_transactions_1['lag_1'] = df_transactions_1.groupby('client_id')['consommation_total'].shift(1)

In [8]:
df_transactions_1['variation_conso'] = ((df_transactions_1['consommation_total'] - df_transactions_1['lag_1'])/df_transactions_1['lag_1'])*100

In [9]:
borne_inf, borne_sup = iqr(df_transactions_1, 'consommation_total')

df_transactions_1['outlier_conso_total'] = ((df_transactions_1['consommation_total'] < borne_inf) | (df_transactions_1['consommation_total'] > borne_sup)).astype(int)

borne_inf_var, borne_sup_var = iqr(df_transactions_1, 'variation_conso')

df_transactions_1['outlier_variation'] = ((df_transactions_1['variation_conso'] < borne_inf_var) | (df_transactions_1['variation_conso'] > borne_sup_var)).astype(int)

In [10]:
df_transactions_1['is_null_conso'] = (df_transactions_1['consommation_total'] == 0).astype(int)

In [11]:
df_transactions_1['part_conso_level_1'] = (df_transactions_1['consommation_level_1'] / df_transactions_1['consommation_total'])*100
df_transactions_1['part_conso_level_2'] = (df_transactions_1['consommation_level_2'] / df_transactions_1['consommation_total'])*100
df_transactions_1['part_conso_level_3'] = (df_transactions_1['consommation_level_3'] / df_transactions_1['consommation_total'])*100
df_transactions_1['part_conso_level_4'] = (df_transactions_1['consommation_level_4'] / df_transactions_1['consommation_total'])*100

In [12]:
df_transactions_1['dominant_level'] = df_transactions_1[['part_conso_level_1', 'part_conso_level_2', 'part_conso_level_3', 'part_conso_level_4']].idxmax(axis=1)

  df_transactions_1['dominant_level'] = df_transactions_1[['part_conso_level_1', 'part_conso_level_2', 'part_conso_level_3', 'part_conso_level_4']].idxmax(axis=1)


In [13]:
df_transactions_1['lag_level_1'] = df_transactions_1.groupby('client_id')['consommation_level_1'].shift(1)
df_transactions_1['lag_level_2'] = df_transactions_1.groupby('client_id')['consommation_level_2'].shift(1)
df_transactions_1['lag_level_3'] = df_transactions_1.groupby('client_id')['consommation_level_3'].shift(1)
df_transactions_1['lag_level_4'] = df_transactions_1.groupby('client_id')['consommation_level_4'].shift(1)

In [14]:
df_transactions_1['variation_level_1'] = ((df_transactions_1['consommation_level_1'] - df_transactions_1['lag_level_1'])/df_transactions_1['lag_level_1'])*100
df_transactions_1['variation_level_2'] = ((df_transactions_1['consommation_level_2'] - df_transactions_1['lag_level_2'])/df_transactions_1['lag_level_2'])*100
df_transactions_1['variation_level_3'] = ((df_transactions_1['consommation_level_3'] - df_transactions_1['lag_level_3'])/df_transactions_1['lag_level_3'])*100
df_transactions_1['variation_level_4'] = ((df_transactions_1['consommation_level_4'] - df_transactions_1['lag_level_4'])/df_transactions_1['lag_level_4'])*100

In [15]:
borne_inf_level_1, borne_sup_level_1 = iqr(df_transactions_1, 'consommation_level_1')
borne_inf_level_2, borne_sup_level_2 = iqr(df_transactions_1, 'consommation_level_2')
borne_inf_level_3, borne_sup_level_3 = iqr(df_transactions_1, 'consommation_level_3')
borne_inf_level_4, borne_sup_level_4 = iqr(df_transactions_1, 'consommation_level_4')

df_transactions_1['outlier_level_1'] = ((df_transactions_1['consommation_level_1'] < borne_inf_level_1) | (df_transactions_1['consommation_level_1'] > borne_sup_level_1)).astype(int)
df_transactions_1['outlier_level_2'] = ((df_transactions_1['consommation_level_2'] < borne_inf_level_2) | (df_transactions_1['consommation_level_2'] > borne_sup_level_2)).astype(int)
df_transactions_1['outlier_level_3'] = ((df_transactions_1['consommation_level_3'] < borne_inf_level_3) | (df_transactions_1['consommation_level_3'] > borne_sup_level_3)).astype(int)
df_transactions_1['outlier_level_4'] = ((df_transactions_1['consommation_level_4'] < borne_inf_level_4) | (df_transactions_1['consommation_level_4'] > borne_sup_level_4)).astype(int)

In [16]:
borne_inf_var_level_1, borne_sup_var_level_1 = iqr(df_transactions_1, 'variation_level_1')
borne_inf_var_level_2, borne_sup_var_level_2 = iqr(df_transactions_1, 'variation_level_2')
borne_inf_var_level_3, borne_sup_var_level_3 = iqr(df_transactions_1, 'variation_level_3')
borne_inf_var_level_4, borne_sup_var_level_4 = iqr(df_transactions_1, 'variation_level_4')

df_transactions_1['outlier_var_level_1'] = ((df_transactions_1['variation_level_1'] < borne_inf_var_level_1) | (df_transactions_1['variation_level_1'] > borne_sup_var_level_1)).astype(int)
df_transactions_1['outlier_var_level_2'] = ((df_transactions_1['variation_level_2'] < borne_inf_var_level_2) | (df_transactions_1['variation_level_2'] > borne_sup_var_level_2)).astype(int)
df_transactions_1['outlier_var_level_3'] = ((df_transactions_1['variation_level_3'] < borne_inf_var_level_3) | (df_transactions_1['variation_level_3'] > borne_sup_var_level_3)).astype(int)
df_transactions_1['outlier_var_level_4'] = ((df_transactions_1['variation_level_4'] < borne_inf_var_level_4) | (df_transactions_1['variation_level_4'] > borne_sup_var_level_4)).astype(int)

  diff_b_a = subtract(b, a)
  diff_b_a = subtract(b, a)


In [23]:
df_transactions_1['is_reading_remarque_6'] = (df_transactions_1['reading_remarque'] == 6).astype(int)
df_transactions_1['is_reading_remarque_7'] = (df_transactions_1['reading_remarque'] == 7).astype(int)
df_transactions_1['is_reading_remarque_8'] = (df_transactions_1['reading_remarque'] == 8).astype(int)
df_transactions_1['is_reading_remarque_9'] = (df_transactions_1['reading_remarque'] == 9).astype(int)

In [24]:
df_transactions_1['is_counter_coefficient_1'] = (df_transactions_1['counter_coefficient'] == 1).astype(int)
df_transactions_1['is_counter_coefficient_20'] = (df_transactions_1['counter_coefficient'] == 20).astype(int)

In [25]:
df_transactions_1 = convert_to_category(df_transactions_1, ['outlier_conso_total', 'outlier_variation', 'is_null_conso','dominant_level', 
                                                            'is_reading_remarque_6', 'is_reading_remarque_7', 'is_reading_remarque_8', 'is_reading_remarque_9', 'is_counter_coefficient_1', 'is_counter_coefficient_20'])

In [26]:
df_transactions_1['dominant_level'] = df_transactions_1['dominant_level'].cat.set_categories(['part_conso_level_1', 'part_conso_level_2', 'part_conso_level_3', 'part_conso_level_4'])
df_transactions_1['dominant_level'] = df_transactions_1['dominant_level'].cat.rename_categories({'part_conso_level_1':1, 'part_conso_level_2':2, 'part_conso_level_3':3, 'part_conso_level_4':4})

In [27]:
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
cat_columns = ['tarif_type', 'counter_statue', 'counter_coefficient', 'outlier_conso_total', 'outlier_variation', 'is_null_conso', 'dominant_level', 
               'is_reading_remarque_6', 'is_reading_remarque_7', 'is_reading_remarque_8', 'is_reading_remarque_9', 'is_counter_coefficient_1', 'is_counter_coefficient_20']
encoded = encoder.fit_transform(df_transactions_1[cat_columns])


In [28]:
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(cat_columns))

df_transactions_final = pd.concat([df_transactions_1.drop(cat_columns, axis=1), encoded_df], axis=1)

In [29]:
df_transactions_final.columns

Index(['client_id', 'invoice_date', 'counter_number', 'counter_code',
       'reading_remarque', 'consommation_level_1', 'consommation_level_2',
       'consommation_level_3', 'consommation_level_4', 'old_index',
       'new_index', 'months_number', 'consommation_total', 'conso_level',
       'ecart', 'conso_incoherente', 'lag_1', 'variation_conso',
       'part_conso_level_1', 'part_conso_level_2', 'part_conso_level_3',
       'part_conso_level_4', 'lag_level_1', 'lag_level_2', 'lag_level_3',
       'lag_level_4', 'variation_level_1', 'variation_level_2',
       'variation_level_3', 'variation_level_4', 'outlier_level_1',
       'outlier_level_2', 'outlier_level_3', 'outlier_level_4',
       'outlier_var_level_1', 'outlier_var_level_2', 'outlier_var_level_3',
       'outlier_var_level_4', 'tarif_type_9', 'tarif_type_10', 'tarif_type_11',
       'tarif_type_12', 'tarif_type_13', 'tarif_type_14', 'tarif_type_15',
       'tarif_type_18', 'tarif_type_21', 'tarif_type_24', 'tarif_type_29',

In [34]:
df_agg = df_transactions_final.groupby('client_id').agg(
    last_invoice_date = pd.NamedAgg(column='invoice_date', aggfunc='max'),
    counter_number = pd.NamedAgg(column='counter_number', aggfunc='max'),
    counter_code = pd.NamedAgg(column='counter_code', aggfunc='max'),
    reading_remarque_6 = pd.NamedAgg(column='is_reading_remarque_6_1', aggfunc='sum'),
    reading_remarque_7 = pd.NamedAgg(column='is_reading_remarque_7_1', aggfunc='sum'),
    reading_remarque_8 = pd.NamedAgg(column='is_reading_remarque_8_1', aggfunc='sum'),
    reading_remarque_9 = pd.NamedAgg(column='is_reading_remarque_9_1', aggfunc='sum'),
    reading_remarque_min = pd.NamedAgg(column='reading_remarque', aggfunc='min'),
    reading_remarque_max = pd.NamedAgg(column='reading_remarque', aggfunc='max'),
    reading_remarque_std = pd.NamedAgg(column='reading_remarque', aggfunc='std'),
    reading_remarque_mean = pd.NamedAgg(column='reading_remarque', aggfunc='mean'),
    reading_remarque_median = pd.NamedAgg(column='reading_remarque', aggfunc='median'),
    part_conso_level_1_min = pd.NamedAgg(column='part_conso_level_1', aggfunc='min'),
    part_conso_level_1_max = pd.NamedAgg(column='part_conso_level_1', aggfunc='max'),
    part_conso_level_1_std = pd.NamedAgg(column='part_conso_level_1', aggfunc='std'),
    part_conso_level_1_mean = pd.NamedAgg(column='part_conso_level_1', aggfunc='mean'),
    part_conso_level_1_median = pd.NamedAgg(column='part_conso_level_1', aggfunc='median'),
    part_conso_level_2_min = pd.NamedAgg(column='part_conso_level_2', aggfunc='min'),
    part_conso_level_2_max = pd.NamedAgg(column='part_conso_level_2', aggfunc='max'),
    part_conso_level_2_std = pd.NamedAgg(column='part_conso_level_2', aggfunc='std'),
    part_conso_level_2_mean = pd.NamedAgg(column='part_conso_level_2', aggfunc='mean'),
    part_conso_level_2_median = pd.NamedAgg(column='part_conso_level_2', aggfunc='median'),
    part_conso_level_3_min = pd.NamedAgg(column='part_conso_level_3', aggfunc='min'),
    part_conso_level_3_max = pd.NamedAgg(column='part_conso_level_3', aggfunc='max'),
    part_conso_level_3_std = pd.NamedAgg(column='part_conso_level_3', aggfunc='std'),
    part_conso_level_3_mean = pd.NamedAgg(column='part_conso_level_3', aggfunc='mean'),
    part_conso_level_3_median = pd.NamedAgg(column='part_conso_level_3', aggfunc='median'),
    part_conso_level_4_min = pd.NamedAgg(column='part_conso_level_4', aggfunc='min'),
    part_conso_level_4_max = pd.NamedAgg(column='part_conso_level_4', aggfunc='max'),
    part_conso_level_4_std = pd.NamedAgg(column='part_conso_level_4', aggfunc='std'),
    part_conso_level_4_mean = pd.NamedAgg(column='part_conso_level_4', aggfunc='mean'),
    part_conso_level_4_median = pd.NamedAgg(column='part_conso_level_4', aggfunc='median'),
    conso_level_1_sum = pd.NamedAgg(column='consommation_level_1', aggfunc='sum'),
    conso_level_2_sum = pd.NamedAgg(column='consommation_level_2', aggfunc='sum'),
    conso_level_3_sum = pd.NamedAgg(column='consommation_level_3', aggfunc='sum'),
    conso_level_4_sum = pd.NamedAgg(column='consommation_level_4', aggfunc='sum'),
    conso_level_1_min = pd.NamedAgg(column='consommation_level_1', aggfunc='min'),
    conso_level_1_max = pd.NamedAgg(column='consommation_level_1', aggfunc='max'),
    conso_level_1_std = pd.NamedAgg(column='consommation_level_1', aggfunc='std'),
    conso_level_1_mean = pd.NamedAgg(column='consommation_level_1', aggfunc='mean'),
    conso_level_1_median = pd.NamedAgg(column='consommation_level_1', aggfunc='median'),
    conso_level_2_min = pd.NamedAgg(column='consommation_level_2', aggfunc='min'),
    conso_level_2_max = pd.NamedAgg(column='consommation_level_2', aggfunc='max'),
    conso_level_2_std = pd.NamedAgg(column='consommation_level_2', aggfunc='std'),
    conso_level_2_mean = pd.NamedAgg(column='consommation_level_2', aggfunc='mean'),
    conso_level_2_median = pd.NamedAgg(column='consommation_level_2', aggfunc='median'),
    conso_level_3_min = pd.NamedAgg(column='consommation_level_3', aggfunc='min'),
    conso_level_3_max = pd.NamedAgg(column='consommation_level_3', aggfunc='max'),
    conso_level_3_std = pd.NamedAgg(column='consommation_level_3', aggfunc='std'),
    conso_level_3_mean = pd.NamedAgg(column='consommation_level_3', aggfunc='mean'),
    conso_level_3_median = pd.NamedAgg(column='consommation_level_3', aggfunc='median'),
    conso_level_4_min = pd.NamedAgg(column='consommation_level_4', aggfunc='min'),
    conso_level_4_max = pd.NamedAgg(column='consommation_level_4', aggfunc='max'),
    conso_level_4_std = pd.NamedAgg(column='consommation_level_4', aggfunc='std'),
    conso_level_4_mean = pd.NamedAgg(column='consommation_level_4', aggfunc='mean'),
    conso_level_4_median = pd.NamedAgg(column='consommation_level_4', aggfunc='median'),
    conso_level_1_var_min = pd.NamedAgg(column='variation_level_1', aggfunc='min'),
    conso_level_1_var_max = pd.NamedAgg(column='variation_level_1', aggfunc='max'),
    conso_level_1_var_std = pd.NamedAgg(column='variation_level_1', aggfunc='std'),
    conso_level_1_var_mean = pd.NamedAgg(column='variation_level_1', aggfunc='mean'),
    conso_level_1_var_median = pd.NamedAgg(column='variation_level_1', aggfunc='median'),
    conso_level_2_var_min = pd.NamedAgg(column='variation_level_2', aggfunc='min'),
    conso_level_2_var_max = pd.NamedAgg(column='variation_level_2', aggfunc='max'),
    conso_level_2_var_std = pd.NamedAgg(column='variation_level_2', aggfunc='std'),
    conso_level_2_var_mean = pd.NamedAgg(column='variation_level_2', aggfunc='mean'),
    conso_level_2_var_median = pd.NamedAgg(column='variation_level_2', aggfunc='median'),
    conso_level_3_var_min = pd.NamedAgg(column='variation_level_3', aggfunc='min'),
    conso_level_3_var_max = pd.NamedAgg(column='variation_level_3', aggfunc='max'),
    conso_level_3_var_std = pd.NamedAgg(column='variation_level_3', aggfunc='std'),
    conso_level_3_var_mean = pd.NamedAgg(column='variation_level_3', aggfunc='mean'),
    conso_level_3_var_median = pd.NamedAgg(column='variation_level_3', aggfunc='median'),
    conso_level_4_var_min = pd.NamedAgg(column='variation_level_4', aggfunc='min'),
    conso_level_4_var_max = pd.NamedAgg(column='variation_level_4', aggfunc='max'),
    conso_level_4_var_std = pd.NamedAgg(column='variation_level_4', aggfunc='std'),
    conso_level_4_var_mean = pd.NamedAgg(column='variation_level_4', aggfunc='mean'),
    conso_level_4_var_median = pd.NamedAgg(column='variation_level_4', aggfunc='median'),
    nbre_outlier_conso_level_1 = pd.NamedAgg(column='outlier_level_1', aggfunc='sum'),
    nbre_outlier_conso_level_2 = pd.NamedAgg(column='outlier_level_2', aggfunc='sum'),
    nbre_outlier_conso_level_3 = pd.NamedAgg(column='outlier_level_3', aggfunc='sum'),
    nbre_outlier_conso_level_4 = pd.NamedAgg(column='outlier_level_4', aggfunc='sum'),
    nbre_outlier_conso_var_level_1 = pd.NamedAgg(column='outlier_var_level_1', aggfunc='sum'),
    nbre_outlier_conso_var_level_2 = pd.NamedAgg(column='outlier_var_level_2', aggfunc='sum'),
    nbre_outlier_conso_var_level_3 = pd.NamedAgg(column='outlier_var_level_3', aggfunc='sum'),
    nbre_outlier_conso_var_level_4 = pd.NamedAgg(column='outlier_var_level_4', aggfunc='sum'),
    conso_sum = pd.NamedAgg(column='consommation_total', aggfunc='sum'),
    conso_min = pd.NamedAgg(column='consommation_total', aggfunc='min'),
    conso_max = pd.NamedAgg(column='consommation_total', aggfunc='max'),
    conso_std = pd.NamedAgg(column='consommation_total', aggfunc='std'),
    conso_mean = pd.NamedAgg(column='consommation_total', aggfunc='mean'),
    conso_median = pd.NamedAgg(column='consommation_total', aggfunc='median'),
    nbre_outlier_conso = pd.NamedAgg(column='outlier_conso_total_1', aggfunc='sum'),
    conso_var_min = pd.NamedAgg(column='variation_conso', aggfunc='min'),
    conso_var_max = pd.NamedAgg(column='variation_conso', aggfunc='max'),
    conso_var_std = pd.NamedAgg(column='variation_conso', aggfunc='std'),
    conso_var_mean = pd.NamedAgg(column='variation_conso', aggfunc='mean'),
    conso_var_median = pd.NamedAgg(column='variation_conso', aggfunc='median'),
    nbre_outlier_var_conso = pd.NamedAgg(column='outlier_variation_1', aggfunc='sum'),
    is_tarif_type_9 = pd.NamedAgg(column='tarif_type_9', aggfunc='sum'),
    is_tarif_type_10 = pd.NamedAgg(column='tarif_type_10', aggfunc='sum'),
    is_tarif_type_11 = pd.NamedAgg(column='tarif_type_11', aggfunc='sum'),
    is_tarif_type_12 = pd.NamedAgg(column='tarif_type_12', aggfunc='sum'),
    is_tarif_type_13 = pd.NamedAgg(column='tarif_type_13', aggfunc='sum'),
    is_tarif_type_14 = pd.NamedAgg(column='tarif_type_14', aggfunc='sum'),
    is_tarif_type_15 = pd.NamedAgg(column='tarif_type_15', aggfunc='sum'),
    is_tarif_type_18 = pd.NamedAgg(column='tarif_type_18', aggfunc='sum'),
    is_tarif_type_21 = pd.NamedAgg(column='tarif_type_21', aggfunc='sum'),
    is_tarif_type_24 = pd.NamedAgg(column='tarif_type_24', aggfunc='sum'),
    is_tarif_type_29 = pd.NamedAgg(column='tarif_type_29', aggfunc='sum'),
    is_counter_statue_0 = pd.NamedAgg(column='counter_statue_0', aggfunc='sum'),
    is_counter_statue_1 = pd.NamedAgg(column='counter_statue_1', aggfunc='sum'),
    is_counter_statue_2 = pd.NamedAgg(column='counter_statue_2', aggfunc='sum'),
    is_counter_statue_3 = pd.NamedAgg(column='counter_statue_3', aggfunc='sum'),
    is_counter_statue_4 = pd.NamedAgg(column='counter_statue_4', aggfunc='sum'),
    is_counter_statue_5 = pd.NamedAgg(column='counter_statue_5', aggfunc='sum'),
    is_counter_coefficient_1 = pd.NamedAgg(column='counter_coefficient_1', aggfunc='sum'),
    is_counter_coefficient_20 = pd.NamedAgg(column='counter_coefficient_20', aggfunc='sum'),
    is_conso_null = pd.NamedAgg(column='is_null_conso_1', aggfunc='sum'),
    dominant_level_1 = pd.NamedAgg(column='dominant_level_1.0', aggfunc='sum'),
    dominant_level_2 = pd.NamedAgg(column='dominant_level_2.0', aggfunc='sum'),
    dominant_level_3 = pd.NamedAgg(column='dominant_level_3.0', aggfunc='sum'),
    dominant_level_4 = pd.NamedAgg(column='dominant_level_4.0', aggfunc='sum')
    ).reset_index()

In [35]:
df_train_merged = pd.merge(df_client, df_agg, on='client_id', how='right')

In [None]:
df_train_merged['last_invoice_date'] = pd.to_datetime(df_train_merged['last_invoice_date'])
df_train_merged['creation_date'] = pd.to_datetime(df_train_merged['creation_date'])
df_train_merged['seniority'] = (df_train_merged['last_invoice_date'] - df_train_merged['creation_date']).dt.days


In [45]:
df_train_merged = deplacer_colonnes(df_train_merged, 'target',127)

In [46]:
df_train_merged.head()

Unnamed: 0,client_id,district,client_catg,region,creation_date,last_invoice_date,counter_number,counter_code,reading_remarque_6,reading_remarque_7,...,is_counter_statue_5,is_counter_coefficient_1,is_counter_coefficient_20,is_conso_null,dominant_level_1,dominant_level_2,dominant_level_3,dominant_level_4,seniority,target
0,train_Client_0,60,11,101,1994-12-31,2019-03-19,1335667,207,21.0,0.0,...,0.0,35.0,0.0,0.0,35.0,0.0,0.0,0.0,8844,0.0
1,train_Client_1,69,11,107,2002-05-29,2019-04-02,678902,203,20.0,0.0,...,0.0,37.0,0.0,0.0,37.0,0.0,0.0,0.0,6152,0.0
2,train_Client_10,62,11,301,1986-03-13,2019-05-02,572765,207,10.0,0.0,...,0.0,18.0,0.0,0.0,18.0,0.0,0.0,0.0,12103,0.0
3,train_Client_100,69,11,105,1996-07-11,2012-09-25,2078,413,19.0,0.0,...,0.0,20.0,0.0,16.0,4.0,0.0,0.0,0.0,5920,0.0
4,train_Client_1000,62,11,303,2014-10-14,2019-06-17,19575,207,0.0,0.0,...,0.0,14.0,0.0,0.0,14.0,0.0,0.0,0.0,1707,0.0


## FEATURES EDA