# Module

In [69]:
#Module
import itertools
import pandas as pd
import tqdm
import json

def import_rules_dict(RULES_JSON_PATH):
    """Load the rules definition from a JSON file.

    :param RULES_JSON_PATH: path to the JSON file holding the rules
    :return: dict built from the decoded JSON content
    """
    # Read as UTF-8 explicitly: without it, open() falls back to the
    # platform's locale encoding (e.g. cp1252 on Windows), which garbles or
    # rejects non-ASCII text in the rules file.
    with open(RULES_JSON_PATH, encoding='utf-8') as json_file:
        return dict(json.load(json_file))

def update_rules_json(new_rules, RULES_PATH):
    """Placeholder for writing new rules into the rules JSON file.

    Not implemented yet: both arguments are ignored and nothing is read or
    written.

    :param new_rules: currently unused
    :param RULES_PATH: currently unused
    :return: None
    """
    # TODO: implementation notes left by the author: "glob", "json loads".
    return None

def check_rule(RULES_JSON_PATH):
    """Load the rules and work out which condition combinations they share.

    Every rule is expanded into the set of tuples produced by the cartesian
    product of its condition value lists. The tuples present in *every* rule
    are then mapped back to the ids of the rules containing them.

    NOTE(review): that overlap mapping is built but neither returned nor
    reported -- only the loaded rules are returned, which is what
    ``show_rules_df`` consumes. Decide whether overlaps should be surfaced.

    :param RULES_JSON_PATH: path to the JSON file holding the rules
    :return: the rules dict as loaded from the file
    """
    rules_dict = import_rules_dict(RULES_JSON_PATH)

    # Expand each rule into every value combination its conditions cover.
    combos_by_rule = {
        rule_id: set(itertools.product(*rule['condition'].values()))
        for rule_id, rule in rules_dict.items()
    }

    # set.intersection() needs at least one operand; an empty rules file
    # would otherwise raise TypeError here.
    if combos_by_rule:
        shared_combos = set.intersection(*combos_by_rule.values())
    else:
        shared_combos = set()

    # Map every shared combination back to the rules that contain it.
    rules_by_combo = {combo: [] for combo in shared_combos}
    for rule_id, combos in combos_by_rule.items():
        for combo in combos & shared_combos:
            rules_by_combo[combo].append(rule_id)

    return rules_dict



def create_rule_masks(df, RULES_JSON_PATH):
    '''
    Build one boolean row mask per rule (only the & operator is supported).

    A row matches a rule when, for every feature listed in the rule's
    'condition', the row's value is one of the values listed for that feature.

    :param df: dataframe the rules are evaluated against
    :param RULES_JSON_PATH: path to the JSON file containing the rules (each rule is a dictionary with 'condition' and 'implication' entries)
    :return: dict of {<rule id>: {'mask': <boolean Series over df rows>, 'value': <the rule's 'implication'>}}
    :raises ValueError: if a rule lists no conditions
    '''
    rules_json = import_rules_dict(RULES_JSON_PATH)

    msk_dict = {}
    for id_ in tqdm.tqdm(rules_json):
        rule = rules_json[id_]

        # Reset for every rule: otherwise a rule without conditions would
        # silently inherit the mask computed for the previous rule.
        msk = None
        for feature, allowed_values in rule['condition'].items():
            feature_msk = df[feature].isin(allowed_values)
            msk = feature_msk if msk is None else msk & feature_msk

        if msk is None:
            # No conditions means there is nothing to match on; fail loudly
            # rather than guess which rows the rule should affect.
            raise ValueError('rule {!r} has no conditions'.format(id_))

        # rule['implication'] is a dict of {<feature>:[<value>]}
        msk_dict[id_] = {'mask': msk, 'value': rule['implication']}

    return msk_dict

def apply_rules(df, RULES_JSON_PATH):
    '''
    Apply every rule's implication to the rows that rule matches.

    Rules are applied in the order they appear in the rules file, so a later
    rule overwrites an earlier one on rows both of them match.

    :param df: dataframe to apply the rules to (it is copied, not modified)
    :param RULES_JSON_PATH: path to the JSON file containing the rules
    :return: copy of df with each rule's implication values written into the rows selected by that rule's mask
    '''
    rule_masks = create_rule_masks(df, RULES_JSON_PATH)
    result = df.copy()

    for rule in tqdm.tqdm(rule_masks.values()):
        matched_rows = rule['mask']
        for column, new_value in rule['value'].items():
            result.loc[matched_rows, column] = new_value

    return result

def show_rules_df(RULES_JSON_PATH):
    """Return the rules file as a dataframe indexed by (rule id, column).

    Each rule becomes a block of rows, one per key found in its nested
    dictionaries, stacked under the rule's id.

    :param RULES_JSON_PATH: path to the JSON file containing the rules
    :return: dataframe with a two-level index named ('id', 'column')
    """
    # Read from the path argument, not the notebook-level RULES_PATH global,
    # so the function works for any rules file and without that global.
    rules_dict = check_rule(RULES_JSON_PATH=RULES_JSON_PATH)
    rules_df = pd.concat(
        {rule_id: pd.DataFrame(rule) for rule_id, rule in rules_dict.items()}
    )
    rules_df.index = rules_df.index.set_names(['id', 'column'])
    return rules_df

In [70]:
# Locations of the rules file and of the historical data used by the cells below.
# NOTE(review): both are absolute paths on one specific machine, so the notebook
# cannot be re-run elsewhere as-is -- consider paths relative to the project root.
# RULES_PATH is parsed as JSON by import_rules_dict despite its .py extension.
RULES_PATH = r'C:\Users\User Ambev\Desktop\Célula de analytics\Projetos\iva-apfj\src\data\rules_test.py'
DATA_PATH = r'C:\Users\User Ambev\Desktop\Célula de analytics\Projetos\iva-apfj\data\external\history.csv'

In [71]:
# Load the history file: semicolon-separated, ISO-8859-1 encoded.
data = pd.read_csv(DATA_PATH, encoding = 'iso-8859-1', sep = ';')

# Create subset of data to test

In [72]:
# Keep only the first 100 rows as a quick sample, then display the
# Filial, Material and IVAMIRO columns of that sample.
test_data = data.iloc[:100]
test_data[['Filial','Material','IVAMIRO']]

Unnamed: 0,Filial,Material,IVAMIRO
0,BR46,50007747,I9
1,BRVB,50119095,C3
2,BR40,50408948,I9
3,BR16,50377153,I9
4,BR16,50007747,I7
...,...,...,...
95,BR19,50007747,I7
96,BR23,50007747,I7
97,BR40,50007747,I7
98,BR19,50007747,I7


# Check rules mask dict

In [74]:
# Build the per-rule masks for the sample; the commented-out suffix counts
# the rows matched by rule '12345' instead of showing the whole dict.
create_rule_masks(df=test_data,RULES_JSON_PATH = RULES_PATH)#['12345']['mask'].sum()

100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 401.04it/s]


{'12345': {'mask': 0     False
  1     False
  2     False
  3     False
  4     False
        ...  
  95     True
  96     True
  97     True
  98     True
  99    False
  Length: 100, dtype: bool,
  'value': {'IVAMIRO': ['TESTE']}},
 '713123': {'mask': 0     False
  1     False
  2     False
  3     False
  4     False
        ...  
  95    False
  96    False
  97    False
  98    False
  99    False
  Length: 100, dtype: bool,
  'value': {'IVAMIRO': ['I8']}}}

# Apply rules to data

In [75]:
# Two progress bars are printed: one while the masks are built, one while the
# rules are applied.
# NOTE(review): test_data is rebound to the result, so re-running this cell
# applies the rules to already-modified data.
test_data = apply_rules(test_data,RULES_JSON_PATH = RULES_PATH)
test_data[['Filial','Material','IVAMIRO']]

100%|██████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1003.66it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 497.46it/s]


Unnamed: 0,Filial,Material,IVAMIRO
0,BR46,50007747,I9
1,BRVB,50119095,C3
2,BR40,50408948,I9
3,BR16,50377153,I9
4,BR16,50007747,I7
...,...,...,...
95,BR19,50007747,TESTE
96,BR23,50007747,TESTE
97,BR40,50007747,TESTE
98,BR19,50007747,TESTE


# Show rules data frame

In [76]:
# Tabular view of the rules file, indexed by (rule id, column).
show_rules_df(RULES_JSON_PATH = RULES_PATH)

Unnamed: 0_level_0,Unnamed: 1_level_0,condition,implication,date,user,description
id,column,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
12345,Material,"[50007747, 50335560, 425452]",,2019-01-01,99813554,this rules was created in order to do somethin...
12345,Filial,"[BR19, BR23, BR40]",,2019-01-01,99813554,this rules was created in order to do somethin...
12345,IVAMIRO,,[TESTE],2019-01-01,99813554,this rules was created in order to do somethin...
713123,Material,"[12345, 123, 6531]",,2019-05-01,9726759,this rules was created in order to do somethin...
713123,Filial,"[BR11, BRZV]",,2019-05-01,9726759,this rules was created in order to do somethin...
713123,Fornecedor,"[123124, 23124]",,2019-05-01,9726759,this rules was created in order to do somethin...
713123,IVAMIRO,,[I8],2019-05-01,9726759,this rules was created in order to do somethin...
