In [1]:
!pip install pandas

You should consider upgrading via the '/home/alessandro/.asdf/installs/python/3.10.5/bin/python3.10 -m pip install --upgrade pip' command.[0m[33m
[0m

In [2]:
import pandas
from functools import reduce
import re

# Load the table whose rows are checked against the constraints.
df = pandas.read_csv('db-a.csv')
# NOTE(review): this binds a second name to the SAME DataFrame object, not a
# copy -- any in-place change made through `df` is visible through
# `df_mirror` as well. Use `df.copy()` if an untouched snapshot is intended.
df_mirror = df

# Only the first line of the constraints file is read; it holds the
# comma-separated constraint expressions.
with open('constraints.txt', 'r') as file:
    first_line = file.readline().strip()

# Trim the whitespace around every expression and drop empty entries, so that
# input such as "a, b" or a trailing comma does not produce constraints with
# leading spaces or empty strings.
constraints = [part.strip() for part in first_line.split(',') if part.strip()]

In [3]:
# Show the parsed constraints, a blank separator line, and then the table.
print(constraints, end='\n\n')
print(df)

['t1.salario > t2.salario', 't1.ano < t2.ano']

   id   ano      departamento  salario   bonus
0   1  2023  Recursos Humanos    50000   10000
1   2  2023            Vendas    60000  350000
2   3  2022        Tecnologia    75000  100000
3   4  2024         Marketing    55000   80000
4   5  2023        Financeiro    70000   90000
5   6  2023         Operações    48000       0


In [4]:
def get_col_name(alias, constraint):
    """Return the column referenced through `alias` in `constraint`.

    Looks for the first occurrence of ``<alias>.<column>`` and returns the
    column part; for example ``get_col_name('t2', 't1.salario >= t2.bonus')``
    gives ``'bonus'``.

    Args:
        alias: Table alias to look for (such as ``'t1'``); matched literally.
        constraint: Constraint expression to search.

    Returns:
        The column name following the first ``alias.`` reference, or ``None``
        when the alias is not referenced in `constraint`.
    """
    # The negative lookbehind keeps the alias from matching as the tail of a
    # longer identifier (alias 't1' must not match inside 'tt1.x').
    pattern = r'(?<!\w)' + re.escape(alias) + r'\.(\w+)'
    match = re.search(pattern, constraint)
    return match.group(1) if match else None

In [5]:
# Quick check: extract the column referenced by each alias, one per line.
print(
    *(get_col_name(alias, 't1.salario >= t2.bonus') for alias in ('t1', 't2')),
    sep='\n',
)

salario
bonus


In [6]:
def switch_t1_by_scalar(constraint, value):
    """Replace every ``t1.<column>`` reference in `constraint` with `value`.

    Args:
        constraint: Constraint expression, e.g. ``'t1.salario > t2.salario'``.
        value: Scalar to substitute; it is inserted as ``str(value)``.

    Returns:
        A new string with each ``t1.<column>`` occurrence replaced, e.g.
        ``'50000 > t2.salario'``.
    """
    replacement = str(value)
    # A callable replacement makes re.sub insert the text verbatim. A plain
    # replacement string is treated as a template, so a value containing a
    # backslash would be rewritten (escape sequences) or rejected (group
    # references).
    # The lookbehind stops 't1' from matching inside a longer name ('tt1.x').
    return re.sub(r'(?<!\w)t1\.\w+', lambda _match: replacement, constraint)

def resolve_left_side_constraints(df_tuple, constraints):
    """Substitute the ``t1`` side of each constraint with values from one row.

    For every expression, the column referenced through ``t1`` is looked up in
    `df_tuple` and each ``t1.<column>`` reference is replaced by that value.

    Args:
        df_tuple: Row-like object indexable by column name.
        constraints: Iterable of constraint expressions.

    Returns:
        A list with one left-resolved expression per input constraint, in the
        same order.
    """
    return [
        switch_t1_by_scalar(expr, df_tuple[get_col_name('t1', expr)])
        for expr in constraints
    ]

In [7]:
# Use the first row of the table as a sample tuple and show its constraints
# with the t1 side replaced by that row's values.
df_tuple = df.iloc[0]
resolve_left_side_constraints(df_tuple=df_tuple, constraints=constraints)

['50000 > t2.salario', '2023 < t2.ano']

In [8]:
def to_number_else_str(input_string):
    """Convert `input_string` to ``int`` when it is an integer literal.

    An integer literal here is a run of decimal digits with an optional single
    leading minus sign. Anything else, including the empty string, is returned
    unchanged.

    Args:
        input_string: Text to convert.

    Returns:
        ``int(input_string)`` for integer literals, otherwise `input_string`
        itself.
    """
    # Set aside one optional leading minus sign before checking the digits.
    digits = input_string[1:] if input_string.startswith('-') else input_string
    # isdecimal() only accepts characters int() can parse (isdigit() also
    # accepts e.g. superscript digits, which int() rejects), and it is False
    # for the empty string, so '' and '-' fall through without indexing errors.
    if digits.isdecimal():
        return int(input_string)
    return input_string

In [9]:
# Maps a comparison-operator token to a function that evaluates
# `scalar <op> df[col]`. `df` only has to support `df[col]`.
operators_fn = {
    '>': lambda scalar, df, col: scalar > df[col],
    '<': lambda scalar, df, col: scalar < df[col],
    '=': lambda scalar, df, col: scalar == df[col],
    '>=': lambda scalar, df, col: scalar >= df[col],
    '<=': lambda scalar, df, col: scalar <= df[col],
    '<>': lambda scalar, df, col: scalar != df[col],
    # Alternative spellings accepted as aliases of '=' and '<>'.
    '==': lambda scalar, df, col: scalar == df[col],
    '!=': lambda scalar, df, col: scalar != df[col],
}

def gen_pandas_condition(df, lcs):
    """Evaluate a left-resolved constraint against a column of `df`.

    `lcs` is expected to look like ``<scalar> <operator> t2.<column>``, for
    example ``'50000 > t2.salario'``. The scalar is converted with
    `to_number_else_str` and the operator is looked up in `operators_fn`.

    Args:
        df: Object indexable by column name.
        lcs: Left-resolved constraint string.

    Returns:
        The result of the operator function for ``scalar <op> df[column]``.

    Raises:
        ValueError: If `lcs` does not have the expected form.
        KeyError: If the operator token is not a key of `operators_fn`.
    """
    # Parse in one pass instead of splitting on single spaces: the scalar may
    # itself contain spaces (e.g. 'Recursos Humanos'), and tokens may be
    # separated by more than one space. The lazy scalar group ends at the
    # first "<operator> t2.<column>" sequence.
    parsed = re.match(r'(?P<scalar>.*?)\s+(?P<op>\S+)\s+t2\.(?P<col>\w+)', lcs)
    if parsed is None:
        raise ValueError(
            f"expected '<scalar> <operator> t2.<column>', got {lcs!r}"
        )

    scalar = to_number_else_str(parsed.group('scalar'))
    return operators_fn[parsed.group('op')](scalar, df, parsed.group('col'))

In [10]:
# For every row: print the raw constraints, the constraints with the t1 side
# replaced by that row's values, and the ids of the rows that satisfy all of
# the resolved constraints at once.
for idx, line in df.iterrows():
    left_resolved_constraints = resolve_left_side_constraints(line, constraints)
    print(constraints, left_resolved_constraints, sep='\n')
    pandas_conditions = [
        gen_pandas_condition(df, lcs) for lcs in left_resolved_constraints
    ]
    # AND the per-constraint masks together into a single mask.
    pandas_conditions_set = reduce(lambda acc, cond: acc & cond, pandas_conditions)
    print(df.loc[pandas_conditions_set, 'id'].to_list(), end='\n\n')


['t1.salario > t2.salario', 't1.ano < t2.ano']
['50000 > t2.salario', '2023 < t2.ano']
[]

['t1.salario > t2.salario', 't1.ano < t2.ano']
['60000 > t2.salario', '2023 < t2.ano']
[4]

['t1.salario > t2.salario', 't1.ano < t2.ano']
['75000 > t2.salario', '2022 < t2.ano']
[1, 2, 4, 5, 6]

['t1.salario > t2.salario', 't1.ano < t2.ano']
['55000 > t2.salario', '2024 < t2.ano']
[]

['t1.salario > t2.salario', 't1.ano < t2.ano']
['70000 > t2.salario', '2023 < t2.ano']
[4]

['t1.salario > t2.salario', 't1.ano < t2.ano']
['48000 > t2.salario', '2023 < t2.ano']
[]



In [11]:
# Number of rows in the table; used to build one result list per row.
tuples_qtd = len(df.index)

# Every row starts with zero violations and its own empty list of targets.
df['violations'] = 0
df['targets'] = [list() for _ in range(tuples_qtd)]

# For each row, find the ids of the rows that satisfy all resolved
# constraints together and store them, along with their count, on that row.
for idx, line in df.iterrows():
    left_resolved_constraints = resolve_left_side_constraints(line, constraints)
    pandas_conditions = [
        gen_pandas_condition(df, lcs) for lcs in left_resolved_constraints
    ]
    pandas_conditions_set = reduce(lambda acc, cond: acc & cond, pandas_conditions)
    violations_ids = df.loc[pandas_conditions_set, 'id'].to_list()
    df.at[idx, 'targets'] = violations_ids
    df.at[idx, 'violations'] = len(violations_ids)

print(df)

   id   ano      departamento  salario   bonus  violations          targets
0   1  2023  Recursos Humanos    50000   10000           0               []
1   2  2023            Vendas    60000  350000           1              [4]
2   3  2022        Tecnologia    75000  100000           5  [1, 2, 4, 5, 6]
3   4  2024         Marketing    55000   80000           0               []
4   5  2023        Financeiro    70000   90000           1              [4]
5   6  2023         Operações    48000       0           0               []
