In [1]:
!pip install pandas

You should consider upgrading via the '/home/alessandro/.asdf/installs/python/3.10.5/bin/python3.10 -m pip install --upgrade pip' command.[0m[33m
[0m

In [2]:
import pandas
dataset = pandas.read_csv('db-a.csv')

with open('constraints.txt', 'r') as file:
    first_line = file.readline().strip()

constraints = first_line.split(',')

In [3]:
print(constraints)
print()
print(dataset)

['t1.salario > t2.salario', 't1.ano < t2.ano', 't1.salario < t1.bonus']

   id   ano      departamento  salario   bonus
0   1  2023  Recursos Humanos    50000   10000
1   2  2023            Vendas    60000  350000
2   3  2022        Tecnologia    75000  100000
3   4  2024         Marketing    55000   80000
4   5  2023        Financeiro    70000   90000
5   6  2023         Operações    48000       0


In [4]:
def split_components(predicate):
    components = predicate.split(' ')
    return {
        't1_col': components[0].split('.')[1],
        't2_col': components[2].split('.')[1],
        'operator': components[1],
        'same_table': components[0].split('.')[0] == components[2].split('.')[0]
    }


In [5]:
operators = {
  '>': lambda a, b: a > b,
  '<': lambda a, b: a < b,
  '=': lambda a, b: a == b,
  '>=': lambda a, b: a >= b,
  '<=': lambda a, b: a <= b,
  '<>': lambda a, b: a != b,
}

In [6]:
def constraint_to_lambda_func(constraint):
  pc = split_components(constraint)
  return {
    'lambda_fn': lambda t1, t2: operators[pc['operator']](t1[pc['t1_col']], t2[pc['t2_col']]),
    'same_table': pc['same_table']
  }

In [7]:
constraints_validations = [ constraint_to_lambda_func(i) for i in constraints]

cv_metadata = {
  'has_equal_table': any(cv['same_table'] for cv in constraints_validations),
  'has_diff_table': any(not cv['same_table'] for cv in constraints_validations),
}

def violates_all_constraints(constraints_validations, t1, t2):
  for cv in constraints_validations:
    result = cv['lambda_fn'](t1, t1) if cv['same_table'] else cv['lambda_fn'](t1, t2)
    if not result:
      return False
  
  return True

In [8]:
# print(violates_all_constraints(constraints_validations, dataset.iloc[1], dataset.iloc[3]))

In [9]:
tuples_qtd = dataset.shape[0]

dataset['violation'] = False

for t1_idx in range(tuples_qtd):
  for t2_idx in range(t1_idx + 1, tuples_qtd):
    t1 = dataset.iloc[t1_idx]
    t2 = dataset.iloc[t2_idx]
    dataset.at[t1_idx, 'violation'] = violates_all_constraints(constraints_validations, t1, t2)
    dataset.at[t2_idx, 'violation'] = violates_all_constraints(constraints_validations, t2, t1)

print(dataset)

   id   ano      departamento  salario   bonus  violation
0   1  2023  Recursos Humanos    50000   10000      False
1   2  2023            Vendas    60000  350000      False
2   3  2022        Tecnologia    75000  100000       True
3   4  2024         Marketing    55000   80000      False
4   5  2023        Financeiro    70000   90000      False
5   6  2023         Operações    48000       0      False


In [10]:
tuples_qtd = dataset.shape[0]

dataset['violation'] = False
dataset['target'] = [[] for _ in range(tuples_qtd)]

for t1_idx in range(tuples_qtd):
  for t2_idx in range(t1_idx + 1, tuples_qtd):
    t1 = dataset.iloc[t1_idx]
    t2 = dataset.iloc[t2_idx]
    dataset.at[t1_idx, 'violation'] = dataset.at[t1_idx, 'violation'] or violates_all_constraints(constraints_validations, t1, t2)
    dataset.at[t2_idx, 'violation'] = dataset.at[t2_idx, 'violation'] or violates_all_constraints(constraints_validations, t2, t1)

    if dataset.at[t1_idx, 'violation']:
      if cv_metadata['has_equal_table']:
        dataset.at[t1_idx, 'target'].append(t1['id'])
      if cv_metadata['has_diff_table']:
        dataset.at[t1_idx, 'target'].append(t2['id'])

    if dataset.at[t2_idx, 'violation']:
      if cv_metadata['has_equal_table']:
        dataset.at[t2_idx, 'target'].append(t2['id'])
      if cv_metadata['has_diff_table']:
        dataset.at[t2_idx, 'target'].append(t1['id'])

for i in range(tuples_qtd):
  dataset.at[i, 'target'] = list(set(dataset.at[i, 'target']))

print(dataset)

   id   ano      departamento  salario   bonus  violation              target
0   1  2023  Recursos Humanos    50000   10000      False                  []
1   2  2023            Vendas    60000  350000       True        [2, 4, 5, 6]
2   3  2022        Tecnologia    75000  100000       True  [1, 2, 3, 4, 5, 6]
3   4  2024         Marketing    55000   80000      False                  []
4   5  2023        Financeiro    70000   90000       True           [4, 5, 6]
5   6  2023         Operações    48000       0      False                  []
