In [1]:
import sys
sys.path.append('../')
from database.fetch_data import *
import pandas as pd
from itertools import product
import string
from probabilistic_database_creation.rv_probability_assignments import *
from probabilistic_database_creation.prob_db_creation_dubio import *

In [2]:
# Notebook configuration: edit these values to run the cells below against a
# different table or a different functional dependency (FD).

# Name of the inconsistent database table.
table_name = "car_owner_corrupted"

# Determinant attributes (left-hand side of the FD).
# Example: in  person -> {model, color}  the determinant is `person`.
# TODO: Extend for multiple determinants
determinant_attributes = [
    "person",
]

# Dependent attributes (right-hand side of the FD).
# Example: in  person -> {model, color}  the dependents are `model` and `color`.
dependent_attributes = [
    "model",
    "color",
]

In [3]:
# Load the inconsistent table into a DataFrame. The columns are the
# determinant attributes followed by the dependent attributes.
all_records = fetch_data_results(
    fetch_all_records(
        table_name,
        determinant_attributes,
        dependent_attributes,
    )
)
data = pd.DataFrame(
    all_records,
    columns=[*determinant_attributes, *dependent_attributes],
)

In [4]:
# Find the distinct values of each dependent attribute and assign a value
# (an integer id) to each of them.
# Judging by the printed output for the current data, the result is a list
# with one dict per dependent attribute, in the order of `dependent_attributes`,
# each mapping an integer id starting at 1 to a distinct attribute value.
dependent_attributes_mappings = generate_rvs(data,dependent_attributes)
# Printed so the id -> value mappings can be checked against the reports below.
print(dependent_attributes_mappings)

[{1: 'Toyota', 2: 'Honda', 3: 'Mazda'}, {1: 'Red', 2: 'Blue', 3: 'Green'}]


In [5]:
# RV assignment and probability calculations for CWA.
# NOTE(review): "CWA" presumably stands for closed-world assumption; confirm
# against probabilistic_database_creation.rv_probability_assignments.
rv_assignments_CWA = create_rv_assignments_with_prob_CWA(
    data,
    dependent_attributes_mappings,
    determinant_attributes,
    dependent_attributes,
)

# Report: one header line per key of the result, then one indented line per
# assignment dict, with its entries rendered as "<key>: <value> (prob: <prob>)"
# and separated by ", ".
for determinant_attribute, assignments in rv_assignments_CWA.items():
    print(f"{determinant_attribute}:")
    for attr_dict in assignments:
        attr_str = ", ".join(
            f"{key}: {value['value']} (prob: {value['prob']})"
            for key, value in attr_dict.items()
        )
        print(f"  {attr_str}")

Frank:
  a1=1: Toyota (prob: 1.0), a1=2: Honda (prob: 0), a1=3: Mazda (prob: 0)
  b1=1: Red (prob: 0.5), b1=2: Blue (prob: 0.5), b1=3: Green (prob: 0)
Billy:
  a2=1: Toyota (prob: 0), a2=2: Honda (prob: 1.0), a2=3: Mazda (prob: 0)
  b2=1: Red (prob: 0), b2=2: Blue (prob: 1.0), b2=3: Green (prob: 0)
Jimmy:
  a3=1: Toyota (prob: 0), a3=2: Honda (prob: 0), a3=3: Mazda (prob: 1.0)
  b3=1: Red (prob: 0), b3=2: Blue (prob: 0), b3=3: Green (prob: 1.0)
Thomas:
  a4=1: Toyota (prob: 1.0), a4=2: Honda (prob: 0), a4=3: Mazda (prob: 0)
  b4=1: Red (prob: 0.5), b4=2: Blue (prob: 0.5), b4=3: Green (prob: 0)
Betty:
  a5=1: Toyota (prob: 0.3333333333333333), a5=2: Honda (prob: 0.3333333333333333), a5=3: Mazda (prob: 0.3333333333333333)
  b5=1: Red (prob: 0), b5=2: Blue (prob: 0), b5=3: Green (prob: 1.0)
Johnny:
  a6=1: Toyota (prob: 0), a6=2: Honda (prob: 0), a6=3: Mazda (prob: 1.0)
  b6=1: Red (prob: 0), b6=2: Blue (prob: 0), b6=3: Green (prob: 1.0)


In [6]:
# RV assignment and probability calculations for SWA.
# NOTE(review): the expansion of "SWA" is not visible in this notebook; confirm
# against probabilistic_database_creation.rv_probability_assignments.
rv_assignments_SWA = create_rv_assignments_with_prob_SWA(
    data,
    dependent_attributes_mappings,
    determinant_attributes,
    dependent_attributes,
)

# Report: one header line per key of the result, then one indented line per
# assignment dict, with its entries rendered as "<key>: <value> (prob: <prob>)"
# and separated by ", ".
for determinant_attribute, assignments in rv_assignments_SWA.items():
    print(f"{determinant_attribute}:")
    for attr_dict in assignments:
        attr_str = ", ".join(
            f"{key}: {value['value']} (prob: {value['prob']})"
            for key, value in attr_dict.items()
        )
        print(f"  {attr_str}")

Frank:
  a1=1: Toyota (prob: 0.8), a1=2: Honda (prob: 0.1), a1=3: Mazda (prob: 0.1)
  b1=1: Red (prob: 0.4), b1=2: Blue (prob: 0.4), b1=3: Green (prob: 0.2)
Billy:
  a2=1: Toyota (prob: 0), a2=2: Honda (prob: 1), a2=3: Mazda (prob: 0)
  b2=1: Red (prob: 0), b2=2: Blue (prob: 1), b2=3: Green (prob: 0)
Jimmy:
  a3=1: Toyota (prob: 0), a3=2: Honda (prob: 0), a3=3: Mazda (prob: 1)
  b3=1: Red (prob: 0), b3=2: Blue (prob: 0), b3=3: Green (prob: 1)
Thomas:
  a4=1: Toyota (prob: 0.8), a4=2: Honda (prob: 0.1), a4=3: Mazda (prob: 0.1)
  b4=1: Red (prob: 0.4), b4=2: Blue (prob: 0.4), b4=3: Green (prob: 0.2)
Betty:
  a5=1: Toyota (prob: 0.3333333333333333), a5=2: Honda (prob: 0.3333333333333333), a5=3: Mazda (prob: 0.3333333333333333)
  b5=1: Red (prob: 0.1), b5=2: Blue (prob: 0.1), b5=3: Green (prob: 0.8)
Johnny:
  a6=1: Toyota (prob: 0), a6=2: Honda (prob: 0), a6=3: Mazda (prob: 1)
  b6=1: Red (prob: 0), b6=2: Blue (prob: 0), b6=3: Green (prob: 1)


In [7]:
# Sentence formation.
# The sentences are built from the SWA assignments (rv_assignments_SWA), not
# from the CWA ones computed earlier.
# Judging by the printed output, `sentences` maps each determinant value to a
# list of (dependent-values tuple, sentence string) pairs, e.g.
# (('Toyota', 'Red'), 'a1=1 & b1=1').
sentences = compute_sentences(rv_assignments_SWA,data,determinant_attributes)
# Print each key followed by its tuples, one tuple per line.
for determinant, tuples_list in sentences.items():
    print(f"{determinant}:")
    for tpl in tuples_list:
        print(tpl)

Frank:
(('Toyota', 'Red'), 'a1=1 & b1=1')
(('Toyota', 'Blue'), 'a1=1 & b1=2')
(('Toyota', 'Green'), 'a1=1 & b1=3')
(('Honda', 'Red'), 'a1=2 & b1=1')
(('Honda', 'Blue'), 'a1=2 & b1=2')
(('Honda', 'Green'), 'a1=2 & b1=3')
(('Mazda', 'Red'), 'a1=3 & b1=1')
(('Mazda', 'Blue'), 'a1=3 & b1=2')
(('Mazda', 'Green'), 'a1=3 & b1=3')
Billy:
(('Honda', 'Blue'), 'a2=2 & b2=2')
Jimmy:
(('Mazda', 'Green'), 'a3=3 & b3=3')
Thomas:
(('Toyota', 'Red'), 'a4=1 & b4=1')
(('Toyota', 'Blue'), 'a4=1 & b4=2')
(('Toyota', 'Green'), 'a4=1 & b4=3')
(('Honda', 'Red'), 'a4=2 & b4=1')
(('Honda', 'Blue'), 'a4=2 & b4=2')
(('Honda', 'Green'), 'a4=2 & b4=3')
(('Mazda', 'Red'), 'a4=3 & b4=1')
(('Mazda', 'Blue'), 'a4=3 & b4=2')
(('Mazda', 'Green'), 'a4=3 & b4=3')
Betty:
(('Toyota', 'Red'), 'a5=1 & b5=1')
(('Toyota', 'Blue'), 'a5=1 & b5=2')
(('Toyota', 'Green'), 'a5=1 & b5=3')
(('Honda', 'Red'), 'a5=2 & b5=1')
(('Honda', 'Blue'), 'a5=2 & b5=2')
(('Honda', 'Green'), 'a5=2 & b5=3')
(('Mazda', 'Red'), 'a5=3 & b5=1')
(('Mazda',

In [8]:
# Create the probabilistic table from the sentences computed above.
# Kept as the cell's bare final expression so the notebook displays the result;
# the rendered output shows the determinant and dependent columns plus a
# `_sentence` column.
# NOTE(review): whether this call also persists the table anywhere is not
# visible from this notebook; check prob_db_creation_dubio.
create_probabilistic_db(sentences,determinant_attributes,dependent_attributes)

Unnamed: 0,person,model,color,_sentence
0,Frank,Toyota,Red,a1=1 & b1=1
1,Frank,Toyota,Blue,a1=1 & b1=2
2,Frank,Toyota,Green,a1=1 & b1=3
3,Frank,Honda,Red,a1=2 & b1=1
4,Frank,Honda,Blue,a1=2 & b1=2
5,Frank,Honda,Green,a1=2 & b1=3
6,Frank,Mazda,Red,a1=3 & b1=1
7,Frank,Mazda,Blue,a1=3 & b1=2
8,Frank,Mazda,Green,a1=3 & b1=3
9,Billy,Honda,Blue,a2=2 & b2=2
