# 3 Implementierung KeyRelation (5 Punkte)

Copyright Jens Dittrich & Marcel Maltry, [Big Data Analytics Group](https://bigdata.uni-saarland.de/), [CC-BY-SA](https://creativecommons.org/licenses/by-sa/4.0/legalcode)

In [9]:
from collections import defaultdict
from ra.utils import load_csv
from ra.relation import Relation

## The `Relation` class
    
The class `Relation` is implemented in `ra.relation` and implements the following methods:
* `add_tuple(tup)`: Adds the tuple `tup` if the tuple's schema is valid.
* Several print methods

**Remember:** Neither the order of rows nor the order of columns carry any meaning in a relation!

### Exercise
Extend class `Relation` to support keys and unique attributes and check for duplicates of keys (uniques) when adding tuples:

In [10]:
# upload the contents of this cell to our CMS as a text file

# a relation subclass respecting key/unique constraints:
class KeyRelation(Relation):
    # keys: names of the key attributes as a list
    # uniques: names of the unique attributes as a list
    def __init__(self, name, schema, keys, uniques):
        super().__init__(name, schema)

        # assert that the list of keys/uniques is subset-equal self-attributes:
        assert set(keys) <= set(self.attributes)
        assert set(uniques) <= set(self.attributes)
        # make sure that at least one key/unique attribute is defined:
        assert len(keys) >= 1
        assert len(uniques) >= 1

        # add your code here!
        self.unique_idx:list[int] = []
        self.key_idx:list[int] = []
        for i, e in enumerate(schema):
            if e[0] in uniques:
                self.unique_idx.append(i)
            if e[0] in keys:
                self.key_idx.append(i)
        self.uniques = set()
        self.keys = set()

    def add_tuple(self, tup):
        # check if there is a tuple with the same key/unique in the relation
        # only insert it using super().add_tuple(tup) if there is not.
        # raise a ValueError if the key or unique is already present.
        # Make sure to perform your check in O(1) time!
        # O(num_uniques)+O(check_shema) but ok ;)
        self._check_schema(tup)

        #check key
        curr_key = tuple(map(lambda x: tup[x], self.key_idx))
        if curr_key in self.keys:
            raise ValueError("Key")
        self.keys.add(curr_key)

        curr_unique = tuple(map(lambda x: tup[x], self.unique_idx))
        print(curr_key, curr_unique, self.keys, self.uniques)
        if curr_unique in self.uniques:
            raise ValueError("Unique")
        self.uniques.add(curr_unique)

        return super().add_tuple(tup)

    def print_schema(self):
        super().print_schema()
        # add your code here!
        # ...
        # should also print the key and unique attributes
        pass

### Unit Test for Relation

Note that test cases are by no means exhaustive!

In [11]:
import unittest

class RelationTest(unittest.TestCase):

    def setUp(self):
        self.foo = Relation('foo', [('id', int), ('name', str)])
        self.foo.add_tuple( (2,'Hello') )
        self.foo.add_tuple( (7,'World') )
        self.foo.add_tuple( (1,'!') )

        self.bar = Relation('bar', [('a', int), ('b', int), ('c', int), ('d', int)])
        self.bar.add_tuple( (1, 2, 3, 4) )
        self.bar.add_tuple( (2, 2, 3, 4) )
        self.bar.add_tuple( (3, 2, 3, 4) )
        self.bar.add_tuple( (4, 2, 3, 4) )
        self.bar.add_tuple( (5, 2, 3, 4) )
        
    def test_size(self):
        # foo should contain 3 tuples
        self.assertEqual(len(self.foo), 3)
        # check valid insert
        self.assertTrue(self.foo.add_tuple( (3, '?') ))
        self.assertEqual(len(self.foo), 4)
        # check duplicate insert
        self.assertFalse(self.foo.add_tuple( (1,'!') ))
        self.assertEqual(len(self.foo), 4)
        
        
        # bar should contain 5 tuples
        self.assertEqual(len(self.bar), 5)
        # check valid insert
        self.assertTrue(self.bar.add_tuple( (6, 2, 3, 4) ))
        self.assertEqual(len(self.bar), 6)
        # check duplicate insert
        self.assertFalse(self.bar.add_tuple( (5, 2, 3, 4) ))
        self.assertEqual(len(self.bar), 6)
    
    def test_schema(self):
        # incorrectly typed tuple
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( ('wrong order', 42) )
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( (0.1, 'wrong type') )
        # inccorectly sized tuples
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( (6, 'wrong size', 12) )
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( (42,) )
        
        # incorrectly typed tuple
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( (0.1, 0.2, 0.3, 0.4) )
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( ('1', '3', '2', '4') )
        # incorrectly sized
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( (1, 2, 4, 5, 6) )
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( (1, 2, 4) )

### Unit Test for KeyRelation

Note that test cases are by no means exhaustive!

In [12]:
class KeyRelationTest(unittest.TestCase):
    
    def setUp(self):
        keys = ['id']
        uniques = ['name']
        self.foo = KeyRelation('foo', [('id', int), ('name', str)], keys, uniques)
        self.foo.add_tuple( (1, 'first') )
        self.foo.add_tuple( (2, 'second') )
        self.foo.add_tuple( (3, 'thrid') )
        
        keys = ['a', 'c']
        uniques =['b', 'd']
        self.bar = KeyRelation('bar', [('a', int), ('b', int), ('c', int), ('d', int)], keys, uniques)
        self.bar.add_tuple( (1, 2, 1, 3) )
        self.bar.add_tuple( (1, 3, 2, 1) )
        self.bar.add_tuple( (2, 3, 2, 3) )
        self.bar.add_tuple( (2, 3, 1, 2) )
        
    def test_size(self):
        # foo should contain 3 tuples
        print(self.foo.tuples)
        self.assertEqual(len(self.foo), 3)
        # check valid insert
        self.foo.add_tuple( (4, 'fourth') )
        self.assertEqual(len(self.foo), 4)
        # check duplicate key insert
        with self.assertRaises(ValueError):
            self.foo.add_tuple( (1, 'one') ) # should raise ValueError  
        self.assertEqual(len(self.foo), 4)  # should not add tuple
        # check duplicate tuple insert
        with self.assertRaises(ValueError):
            self.foo.add_tuple( (1,'first') )  #should raise ValueError
        self.assertEqual(len(self.foo), 4)  # should not add tuple
        #check duplicate unique insert
        with self.assertRaises(ValueError):
            self.foo.add_tuple((5, 'first'))
        self.assertEqual(len(self.foo), 4)
        
        # bar should contain 4 tuples
        self.assertEqual(len(self.bar), 4)
        # check valid insert
        self.bar.add_tuple( (3, 1, 2, 3) )
        self.assertEqual(len(self.bar), 5)
        # check duplicate key insert
        with self.assertRaises(ValueError):
            self.bar.add_tuple( (1, 3, 1, 2) )  # should raise ValueError
        self.assertEqual(len(self.bar), 5)  # should not add tuple
        # check duplicate tuple insert
        with self.assertRaises(ValueError):
            self.bar.add_tuple( (2, 3, 1, 2) )  # should raise ValueError
        self.assertEqual(len(self.bar), 5)  # should not add tuple
        
        #check duplicate unique insert
        with self.assertRaises(ValueError):
            self.bar.add_tuple( (5, 3, 4, 2) ) # should raise ValueError
        self.assertEqual(len(self.bar), 5)  # should not add tuple
        
        #check valid insert
        self.bar.add_tuple( (5, 3, 4, 4) )
        self.assertEqual(len(self.bar), 6)
        with self.assertRaises(ValueError):
            self.bar.add_tuple( (5, 3, 4, 4) )  # should raise ValueError           
        
    
    def test_schema(self):
        # incorrectl<y typed tuple
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( ('seventh', 7) )
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( (0.1, 'zero point first') )
        # inccorectly sized tuples
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( (42, 'oops', 12) )
        with self.assertRaises(AssertionError):
            self.foo.add_tuple( (42,) )
        
        # incorrectly typed tuple
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( (0.1, 0.2, 0.3, 0.4) )
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( ('1', '3', '2', '4') )
        # incorrectly sized
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( (1, 2, 4, 5, 6) )
        with self.assertRaises(AssertionError):
            self.bar.add_tuple( (1, 2, 4) )

x = KeyRelationTest()
x.setUp()
x.test_size()

(1,) ('first',) {(1,)} set()
(2,) ('second',) {(1,), (2,)} {('first',)}
(3,) ('thrid',) {(1,), (2,), (3,)} {('first',), ('second',)}
(1, 1) (2, 3) {(1, 1)} set()
(1, 2) (3, 1) {(1, 1), (1, 2)} {(2, 3)}
(2, 2) (3, 3) {(1, 1), (1, 2), (2, 2)} {(2, 3), (3, 1)}
(2, 1) (3, 2) {(1, 1), (1, 2), (2, 1), (2, 2)} {(2, 3), (3, 3), (3, 1)}
{(2, 'second'), (1, 'first'), (3, 'thrid')}
(4,) ('fourth',) {(1,), (2,), (3,), (4,)} {('first',), ('second',), ('thrid',)}
(5,) ('first',) {(2,), (5,), (4,), (1,), (3,)} {('first',), ('fourth',), ('second',), ('thrid',)}
(3, 2) (1, 3) {(1, 2), (2, 1), (1, 1), (2, 2), (3, 2)} {(2, 3), (3, 2), (3, 3), (3, 1)}
(5, 4) (3, 2) {(1, 2), (2, 1), (1, 1), (5, 4), (2, 2), (3, 2)} {(3, 1), (2, 3), (3, 3), (3, 2), (1, 3)}


ValueError: Key

In [13]:
# Run the unit test without shutting down the jupyter kernel
unittest.main(argv=['ignored', '-v'], verbosity=2, exit=False)

test_schema (__main__.KeyRelationTest) ... ok
test_size (__main__.KeyRelationTest) ... ERROR
test_schema (__main__.RelationTest) ... ok
test_size (__main__.RelationTest) ... ok

ERROR: test_size (__main__.KeyRelationTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_34051/947920738.py", line 59, in test_size
    self.bar.add_tuple( (5, 3, 4, 4) )
  File "/tmp/ipykernel_34051/2044191184.py", line 39, in add_tuple
    raise ValueError("Key")
ValueError: Key

----------------------------------------------------------------------
Ran 4 tests in 0.011s

FAILED (errors=1)


(1,) ('first',) {(1,)} set()
(2,) ('second',) {(1,), (2,)} {('first',)}
(3,) ('thrid',) {(1,), (2,), (3,)} {('first',), ('second',)}
(1, 1) (2, 3) {(1, 1)} set()
(1, 2) (3, 1) {(1, 1), (1, 2)} {(2, 3)}
(2, 2) (3, 3) {(1, 1), (1, 2), (2, 2)} {(2, 3), (3, 1)}
(2, 1) (3, 2) {(1, 1), (1, 2), (2, 1), (2, 2)} {(2, 3), (3, 3), (3, 1)}
(1,) ('first',) {(1,)} set()
(2,) ('second',) {(1,), (2,)} {('first',)}
(3,) ('thrid',) {(1,), (2,), (3,)} {('first',), ('second',)}
(1, 1) (2, 3) {(1, 1)} set()
(1, 2) (3, 1) {(1, 1), (1, 2)} {(2, 3)}
(2, 2) (3, 3) {(1, 1), (1, 2), (2, 2)} {(2, 3), (3, 1)}
(2, 1) (3, 2) {(1, 1), (1, 2), (2, 1), (2, 2)} {(2, 3), (3, 3), (3, 1)}
{(2, 'second'), (1, 'first'), (3, 'thrid')}
(4,) ('fourth',) {(1,), (2,), (3,), (4,)} {('first',), ('second',), ('thrid',)}
(5,) ('first',) {(2,), (5,), (4,), (1,), (3,)} {('first',), ('fourth',), ('second',), ('thrid',)}
(3, 2) (1, 3) {(1, 2), (2, 1), (1, 1), (2, 2), (3, 2)} {(2, 3), (3, 2), (3, 3), (3, 1)}
(5, 4) (3, 2) {(1, 2), (2, 1),

<unittest.main.TestProgram at 0x7f339c3c03d0>