Logs   
- [2023/03/08]   
  Restart this notebook if you change the scratch library

In [3]:
from collections import defaultdict
from typing import (Any, Callable, Dict, Iterator, List, Optional, Sequence,
                    Tuple)

We try to mimic the behaviour of standard databases such as
PostgreSQL, MySQL, and SQL Server by building our own from scratch:
NotQuiteABase

## Create Table Insert

- Table is a collection of rows
- Table has an association to a fixed *schema*
- Schema consists of column names and column types 

In [2]:
  # user_id  name   num_friends
users = [
  [0, "Hero", 0],
  [1, "Dunn", 2],
  [2, "Sue", 3],
  [3, "Chi", 3],
]

The above table can be written in SQL as follows:
```sql
CREATE TABLE users (
  user_id INT NOT NULL,
  _name VARCHAR(200),
  num_friends INT)
```

and each row is then inserted one at a time:

```sql
INSERT INTO users (user_id, _name, num_friends) VALUES (0, 'Hero', 0);
```

In [15]:
# A few type aliases we'll use later.
# Row:          one database row, as a dict with str keys and values of any type
# WhereClause:  a callable that takes one Row and returns a bool
# HavingClause: a callable that takes a list of Rows and returns a bool
Row = Dict[str, Any]                        # A database row
WhereClause = Callable[[Row], bool]         # Predicate for a single row
HavingClause = Callable[[List[Row]], bool]  # Predicate over multiple rows

## Implementation of SQL in Python using `Table` class

In [43]:
class Table(object):
  """An in-memory table: a fixed schema plus a list of rows.

  The schema is a list of column names and a parallel list of column types.
  Each row is stored as a dict mapping column name -> value. `None` is
  accepted for any column, whatever its declared type.

  The `Row` and `WhereClause` aliases appear as string annotations so that
  they are resolved lazily instead of at class-definition time.
  """

  def __init__(self, columns: List[str], types: List[type]) -> None:
    """Create an empty table.

    columns: the column names, in order
    types: the type of each column, in the same order as `columns`

    Raises AssertionError if the two sequences differ in length.
    """
    assert len(columns) == len(types), "# of columns must == # of types"

    # Store copies: `where` and `limit` build new tables from this schema,
    # and sharing the very same list objects would let a change to one
    # table's schema leak into every table derived from it.
    self.columns = list(columns)    # Names of columns
    self.types = list(types)        # Data types of columns
    self.rows: List[Row] = []       # (no data yet)

  def col2type(self, col: str) -> type:
    """Get the type of a column.

    Raises ValueError (from `list.index`) if `col` is not a column name.
    """
    idx = self.columns.index(col)     # Find the index of the column
    return self.types[idx]            # and return its type.

  def insert(self, values: list) -> None:
    """Append one row, given its values in column order.

    Raises ValueError if the number of values does not match the schema,
    and TypeError if a non-None value is not an instance of its column type.
    """
    # Check for right # of values
    if len(values) != len(self.types):
      raise ValueError(f"You need to provide {len(self.types)} values")

    # Check for right types of values (None is always allowed)
    for value, type_ in zip(values, self.types):
      if not isinstance(value, type_) and value is not None:
        raise TypeError(f"Expected type {type_} but got value {value}")

    # Add the corresponding dict as a "row"
    self.rows.append(dict(zip(self.columns, values)))

  def update(self, updates: Dict[str, Any],
             predicate: "WhereClause" = lambda row: True) -> None:
    """Set the given column values on every row matching `predicate`.

    updates: maps column name -> new value
    predicate: selects the rows to change; by default every row matches

    All updates are validated before any row is touched, so a bad column
    name (ValueError) or a badly typed value (TypeError) changes nothing.
    """
    # First make sure the updates have valid names and types
    for column, new_value in updates.items():
      if column not in self.columns:
        raise ValueError(f"invalid column: {column}")

      type_ = self.col2type(column)
      if not isinstance(new_value, type_) and new_value is not None:
        raise TypeError(f"expected type {type_}, but got {new_value}")

    # Now update
    for row in self.rows:
      if predicate(row):
        for column, new_value in updates.items():
          row[column] = new_value

  def delete(self, predicate: "WhereClause" = lambda row: True) -> None:
    """Delete all rows matching predicate (every row, by default)."""
    self.rows = [row for row in self.rows if not predicate(row)]

  def select(self, keep_columns: Optional[List[str]] = None,
             additional_columns: Optional[Dict[str, Callable]] = None
             ) -> "Table":
    """Return a new table with a subset of columns and/or computed columns.

    keep_columns: the names of the columns you want to keep in the result.
      If you don't supply it, the result contains all columns.
    additional_columns: a dictionary whose keys are new column names and
      whose values are functions taking a row and computing the value of
      the new column. The type of each new column is read from the
      function's return annotation, so the functions need annotated
      return types (a missing annotation raises KeyError).
    """
    if keep_columns is None:        # If no columns specified
      keep_columns = self.columns   # return all columns

    if additional_columns is None:
      additional_columns = {}

    # New column names and types. `list(...)` lets callers pass any
    # sequence of names (e.g. a tuple), not just a list.
    new_columns = list(keep_columns) + list(additional_columns)
    keep_types = [self.col2type(col) for col in keep_columns]

    # This is how to get the return type from a type annotation.
    # It will crash if `calculation` doesn't have a return type.
    add_types = [calculation.__annotations__['return']
                 for calculation in additional_columns.values()]

    # Create a new table for results
    new_table = Table(new_columns, keep_types + add_types)

    for row in self.rows:
      new_row = [row[column] for column in keep_columns]
      new_row.extend(calculation(row)
                     for calculation in additional_columns.values())
      new_table.insert(new_row)

    return new_table

  def where(self, predicate: "WhereClause" = lambda row: True) -> "Table":
    """Return a new table with only the rows that satisfy `predicate`.

    With no predicate, every row is kept.
    """
    where_table = Table(self.columns, self.types)
    for row in self.rows:
      if predicate(row):
        values = [row[column] for column in self.columns]
        where_table.insert(values)
    return where_table

  def limit(self, num_rows: int) -> "Table":
    """Return a new table with only the first `num_rows` rows."""
    limit_table = Table(self.columns, self.types)
    for i, row in enumerate(self.rows):
      if i >= num_rows:
        break
      values = [row[column] for column in self.columns]
      limit_table.insert(values)
    return limit_table

  # The following methods treat a table like a List[Row]

  def __getitem__(self, idx: int) -> "Row":
    """Return the row at position `idx`."""
    return self.rows[idx]

  def __iter__(self) -> "Iterator[Row]":
    """Iterate over the rows in insertion order."""
    return iter(self.rows)

  def __len__(self) -> int:
    """Return the number of rows."""
    return len(self.rows)

  def __repr__(self) -> str:
    """Pretty representation of the table: columns then rows"""
    rows = "\n".join(str(row) for row in self.rows)
    return f"{self.columns}\n{rows}"

Create our `Users` table

In [44]:
# The schema comes first: column names, then the matching column types
users = Table(['user_id', 'name', 'num_friends'], [int, str, int])

# Then the data, one insert per row (values are given in column order)
for user_row in [
  [0, "Hero", 0],
  [1, "Dunn", 2],
  [2, "Sue", 3],
  [3, "Chi", 3],
  [4, "Thor", 3],
  [5, "Clive", 2],
  [6, "Hicks", 3],
  [7, "Devin", 2],
  [8, "Kate", 2],
  [9, "Klein", 3],
  [10, "Jen", 1],
]:
  users.insert(user_row)

print(users)

['user_id', 'name', 'num_friends']
{'user_id': 0, 'name': 'Hero', 'num_friends': 0}
{'user_id': 1, 'name': 'Dunn', 'num_friends': 2}
{'user_id': 2, 'name': 'Sue', 'num_friends': 3}
{'user_id': 3, 'name': 'Chi', 'num_friends': 3}
{'user_id': 4, 'name': 'Thor', 'num_friends': 3}
{'user_id': 5, 'name': 'Clive', 'num_friends': 2}
{'user_id': 6, 'name': 'Hicks', 'num_friends': 3}
{'user_id': 7, 'name': 'Devin', 'num_friends': 2}
{'user_id': 8, 'name': 'Kate', 'num_friends': 2}
{'user_id': 9, 'name': 'Klein', 'num_friends': 3}
{'user_id': 10, 'name': 'Jen', 'num_friends': 1}


Testing the list-like API of the NotQuiteABase `Table` defined above

In [45]:
# Indexing returns a row dict; len() counts the rows
assert users[1]['name'] == "Dunn"
assert len(users) == 11

## Update

We are going to add `update` functionality to the `Table` class, like
the `UPDATE` command in SQL
```sql
UPDATE users
  SET num_friends = 3
  WHERE user_id = 1;
```

See the implementation of `update` in the `Table` class in the previous section

Testing for `update` method

In [46]:
# Before: user 1 still has the value it was inserted with
assert users[1]['num_friends'] == 2

# UPDATE users SET num_friends = 3 WHERE user_id = 1;
users.update(updates={'num_friends': 3},
             predicate=lambda row: row['user_id'] == 1)

# After: the matching row carries the new value
assert users[1]['num_friends'] == 3

## DELETE

- A **dangerous** way deletes every row from a table
  ```sql
  DELETE FROM users;
  ```

- **Less** dangerous ways add a `WHERE` clause and delete only rows
  that match a certain condition
  ```sql
  DELETE FROM users WHERE user_id = 1;
  ```

See the implementation of `delete` in the `Table` class in the previous section.

Testing the `delete` method
```python
# We're not actually going to run these
users.delete(lambda row: row["user_id"] == 1)     # Deletes rows with user_id == 1
users.delete()
```

## SELECT

We implement the following `SELECT`, `WHERE`, and `LIMIT` commands
in our `Table` class 

```sql
SELECT * FROM users;                              -- get the entire contents
SELECT * FROM users LIMIT 2;                      -- get the first two rows
SELECT user_id FROM users;                        -- only get specific columns
SELECT user_id FROM users WHERE name = 'Dunn';    -- only get specific row
```

```sql
-- get name column and compute its length then store it into a new column
-- with name_length
SELECT LENGTH(name) AS name_length FROM users; 
```

Testing the implementation of the `.select`, `.where` and `.limit` methods

In [48]:
# SELECT * FROM users;
all_users = users.select()
assert len(all_users) == 11

# SELECT * FROM users LIMIT 2;
two_users = users.limit(2)
assert len(two_users) == 2

# SELECT user_id FROM users;
just_ids = users.select(keep_columns=["user_id"])
assert just_ids.columns == ['user_id']

# SELECT user_id FROM users WHERE name = 'Dunn';
dunn_ids = (
  users
  .where(lambda row: row["name"] == "Dunn")
  .select(keep_columns=["user_id"])
)
# Exactly one user is called Dunn, and only the user_id column survives
assert len(dunn_ids) == 1
assert dunn_ids[0] == {"user_id": 1}

# SELECT LENGTH(name) AS name_length FROM users;
def name_length(row) -> int:
  """Length of the row's name; the `int` annotation gives the column type."""
  return len(row["name"])

# Bind the result to its own name so the `name_length` function is not
# overwritten by the table it produces.
name_lengths = users.select(keep_columns=[],
                            additional_columns={"name_length": name_length})
assert name_lengths[0]['name_length'] == len("Hero")