# Populating a Relational Database
## DSA Interview Questions
In this example, we will build and populate a SQLite database with DSA (data structures and algorithms) interview questions.

## Creating the initial database
Let's create our initial tables. First, we will create and populate the two small lookup tables, `question_type` and `question_level`. Then, we will create the larger `questions` table, which references both lookup tables and is populated later with SwellDB.

In [60]:
import pandas as pd
import pyarrow as pa
import sqlite3
import os

# Start from an empty database file so that re-running the notebook does not
# duplicate the rows inserted by the cells below.
# The file does not exist on the very first run (or in a clean checkout), so a
# missing file is expected here and must not abort the notebook.
try:
    os.remove("dsa.db")
except FileNotFoundError:
    pass

# Connecting creates a new, empty "dsa.db" in the current working directory.
conn = sqlite3.connect("dsa.db")

In [61]:
# Schema and seed statements for the three tables, listed in the order in
# which they are executed on the shared connection `conn`.
#
# NOTE(review): by default SQLite does not enforce FOREIGN KEY clauses unless
# `PRAGMA foreign_keys = ON` is issued on the connection; nothing in this cell
# enables it, so the references declared on `questions` are informational only.
setup_statements = [
    # Lookup table: question types
    '''
CREATE TABLE IF NOT EXISTS question_type (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL
)
''',
    # Seed the types: Binary Search, Graph, Two Pointers, Dynamic Programming
    '''
INSERT INTO question_type (name) VALUES
    ('Binary Search'),
    ('Graph'),
    ('Two Pointers'),
    ('Dynamic Programming')
''',
    # Lookup table: question difficulty levels
    '''
CREATE TABLE IF NOT EXISTS question_level (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL
)
''',
    # Seed the levels: Easy, Medium, Hard
    '''
INSERT INTO question_level (name) VALUES
    ('Easy'),
    ('Medium'),
    ('Hard')
''',
    # Main table: one row per question, pointing at a type and a level
    '''
CREATE TABLE IF NOT EXISTS questions (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    link TEXT NOT NULL,
    question_type_id INTEGER,
    question_level_id INTEGER,
    FOREIGN KEY (question_type_id) REFERENCES question_type(id),
    FOREIGN KEY (question_level_id) REFERENCES question_level(id)
)
''',
]

for statement in setup_statements:
    conn.execute(statement)

# Persist the seeded rows; the INSERTs above are not visible to other
# connections until the transaction is committed.
conn.commit()

# Prepare the input to SwellDB
To populate the `questions` table, we first need every possible combination of question type and question level. These combinations are the input data that we pass to SwellDB.

In [62]:
# Get all combinations
# SQL text that pairs every row of `question_type` with every row of
# `question_level` (CROSS JOIN) and returns both the id and the name of each side.
# At this point `combos` is only the query string; it has not been executed yet.
combos = """
SELECT 
  qt.id as question_type_id,
  qt.name as question_type_name,
  ql.id as question_level_id,
  ql.name as question_level_name
FROM question_type as qt
CROSS JOIN question_level as ql
"""

In [63]:
# Execute the SQL text currently stored in `combos` on the SQLite connection
# and load the result into a DataFrame.
# NOTE(review): this rebinds `combos` from the query string to the DataFrame,
# so the cell is not re-runnable on its own -- a second run would pass the
# DataFrame to `read_sql` as the query. Re-run the cell that defines the SQL
# text first.
combos = pd.read_sql(combos, conn)
# Display the type/level combinations.
combos

Unnamed: 0,question_type_id,question_type_name,question_level_id,question_level_name
0,1,Binary Search,1,Easy
1,1,Binary Search,2,Medium
2,1,Binary Search,3,Hard
3,2,Graph,1,Easy
4,2,Graph,2,Medium
5,2,Graph,3,Hard
6,3,Two Pointers,1,Easy
7,3,Two Pointers,2,Medium
8,3,Two Pointers,3,Hard
9,4,Dynamic Programming,1,Easy


In [64]:
# Convert the combinations to a pyarrow table
# `data` is what is later handed to the SwellDB table builder via `set_data`.
data = pa.Table.from_pandas(combos)

# SwellDB
Let's use SwellDB to generate the rows for the `questions` table.

In [65]:
import os
import logging

import datafusion

# SwellDB imports
from swelldb import SwellDB, OpenAILLM
from swelldb.swelldb import Mode
from swelldb.table_plan.table.physical.dataset_table import DatasetTable
from swelldb.table_plan.table.physical.llm_table import LLMTable
from swelldb.table_plan.table.physical.search_engine_table import SearchEngineTable

# Initialize a SwellDB instance
# Both credentials are read from the environment; a missing variable raises
# KeyError here instead of failing later inside SwellDB.
openai_llm = OpenAILLM(api_key=os.environ["OPENAI_API_KEY"])
serper_key = os.environ["SERPER_API_KEY"]

swelldb: SwellDB = SwellDB(llm=openai_llm, serper_api_key=serper_key)

In [66]:
# Describe the table SwellDB should generate, one builder option per step.
builder = swelldb.table_builder()
builder = builder.set_table_name("question")
# Natural-language description of the desired table contents.
builder = builder.set_content("A table that contains DSA questions from Leetcode. Given the input data, create as many as possible.")
# Output columns and their types.
builder = builder.set_schema("name str, link str, question_type_name str, question_type_id int, question_level_name str, question_level_id int")
builder = builder.set_base_columns(["question_type_name", "question_level_name"])
# Build the plan from the explicitly listed operators below.
builder = builder.set_table_gen_mode(Mode.OPERATORS)
builder = builder.set_operators([LLMTable, SearchEngineTable])
# Input rows: the type/level combinations converted to a pyarrow table earlier.
builder = builder.set_data(data)
builder = builder.set_chunk_size(20)

questions = builder.build()

In [67]:
# Only show log records at ERROR level or above, keeping the outputs of the
# following cells free of lower-severity log messages.
logging.basicConfig(level=logging.ERROR)

In [68]:
# Print the table plan SwellDB built for `questions` (operators and the schema
# each one produces) before materializing it.
questions.explain()

SearchEngineTable[schema=['name', 'link', 'question_type_name', 'question_level_name']
--LLMTable[schema=['question_type_id', 'question_level_id', 'question_level_name', 'question_type_name']


In [69]:
# `import pyarrow as pa` alone does not load the optional `pyarrow.dataset`
# submodule, so `pa.dataset` only resolves if some other library happened to
# import it first. Import it explicitly so this cell does not depend on that.
import pyarrow.dataset as pa_dataset

# Materialize the table
ds = questions.materialize()

# Expose the materialized result to DataFusion SQL under the name "questions".
sc = datafusion.SessionContext()
# NOTE(review): `sc` is a brand-new context here, so there is presumably no
# earlier "questions" registration to remove -- kept as in the original; confirm
# whether it is still needed.
sc.deregister_table("questions")
sc.register_dataset("questions", pa_dataset.dataset(ds))

In [70]:
# Display the materialized SwellDB result as a pandas DataFrame.
ds.to_pandas()

Unnamed: 0,name,link,question_type_name,question_level_name,question_type_id,question_level_id
0,Product of Array Except Self,https://leetcode.com/problems/product-of-array-except-self/,Array,Easy,1,1
1,Sort an Array,https://leetcode.com/problems/sort-an-array/,Array,Easy,1,1
2,Single Element in a Sorted Array,https://leetcode.com/problems/single-element-in-a-sorted-array/,Array,Easy,1,1
3,Two Sum,https://leetcode.com/problems/two-sum/,Array,Easy,1,1
4,Remove Duplicates from Sorted Array,https://leetcode.com/problems/remove-duplicates-from-sorted-array/,Array,Easy,1,1
5,Middle of the Linked List,https://leetcode.com/problems/middle-of-the-linked-list/,Linked List,Medium,2,2
6,Linked List Cycle,https://leetcode.com/problems/linked-list-cycle/,Linked List,Medium,2,2
7,Palindrome Linked List,https://leetcode.com/problems/palindrome-linked-list/,Linked List,Medium,2,2
8,Odd Even Linked List,https://leetcode.com/problems/odd-even-linked-list/,Linked List,Medium,2,2
9,Maximum Path Quality of a Graph,https://leetcode.com/problems/maximum-path-quality-of-a-graph/,Graph,Hard,3,3


In [71]:
# Query the dataset registered as "questions" through DataFusion SQL; the
# resulting DataFusion DataFrame is rendered as the cell output.
sc.sql(""" 
SELECT *
FROM questions
""")

name,link,question_type_name,question_level_name,question_type_id,question_level_id
Product of Array Except S  Product of Array Except Self  ...,https://leetcode.com/prob  https://leetcode.com/problems/product-of-array-except-self/  ...,Array,Easy,1,1
Sort an Array,https://leetcode.com/prob  https://leetcode.com/problems/sort-an-array/  ...,Array,Easy,1,1
Single Element in a Sorte  Single Element in a Sorted Array  ...,https://leetcode.com/prob  https://leetcode.com/problems/single-element-in-a-sorted-array/  ...,Array,Easy,1,1
Two Sum,https://leetcode.com/prob  https://leetcode.com/problems/two-sum/  ...,Array,Easy,1,1
Remove Duplicates from So  Remove Duplicates from Sorted Array  ...,https://leetcode.com/prob  https://leetcode.com/problems/remove-duplicates-from-sorted-array/  ...,Array,Easy,1,1
Middle of the Linked List,https://leetcode.com/prob  https://leetcode.com/problems/middle-of-the-linked-list/  ...,Linked List,Medium,2,2
Linked List Cycle,https://leetcode.com/prob  https://leetcode.com/problems/linked-list-cycle/  ...,Linked List,Medium,2,2
Palindrome Linked List,https://leetcode.com/prob  https://leetcode.com/problems/palindrome-linked-list/  ...,Linked List,Medium,2,2
Odd Even Linked List,https://leetcode.com/prob  https://leetcode.com/problems/odd-even-linked-list/  ...,Linked List,Medium,2,2
Maximum Path Quality of a  Maximum Path Quality of a Graph  ...,https://leetcode.com/prob  https://leetcode.com/problems/maximum-path-quality-of-a-graph/  ...,Graph,Hard,3,3


In [72]:
# The SQLite `questions` table stores only the type/level ids, so the two
# name columns are removed before the rows are appended.
# NOTE(review): in the displayed output the generated `question_type_name`
# values (e.g. "Array" with question_type_id 1) do not match the names seeded
# in `question_type` (id 1 is 'Binary Search'); dropping the name columns hides
# that mismatch -- worth verifying the generated ids before relying on them.
questions_rows = ds.to_pandas().drop(columns=["question_type_name", "question_level_name"])

# Last expression of the cell: `to_sql` reports the number of rows written.
questions_rows.to_sql(name="questions", con=conn, if_exists="append", index=False)

35

In [73]:
# Widen pandas' column display so the full LeetCode links are not truncated.
pd.set_option('display.max_colwidth', 400)

# Resolve each question's level id to its level name. `question_type` is
# joined as well (which drops rows whose type id has no match), although none
# of its columns are selected.
query = """
SELECT q.name, ql.name as level, q.link
FROM questions q, question_type qt, question_level ql
WHERE q.question_type_id = qt.id
AND q.question_level_id = ql.id
"""

df = pd.read_sql(sql=query, con=conn)

In [74]:
# Display the joined result: question name, level name and link.
df

Unnamed: 0,name,level,link
0,Product of Array Except Self,Easy,https://leetcode.com/problems/product-of-array-except-self/
1,Sort an Array,Easy,https://leetcode.com/problems/sort-an-array/
2,Single Element in a Sorted Array,Easy,https://leetcode.com/problems/single-element-in-a-sorted-array/
3,Two Sum,Easy,https://leetcode.com/problems/two-sum/
4,Remove Duplicates from Sorted Array,Easy,https://leetcode.com/problems/remove-duplicates-from-sorted-array/
5,Middle of the Linked List,Medium,https://leetcode.com/problems/middle-of-the-linked-list/
6,Linked List Cycle,Medium,https://leetcode.com/problems/linked-list-cycle/
7,Palindrome Linked List,Medium,https://leetcode.com/problems/palindrome-linked-list/
8,Odd Even Linked List,Medium,https://leetcode.com/problems/odd-even-linked-list/
9,Maximum Path Quality of a Graph,Hard,https://leetcode.com/problems/maximum-path-quality-of-a-graph/
