# Populating a Relational Database
## DSA Interview Questions
In this example we will build and populate a SQLite database with DSA interview questions.

## Creating the initial database
Let's create our initial tables. First, we will populate the smaller question type and question level tables. Then, we will populate the larger *question* table.

In [1]:
import pandas as pd
import pyarrow as pa
import sqlite3
import os

conn = sqlite3.connect("dsa.db")

In [2]:
# Create a question type table
conn.execute('''
CREATE TABLE IF NOT EXISTS question_type (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL
)
'''
)

# Populate it with the following: Binary Search, Graph, Two Pointers, Dynamic Programming
conn.execute('''
INSERT INTO question_type (name) VALUES
    ('Binary Search'),
    ('Graph'),
    ('Two Pointers'),
    ('Dynamic Programming')
'''
)

# Create a question level table
conn.execute('''
CREATE TABLE IF NOT EXISTS question_level (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL
)
'''
)

# Populate it with the following: Easy, Medium, Hard
conn.execute('''
INSERT INTO question_level (name) VALUES
    ('Easy'),
    ('Medium'),
    ('Hard')
'''
)

# Create a questions table
conn.execute('''
CREATE TABLE IF NOT EXISTS questions (
    id INTEGER PRIMARY KEY,
    name TEXT NOT NULL,
    link TEXT NOT NULL,
    question_type_id INTEGER,
    question_level_id INTEGER,
    FOREIGN KEY (question_type_id) REFERENCES question_type(id),
    FOREIGN KEY (question_level_id) REFERENCES question_level(id)
)
'''
)

conn.commit()

# Prepare the input to SwellDB
Since we need to populate the questions table, we need to create all the possible combinations of question types and levels.

In [3]:
# Get all combinations
combos = """
SELECT 
  qt.id as question_type_id,
  qt.name as question_type_name,
  ql.id as question_level_id,
  ql.name as question_level_name
FROM question_type as qt
CROSS JOIN question_level as ql
"""

In [4]:
combos = pd.read_sql(combos, conn)
combos

Unnamed: 0,question_type_id,question_type_name,question_level_id,question_level_name
0,1,Binary Search,1,Easy
1,1,Binary Search,2,Medium
2,1,Binary Search,3,Hard
3,1,Binary Search,4,Easy
4,1,Binary Search,5,Medium
...,...,...,...,...
427,24,Dynamic Programming,14,Medium
428,24,Dynamic Programming,15,Hard
429,24,Dynamic Programming,16,Easy
430,24,Dynamic Programming,17,Medium


In [5]:
# Convert the combinations to a pyarrow table
data: pa.Table = pa.Table.from_pandas(combos)

# SwellDB
Let's populate the *question* table with SwellDB.

In [6]:
import os
import logging

import datafusion

# SwellDB imports
from swelldb import SwellDB, OpenAILLM
from swelldb.swelldb import Mode
from swelldb.table_plan.table.physical.dataset_table import DatasetTable
from swelldb.table_plan.table.physical.llm_table import LLMTable
from swelldb.table_plan.table.physical.search_engine_table import SearchEngineTable

# Initialize a SwellDB instance
swelldb: SwellDB = SwellDB(
    llm=OpenAILLM(api_key=os.environ["OPENAI_API_KEY"], model="gpt-4o"), 
    serper_api_key=os.environ["SERPER_API_KEY"])

In [23]:
questions = (
    swelldb.table_builder()
    .set_table_name("question")
    .set_content("A table that contains DSA questions from Leetcode. Given the input data, create as many as possible.")
    .set_schema("name str, link str, question_type_name str, question_type_id int, question_level_name str, question_level_id int")
    .set_base_columns(["question_type_name", "question_level_name"])
    .set_table_gen_mode(Mode.OPERATORS)
    .set_operators([LLMTable, SearchEngineTable])
    .set_data(data)
    .set_chunk_size(20)
).build()

In [8]:
logging.basicConfig(level=logging.ERROR)

In [9]:
questions.explain()

SearchEngineTable[schema=['question_type_id', 'question_level_id', 'question_type_name', 'question_level_name']
--LLMTable[schema=['question_type_name', 'question_level_name', 'name', 'link']


In [10]:
# Materialize the table
ds = questions.materialize()

sc = datafusion.SessionContext()
sc.deregister_table("questions")
sc.register_dataset("questions", pa.dataset.dataset(ds))

In [11]:
ds.to_pandas()

Unnamed: 0,question_type_id,question_level_id,question_type_name,question_level_name,name,link
0,1,1,Array,Easy,Two Sum,https://leetcode.com/problems/two-sum
1,2,2,Linked List,Medium,Add Two Numbers,https://leetcode.com/problems/add-two-numbers
2,3,1,String,Easy,Valid Parentheses,https://leetcode.com/problems/valid-parentheses
3,4,3,Dynamic Programming,Hard,Longest Valid Parentheses,https://leetcode.com/problems/longest-valid-pa...
4,5,2,Graph,Medium,Number of Islands,https://leetcode.com/problems/number-of-islands
5,6,1,Tree,Easy,Maximum Depth of Binary Tree,https://leetcode.com/problems/maximum-depth-of...
6,7,2,Heap,Medium,Kth Largest Element in an Array,https://leetcode.com/problems/kth-largest-elem...
7,8,3,Backtracking,Hard,N-Queens,https://leetcode.com/problems/n-queens
8,9,2,Binary Search,Medium,Search in Rotated Sorted Array,https://leetcode.com/problems/search-in-rotate...
9,10,1,Greedy,Easy,Best Time to Buy and Sell Stock,https://leetcode.com/problems/best-time-to-buy...


In [12]:
sc.sql(""" 
SELECT *
FROM questions
""")

question_type_id,question_level_id,question_type_name,question_level_name,name,link
1,1,Array,Easy,Two Sum,https://leetcode.com/prob  https://leetcode.com/problems/two-sum  ...
2,2,Linked List,Medium,Add Two Numbers,https://leetcode.com/prob  https://leetcode.com/problems/add-two-numbers  ...
3,1,String,Easy,Valid Parentheses,https://leetcode.com/prob  https://leetcode.com/problems/valid-parentheses  ...
4,3,Dynamic Programming,Hard,Longest Valid Parentheses,https://leetcode.com/prob  https://leetcode.com/problems/longest-valid-parentheses  ...
5,2,Graph,Medium,Number of Islands,https://leetcode.com/prob  https://leetcode.com/problems/number-of-islands  ...
6,1,Tree,Easy,Maximum Depth of Binary T  Maximum Depth of Binary Tree  ...,https://leetcode.com/prob  https://leetcode.com/problems/maximum-depth-of-binary-tree  ...
7,2,Heap,Medium,Kth Largest Element in an  Kth Largest Element in an Array  ...,https://leetcode.com/prob  https://leetcode.com/problems/kth-largest-element-in-an-array  ...
8,3,Backtracking,Hard,N-Queens,https://leetcode.com/prob  https://leetcode.com/problems/n-queens  ...
9,2,Binary Search,Medium,Search in Rotated Sorted Search in Rotated Sorted Array  ...,https://leetcode.com/prob  https://leetcode.com/problems/search-in-rotated-sorted-array  ...
10,1,Greedy,Easy,Best Time to Buy and Sell  Best Time to Buy and Sell Stock  ...,https://leetcode.com/prob  https://leetcode.com/problems/best-time-to-buy-and-sell-stock  ...


In [13]:
ds.to_pandas().drop(columns=["question_type_name", "question_level_name"]).to_sql("questions", conn, if_exists="append", index=False)

10

In [19]:
query = """
SELECT q.name AS name, qt.name as Type, ql.name As Level, q.link AS Link
FROM questions q, question_type qt, question_level ql
WHERE q.question_type_id = qt.id
AND q.question_level_id = ql.id
"""

pd.set_option('display.max_colwidth', 400)

df = pd.read_sql(query, conn)

In [20]:
df

Unnamed: 0,name,Type,Level,Link
0,Two Sum,Binary Search,Easy,https://leetcode.com/problem-list/array/
1,Add Two Numbers,Graph,Medium,https://leetcode.com/problem-list/linked-list/
2,Longest Substring Without Repeating Characters,Two Pointers,Medium,https://leetcode.com/problem-list/string/
3,Median of Two Sorted Arrays,Binary Search,Hard,https://leetcode.com/problem-list/array/
4,Valid Parentheses,Dynamic Programming,Easy,https://leetcode.com/problem-list/stack/
5,Merge Two Sorted Lists,Graph,Easy,https://leetcode.com/problem-list/linked-list/
6,Search in Rotated Sorted Array,Binary Search,Medium,https://leetcode.com/problem-list/binary-search/
7,Word Ladder,Graph,Hard,https://leetcode.com/problem-list/breadth-first-search/
8,Clone Graph,Two Pointers,Medium,https://leetcode.com/problem-list/depth-first-search/
9,Trapping Rain Water,Dynamic Programming,Hard,https://leetcode.com/problem-list/two-pointers/
