# US States
## Let's generate a table containing all the US states

In [28]:
import os

from swelldb import SwellDB, OpenAILLM

In [29]:
# Both API keys are read from environment variables, so no secret is stored
# in the notebook; a missing variable raises KeyError right here.
openai_llm = OpenAILLM(api_key=os.environ["OPENAI_API_KEY"])
swelldb: SwellDB = SwellDB(
    llm=openai_llm,
    serper_api_key=os.environ["SERPER_API_KEY"],
)

In [30]:
# Describe the table we want step by step, then build it.
us_states_builder = swelldb.table_builder()
us_states_builder = us_states_builder.set_table_name("us_states")
us_states_builder = us_states_builder.set_content(
    "A table that contains all the US states"
)
# Schema is given as a "name type" list: two string columns.
us_states_builder = us_states_builder.set_schema("state_name str, region str")
# NOTE(review): presumably base columns are the ones the remaining columns
# are derived from -- confirm against the SwellDB documentation.
us_states_builder = us_states_builder.set_base_columns(["state_name"])
tbl = us_states_builder.build()

In [31]:
# Display a description of the table built above; the recorded output shows
# its schema (state_name, region).
tbl.explain()

LLMTable[schema=['state_name', 'region']


In [32]:
# Produce the table's actual data.
# NOTE(review): presumably this is the step that issues the LLM / search
# requests, so it may be slow and incur API cost -- confirm.
# The result is consumed later via `.to_pandas()` and as a pyarrow dataset source.
table = tbl.materialize()

In [34]:
# Preview only the first ten rows rather than dumping the whole table.
first_ten_states = table.to_pandas().head(10)
print(first_ten_states)

    state_name     region
0      Alabama      South
1       Alaska       West
2      Arizona       West
3     Arkansas      South
4   California       West
5     Colorado       West
6  Connecticut  Northeast
7     Delaware      South
8      Florida      South
9      Georgia      South


## Let's query the generated table with SQL

In [41]:
import datafusion
import pyarrow as pa  # kept so the `pa` alias remains available to any later cell
# `import pyarrow` alone does not load the `dataset` submodule, so
# `pa.dataset` can raise AttributeError on a fresh kernel; import it explicitly.
import pyarrow.dataset as ds

# Expose the materialized table to DataFusion under the name "us_states".
sc = datafusion.SessionContext()
sc.register_dataset("us_states", ds.dataset(table))

# Without ORDER BY, which rows LIMIT returns and the order of grouped rows
# are not guaranteed; sorting makes the printed results reproducible.
print(sc.sql("SELECT * FROM us_states where region = 'South' ORDER BY state_name LIMIT 5"))
print(sc.sql("SELECT COUNT(*), region FROM us_states GROUP BY region ORDER BY region"))

DataFrame()
+------------+--------+
| state_name | region |
+------------+--------+
| Alabama    | South  |
| Arkansas   | South  |
| Delaware   | South  |
| Florida    | South  |
| Georgia    | South  |
+------------+--------+
DataFrame()
+----------+-----------+
| count(*) | region    |
+----------+-----------+
| 12       | Midwest   |
| 9        | Northeast |
| 16       | South     |
| 13       | West      |
+----------+-----------+
