# Querying a DataFrame with DataFusion

In [9]:
from datafusion import SessionContext, col, lit

In [3]:
# Create the session context; the cells below use it to build a DataFrame and to run SQL.
ctx = SessionContext()

In [4]:
# Column-oriented sample data: each key is a column name and each list holds
# that column's values, one entry per person.
people_columns = {
    "first_name": ["li", "wang", "ron", "amanda"],
    "age": [25, 75, 68, 18],
    "country": ["china", "china", "us", "us"],
}

# Build the DataFrame under the name "some_people", which the SQL cells query.
df = ctx.from_pydict(people_columns, name="some_people")

In [5]:
# A bare expression on the last line of a cell displays the DataFrame's rows.
df

DataFrame()
+------------+-----+---------+
| first_name | age | country |
+------------+-----+---------+
| li         | 25  | china   |
| wang       | 75  | china   |
| ron        | 68  | us      |
| amanda     | 18  | us      |
+------------+-----+---------+

## Filter DataFrame

In [6]:
# SQL route: query the "some_people" table and keep only rows where age exceeds 65.
ctx.sql("select * from some_people where age > 65")

DataFrame()
+------------+-----+---------+
| first_name | age | country |
+------------+-----+---------+
| wang       | 75  | china   |
| ron        | 68  | us      |
+------------+-----+---------+

In [11]:
# DataFrame API route for the same filter: col() refers to a column by name
# and lit() wraps the constant it is compared against.
df.filter(col("age") > lit(65))

DataFrame()
+------------+-----+---------+
| first_name | age | country |
+------------+-----+---------+
| wang       | 75  | china   |
| ron        | 68  | us      |
+------------+-----+---------+

## Select columns from DataFrame

In [13]:
# SQL route: project only the first_name and country columns.
ctx.sql("select first_name, country from some_people")

DataFrame()
+------------+---------+
| first_name | country |
+------------+---------+
| li         | china   |
| wang       | china   |
| ron        | us      |
| amanda     | us      |
+------------+---------+

In [14]:
# DataFrame API route: the same projection written as column expressions.
df.select(col("first_name"), col("country"))

DataFrame()
+------------+---------+
| first_name | country |
+------------+---------+
| li         | china   |
| wang       | china   |
| ron        | us      |
| amanda     | us      |
+------------+---------+

## Aggregation query

In [18]:
# Count the people in each country. GROUP BY alone does not guarantee the order
# of the result rows, so ORDER BY keeps the displayed table stable across runs.
ctx.sql(
    "select country, count(*) as num_people "
    "from some_people group by country order by country"
)

DataFrame()
+---------+------------+
| country | num_people |
+---------+------------+
| china   | 2          |
| us      | 2          |
+---------+------------+