In [None]:
import os, sys
from datetime import date, timedelta
from pyspark.sql import SparkSession

In [None]:
# Settings: make PySpark's worker and driver processes use the same Python
# interpreter that is running this kernel.
os.environ.update({
    "PYSPARK_PYTHON": sys.executable,
    "PYSPARK_DRIVER_PYTHON": sys.executable,
})

In [1]:
# Get (or reuse) a local Spark session for this notebook:
#   - master "local[1]", app name "df-load"
#   - "spark.ui.enabled" set to "false"
spark = (
    SparkSession.builder
    .master("local[1]")
    .config("spark.ui.enabled", "false")
    .appName("df-load")
    .getOrCreate()
)

In [2]:
# Sample dataset stored column-wise: every key maps to a list of ten values,
# one per row. Dates are the ten consecutive days starting 2024-01-01.
data = dict(
    id=list(range(1, 11)),
    value=[10.5, 20.1, 30.2, 40.8, 50.0, 60.3, 70.7, 80.9, 90.4, 100.6],
    name=list("ABCDEFGHIJ"),
    active=[True, False, True, True, False, True, False, True, False, True],
    date=[date(2024, 1, day) for day in range(1, 11)],
)

In [3]:
# Transpose the column-wise dict into a list of row tuples, with fields
# ordered (id, value, name, active, date).
rows = list(zip(
    *(data[key] for key in ("id", "value", "name", "active", "date"))
))

In [4]:
# Column names, listed in the same order as the fields of each row tuple.
columns = ["id", "value", "name", "active", "date"]

# Only names are supplied, so Spark infers each column's type from the data.
df_spark = spark.createDataFrame(
    rows,
    schema=columns,
)

In [5]:
# Bare last expression: the cell output is the DataFrame's repr, which lists
# column names and types but no rows.
df_spark

DataFrame[id: bigint, value: double, name: string, active: boolean, date: date]

In [6]:
# Print the DataFrame's rows as a text table.
df_spark.show()

+---+-----+----+------+----------+
| id|value|name|active|      date|
+---+-----+----+------+----------+
|  1| 10.5|   A|  true|2024-01-01|
|  2| 20.1|   B| false|2024-01-02|
|  3| 30.2|   C|  true|2024-01-03|
|  4| 40.8|   D|  true|2024-01-04|
|  5| 50.0|   E| false|2024-01-05|
|  6| 60.3|   F|  true|2024-01-06|
|  7| 70.7|   G| false|2024-01-07|
|  8| 80.9|   H|  true|2024-01-08|
|  9| 90.4|   I| false|2024-01-09|
| 10|100.6|   J|  true|2024-01-10|
+---+-----+----+------+----------+



In [7]:
# Register the DataFrame as a temporary view named "table" so that SQL text
# passed to spark.sql can refer to it by that name.
# NOTE(review): "table" is also a SQL keyword; a more specific view name may
# be safer — confirm before reusing this pattern.
df_spark.createOrReplaceTempView("table")

In [8]:
# Query the "table" temp view with SQL, keeping only rows whose `active`
# column is true, then print the result.
active_rows = spark.sql("""
    SELECT *
    FROM table
    WHERE active = true
""")
active_rows.show()

+---+-----+----+------+----------+
| id|value|name|active|      date|
+---+-----+----+------+----------+
|  1| 10.5|   A|  true|2024-01-01|
|  3| 30.2|   C|  true|2024-01-03|
|  4| 40.8|   D|  true|2024-01-04|
|  6| 60.3|   F|  true|2024-01-06|
|  8| 80.9|   H|  true|2024-01-08|
| 10|100.6|   J|  true|2024-01-10|
+---+-----+----+------+----------+

