# SPARK SQL

Spark SQL은 SQL 쿼리를 통해 Spark DataFrame을 다룰 수 있도록 해주는 기능입니다.

## 1. 임시 테이블

In [None]:
from pyspark.sql import SparkSession

# Build (or reuse) a SparkSession with master "local" and app name "Colab".
spark = (
    SparkSession.builder
    .master("local")
    .appName("Colab")
    .getOrCreate()
)

In [None]:
# Look up which catalog implementation the current session is configured with.
# NOTE(review): presumably reports the non-Hive catalog for the session built
# above without enableHiveSupport() — confirm by running the cell.
spark.conf.get("spark.sql.catalogImplementation")

#### 1) 테이블 생성

In [None]:
## Create the table

# NOTE(review): csv() is called with no arguments; DataFrameReader.csv expects
# the CSV location as its first argument. Presumably a blank to be filled in
# with the path of the cars dataset — TODO confirm and supply the path.
cars = spark.read.csv()

# Register the DataFrame under the name "cars" so SQL text can refer to it.
cars.createOrReplaceTempView("cars")

In [None]:
# Print the databases known to the session's catalog.
spark.sql("SHOW DATABASES").show()

In [None]:
# Print the tables and views listed under the "default" database.
spark.sql("SHOW TABLES FROM default").show()

#### 2) 저장된 테이블 조회

In [None]:
# Query the "cars" view with plain SQL and print the rows returned by show().
spark.sql("select * from cars").show()

In [None]:
# Reader-API counterpart of the SQL query: look up "cars" by name.
# No show() is called, so only the returned object itself is the cell's value.
spark.read.table('cars')

## 2. 영구 테이블

In [None]:
from pyspark.sql import SparkSession

# Build (or reuse) a local "Colab" SparkSession, this time requesting Hive support.
# NOTE(review): getOrCreate() hands back an already-running session when one
# exists; presumably the runtime is restarted (or the earlier session stopped)
# before this cell so that enableHiveSupport() takes effect — confirm.
spark = (
    SparkSession.builder
    .master("local")
    .appName("Colab")
    .enableHiveSupport()
    .getOrCreate()
)

In [None]:
# Look up which catalog implementation the current session is configured with.
# NOTE(review): expected to reflect the Hive catalog when the session was
# really created with enableHiveSupport() — confirm by running the cell.
spark.conf.get("spark.sql.catalogImplementation")

#### 1) Database 생성 및 조회

In [None]:
# Create the "temp" database. IF NOT EXISTS keeps this cell re-runnable:
# without it the statement raises once "temp" is already in the metastore.
spark.sql("CREATE DATABASE IF NOT EXISTS temp")

# Print the database list so the new entry can be confirmed.
spark.sql("SHOW DATABASES").show()

#### 2) Table 생성 및 조회

In [None]:
# NOTE(review): csv() is called with no arguments; DataFrameReader.csv expects
# the CSV location as its first argument. Presumably a blank to be filled in
# with the path of the cars dataset — TODO confirm and supply the path.
cars = spark.read.csv()

## Temporary view
cars.createOrReplaceTempView("cars")

In [None]:
## 영구
spark.sql("CREATE TABLE temp.person (id Int, name String)")

spark.sql("INSERT INTO temp.person VALUES (1, 'Tom'), (2, 'Ann')")

In [None]:
## Permanent table

# Write the cars DataFrame out as the table "cars_per" in the "temp" database.
# No save mode is set, so the writer's default mode applies.
cars.write.saveAsTable("temp.cars_per")

In [None]:
# Print the tables in the "temp" database. show() is needed to display the
# rows; without it only the DataFrame object is returned.
spark.sql("SHOW TABLES FROM temp").show()

#### 3) 저장된 테이블 조회

In [None]:
# Query the "cars" view with plain SQL and print the rows.
spark.sql("select * from cars").show()

# NOTE(review): table() is called with no arguments; DataFrameReader.table
# expects the name of the table to load. Presumably a blank to be filled in
# with one of the tables created above — TODO confirm which one.
spark.read.table()

In [None]:
# Print the databases that currently exist.
spark.sql("SHOW DATABASES").show()

# "temp" may already exist at this point; IF NOT EXISTS makes the statement
# a no-op in that case instead of raising.
spark.sql("CREATE DATABASE IF NOT EXISTS temp")

# Make "temp" the current database for subsequent unqualified table names.
spark.sql("USE temp")

# Print the tables stored in "temp".
spark.sql("SHOW TABLES FROM temp").show()

In [None]:
from IPython.display import clear_output

def execute_spark_queries():
    """Run an interactive Spark SQL prompt until the user quits.

    Each iteration reads one line from standard input, clears the previous
    cell output, runs the text through the module-level ``spark`` session and
    prints the result with ``show()``.

    The loop ends when the user enters ``q`` (any case, surrounding whitespace
    ignored) or when standard input is exhausted. Blank input is skipped.
    Any exception raised while running a query is printed and the prompt
    continues.
    """
    while True:
        try:
            _query = input("Enter your Spark SQL query (type 'q' to quit): ").strip()
        except EOFError:
            # No more input available (e.g. stdin closed): leave quietly
            # instead of surfacing a traceback.
            break
        clear_output(wait=True)
        if _query.lower() == "q":
            break
        if not _query:
            # Nothing to run; do not hand an empty statement to Spark.
            continue
        try:
            result = spark.sql(_query)
            result.show()
        except Exception as e:
            # Interactive top-level boundary: report the failure and keep prompting.
            print(f"Error executing query: {e}")

# Start the interactive prompt; it keeps looping until 'q' is entered.
execute_spark_queries()