# 로드 함수를 사용하여 JDBC 소스로부터 데이터를 로드

In [2]:
from pyspark.sql import SparkSession

# Build (or reuse) a local SparkSession named "mysql", passing the MySQL
# Connector/J jar through "spark.jars" for the JDBC cells that follow.
spark = (
    SparkSession.builder
    .master("local")
    .appName("mysql")
    .config(
        "spark.jars",
        "C:/Program Files (x86)/MySQL/Connector J 8.0/mysql-connector-j-8.0.33.jar",
    )
    .getOrCreate()
)

In [7]:
# Load the "Department" table of the local MySQL database "learningsparkdb"
# into a DataFrame through the JDBC data source.
# NOTE: the credentials are hard-coded for this local tutorial setup only;
# do not reuse this pattern against a shared or production database.
df_department = (
    spark.read.format("jdbc")
    .options(
        url="jdbc:mysql://localhost:3306/learningsparkdb",
        # Connector/J 8.0 moved the driver to the "cj" package; the old
        # "com.mysql.jdbc.Driver" name is deprecated.
        driver="com.mysql.cj.jdbc.Driver",
        dbtable="Department",
        user="root",
        password="0000",
    )
    .load()
)

In [11]:
# Print the column names, types and nullability of the Department DataFrame.
df_department.printSchema()

root
 |-- Id: integer (nullable = true)
 |-- Name: string (nullable = true)



In [10]:
# Print the rows of the Department DataFrame as a text table.
df_department.show()

+---+-----+
| Id| Name|
+---+-----+
|  1|   IT|
|  2|Sales|
+---+-----+



In [45]:
# Prefix the department columns with "Dep" so they stay distinguishable from
# the employee table's own Id/Name columns once the two are joined.
df_department2 = (
    df_department
    .withColumnRenamed("Id", "DepId")
    .withColumnRenamed("Name", "DepName")
)

In [15]:
# Load the "Employee" table of the local MySQL database "learningsparkdb"
# into a DataFrame through the JDBC data source.
# NOTE: the credentials are hard-coded for this local tutorial setup only;
# do not reuse this pattern against a shared or production database.
df_employee = (
    spark.read.format("jdbc")
    .options(
        url="jdbc:mysql://localhost:3306/learningsparkdb",
        # Connector/J 8.0 moved the driver to the "cj" package; the old
        # "com.mysql.jdbc.Driver" name is deprecated.
        driver="com.mysql.cj.jdbc.Driver",
        dbtable="Employee",
        user="root",
        password="0000",
    )
    .load()
)

In [16]:
# Print the column names, types and nullability of the Employee DataFrame.
df_employee.printSchema()

root
 |-- Id: integer (nullable = true)
 |-- Name: string (nullable = true)
 |-- Salary: integer (nullable = true)
 |-- DepartmentId: integer (nullable = true)



In [17]:
# Print the rows of the Employee DataFrame as a text table.
df_employee.show()

+---+-----+------+------------+
| Id| Name|Salary|DepartmentId|
+---+-----+------+------------+
|  1|  Joe| 70000|           1|
|  2|  Jim| 90000|           1|
|  3|Henry| 80000|           2|
|  4|  Sam| 60000|           2|
|  5|  Max| 90000|           1|
+---+-----+------+------------+



In [20]:
# Register the Employee DataFrame as a temporary view named "Employee" so it
# can be referenced by name from spark.sql() queries.
df_employee.createOrReplaceTempView("Employee")

In [22]:
# Query the "Employee" temporary view with SQL and print the result.
spark.sql("SELECT * FROM Employee").show()

+---+-----+------+------------+
| Id| Name|Salary|DepartmentId|
+---+-----+------+------------+
|  1|  Joe| 70000|           1|
|  2|  Jim| 90000|           1|
|  3|Henry| 80000|           2|
|  4|  Sam| 60000|           2|
|  5|  Max| 90000|           1|
+---+-----+------+------------+



In [40]:
# Print the rows of the Department DataFrame after its columns were renamed.
df_department2.show()

+-----+-------+
|DepId|DepName|
+-----+-------+
|    1|     IT|
|    2|  Sales|
+-----+-------+



In [49]:
# Inner-join each employee row to its department row on the department key.
new_df = df_employee.join(
    df_department2,
    on=df_employee["DepartmentId"] == df_department2["DepId"],
    how="inner",
)

In [50]:
# Drop the DepId join key: after the equality join it always holds the same
# value as DepartmentId, so keeping both would be redundant.
new_df2 = new_df.drop("DepId")

In [51]:
# Print the joined employee/department rows as a text table.
new_df2.show()

+---+-----+------+------------+-------+
| Id| Name|Salary|DepartmentId|DepName|
+---+-----+------+------------+-------+
|  1|  Joe| 70000|           1|     IT|
|  2|  Jim| 90000|           1|     IT|
|  5|  Max| 90000|           1|     IT|
|  3|Henry| 80000|           2|  Sales|
|  4|  Sam| 60000|           2|  Sales|
+---+-----+------+------------+-------+



# 저장 함수를 사용하여 JDBC 소스에 데이터를 저장

In [52]:
# Persist the joined employee/department result to the MySQL table
# "Employee2" through the JDBC data source.
# NOTE: the credentials are hard-coded for this local tutorial setup only;
# do not reuse this pattern against a shared or production database.
(
    new_df2.write.format("jdbc")
    # Spark's default save mode, spelled out: the write raises if
    # "Employee2" already exists, so re-running this cell fails until the
    # table is dropped or a different mode is chosen deliberately.
    .mode("errorifexists")
    .options(
        url="jdbc:mysql://localhost:3306/learningsparkdb",
        # Connector/J 8.0 moved the driver to the "cj" package; the old
        # "com.mysql.jdbc.Driver" name is deprecated.
        driver="com.mysql.cj.jdbc.Driver",
        dbtable="Employee2",
        user="root",
        password="0000",
    )
    .save()
)