# INSERT INTO DELTA TABLE

## GENERATE DELTA TABLE DATA

In [0]:
# Initial employee rows; each tuple is (id, name, gender, salary, dept),
# matching the column order declared in schema_employee below.
data_employee = [
    (4, "Mary Scala", "f", 230000, "Sales"),
    (5, "Susan Liam", "f", 150000, "Sales"),
    (6, "Xi Wuan", "f", 150000, "IT"),
]

# DDL-formatted schema string passed to spark.createDataFrame.
schema_employee = (
    "id INTEGER, name STRING, gender STRING, "
    "salary INTEGER, dept STRING"
)

In [0]:
# Build the initial employee DataFrame from the sample rows, typed by the
# DDL schema string.
df_employee = spark.createDataFrame(data_employee, schema=schema_employee)

# Print the rows so the reader can see what will be written to the table.
df_employee.show()

+---+----------+------+------+-----+
| id|      name|gender|salary| dept|
+---+----------+------+------+-----+
|  4|Mary Scala|     f|230000|Sales|
|  5|Susan Liam|     f|150000|Sales|
|  6|   Xi Wuan|     f|150000|   IT|
+---+----------+------+------+-----+



In [0]:
# Persist the initial rows as the table used by the rest of the notebook.
# mode("overwrite") makes this cell re-runnable: without it, saveAsTable uses
# the default "error if exists" mode and fails on a second run, and the
# table would still contain rows inserted by later cells.
df_employee.write.mode("overwrite").saveAsTable("default.table_data_employee")

## SPARK QUERY DATA

In [0]:
# Read the saved table back through the DataFrame API and print its rows.
(
    spark
    .table("default.table_data_employee")
    .show()
)

+---+----------+------+------+-----+
| id|      name|gender|salary| dept|
+---+----------+------+------+-----+
|  4|Mary Scala|     f|230000|Sales|
|  5|Susan Liam|     f|150000|Sales|
|  6|   Xi Wuan|     f|150000|   IT|
+---+----------+------+------+-----+



## QUERY DATA WITH SPARK SQL (spark.sql)

In [0]:
# Same query expressed as SQL text through spark.sql.
# The table name is schema-qualified, like the DataFrame API cells, so the
# query does not depend on the session's current schema being `default`.
spark.sql("SELECT * FROM default.table_data_employee").show()

+---+----------+------+------+-----+
| id|      name|gender|salary| dept|
+---+----------+------+------+-----+
|  4|Mary Scala|     f|230000|Sales|
|  5|Susan Liam|     f|150000|Sales|
|  6|   Xi Wuan|     f|150000|   IT|
+---+----------+------+------+-----+



## SQL QUERY DATA

In [0]:
%sql

-- Schema-qualified, like the DataFrame API cells, so the query does not
-- depend on the session's current schema being `default`.
SELECT * FROM default.table_data_employee;

id,name,gender,salary,dept
4,Mary Scala,f,230000,Sales
5,Susan Liam,f,150000,Sales
6,Xi Wuan,f,150000,IT


## GENERATE NEW DATA

In [0]:
# Second batch of rows (ids 1-3), appended to the table further down.
# Tuple layout: (id, name, gender, salary, dept).
data_employee_v2 = [
    (1, "Carl Mike", "m", 170000, "Support"),
    (2, "Mikel Clark", "m", 254300, "IT"),
    (3, "Bob Smith", "m", 220000, "IT"),
]

# Same column layout as the first batch, as a DDL-formatted string.
schema_employee_v2 = (
    "id INTEGER, name STRING, gender STRING, "
    "salary INTEGER, dept STRING"
)

In [0]:
# Turn the second batch into a DataFrame with the declared column types.
df_employee_v2 = spark.createDataFrame(data_employee_v2, schema=schema_employee_v2)

# Preview the rows that are about to be appended.
df_employee_v2.show()

+---+-----------+------+------+-------+
| id|       name|gender|salary|   dept|
+---+-----------+------+------+-------+
|  1|  Carl Mike|     m|170000|Support|
|  2|Mikel Clark|     m|254300|     IT|
|  3|  Bob Smith|     m|220000|     IT|
+---+-----------+------+------+-------+



## SPARK INSERT INTO OVERWRITE FALSE

In [0]:
# Append the v2 rows: with overwrite=False the rows already stored in the
# table are kept and the new ones are added alongside them.
df_employee_v2.write.insertInto(
    "default.table_data_employee",
    overwrite=False,
)

In [0]:
# Re-read the table to confirm it now holds both the original and the
# appended rows.
(
    spark
    .table("default.table_data_employee")
    .show()
)

+---+-----------+------+------+-------+
| id|       name|gender|salary|   dept|
+---+-----------+------+------+-------+
|  1|  Carl Mike|     m|170000|Support|
|  4| Mary Scala|     f|230000|  Sales|
|  5| Susan Liam|     f|150000|  Sales|
|  2|Mikel Clark|     m|254300|     IT|
|  3|  Bob Smith|     m|220000|     IT|
|  6|    Xi Wuan|     f|150000|     IT|
+---+-----------+------+------+-------+



## SPARK INSERT INTO OVERWRITE TRUE

In [0]:
# Third batch of rows, used below to replace the table contents.
# Tuple layout: (id, name, gender, salary, dept).
data_employee_v3 = [
    (1, "Pedro Jose", "m", 170000, "Sales"),
    (2, "Mike Josh", "m", 100000, "IT"),
    (3, "Eduard Clark", "m", 150000, "IT"),
]

# Same column layout as the earlier batches, as a DDL-formatted string.
schema_employee_v3 = (
    "id INTEGER, name STRING, gender STRING, "
    "salary INTEGER, dept STRING"
)

In [0]:
# Turn the third batch into a DataFrame with the declared column types.
df_employee_v3 = spark.createDataFrame(data_employee_v3, schema=schema_employee_v3)

# Preview the rows that will replace the current table contents.
df_employee_v3.show()

+---+------------+------+------+-----+
| id|        name|gender|salary| dept|
+---+------------+------+------+-----+
|  1|  Pedro Jose|     m|170000|Sales|
|  2|   Mike Josh|     m|100000|   IT|
|  3|Eduard Clark|     m|150000|   IT|
+---+------------+------+------+-----+



In [0]:
# Replace the table contents: with overwrite=True the rows previously in the
# table are discarded and only the v3 rows remain.
df_employee_v3.write.insertInto(
    "default.table_data_employee",
    overwrite=True,
)

In [0]:
# Re-read the table to confirm that only the v3 rows are left.
(
    spark
    .table("default.table_data_employee")
    .show()
)

+---+------------+------+------+-----+
| id|        name|gender|salary| dept|
+---+------------+------+------+-----+
|  1|  Pedro Jose|     m|170000|Sales|
|  3|Eduard Clark|     m|150000|   IT|
|  2|   Mike Josh|     m|100000|   IT|
+---+------------+------+------+-----+



## STANDARD SQL INSERT INTO

In [0]:
# Fourth batch of rows (ids 4-6), inserted below with a SQL INSERT INTO.
# Tuple layout: (id, name, gender, salary, dept).
data_employee_v4 = [
    (4, "Leila Michaels", "f", 195000, "HR"),
    (5, "Paolo Cruz", "m", 188000, "Sales"),
    (6, "John Smith", "m", 210000, "Sales"),
]

# Same column layout as the earlier batches, as a DDL-formatted string.
schema_employee_v4 = (
    "id INTEGER, name STRING, gender STRING, "
    "salary INTEGER, dept STRING"
)

In [0]:
# Turn the fourth batch into a DataFrame with the declared column types.
df_employee_v4 = spark.createDataFrame(data_employee_v4, schema=schema_employee_v4)

# Preview the rows that the SQL INSERT below will add.
df_employee_v4.show()

+---+--------------+------+------+-----+
| id|          name|gender|salary| dept|
+---+--------------+------+------+-----+
|  4|Leila Michaels|     f|195000|   HR|
|  5|    Paolo Cruz|     m|188000|Sales|
|  6|    John Smith|     m|210000|Sales|
+---+--------------+------+------+-----+



In [0]:
# Expose the DataFrame to SQL cells under the name `df_employee_v4`.
# createOrReplaceTempView (rather than createTempView) keeps this cell
# re-runnable: createTempView raises if a view with that name already exists
# in the session.
df_employee_v4.createOrReplaceTempView("df_employee_v4")

In [0]:
%sql

-- Append every row of the df_employee_v4 temp view to the table.
-- Columns are spelled out in the view's declared order.
INSERT INTO default.table_data_employee
SELECT id, name, gender, salary, dept
FROM df_employee_v4;

num_affected_rows,num_inserted_rows
3,3


In [0]:
%sql

-- Verify the insert. Schema-qualified, like the INSERT cell, so the query
-- does not depend on the session's current schema being `default`.
SELECT * FROM default.table_data_employee;

id,name,gender,salary,dept
4,Leila Michaels,f,195000,HR
1,Pedro Jose,m,170000,Sales
5,Paolo Cruz,m,188000,Sales
6,John Smith,m,210000,Sales
3,Eduard Clark,m,150000,IT
2,Mike Josh,m,100000,IT
