In [0]:
from delta.tables import *

# Build the demo table with the Delta table builder API.
# createOrReplace() is used instead of create() so this cell can be re-run
# (e.g. after Restart & Run All) without failing because the table already
# exists; any existing table of this name is replaced by this definition.
(
    DeltaTable.createOrReplace(spark)
    .tableName("employee_delta_del")
    .addColumn("Emp_id", "INT")
    .addColumn("Emp_name", "STRING")
    .addColumn("Gender", "STRING")
    .addColumn("Salary", "INT")
    .addColumn("Dept", "STRING")
    # Stored as a table property named "Description".
    .property("Description", "Delta Table Created for Delta Delete Demo")
    # External location backing the table; the path-based DELETE below targets it.
    .location("/FileStore/tables/delta3/createtables")
    .execute()
)


Out[1]: <delta.tables.DeltaTable at 0x7f797ca89f10>

In [0]:
%sql
-- Sanity check: the newly created table exposes its five columns and holds no rows yet.
SELECT Emp_id, Emp_name, Gender, Salary, Dept
FROM employee_delta_del

Emp_id,Emp_name,Gender,Salary,Dept


### Populate Sample Data

In [0]:
%sql
-- Seed the table with seven employees (Emp_id 100-106).
-- Values are positional: Emp_id, Emp_name, Gender, Salary, Dept.
INSERT INTO employee_delta_del
VALUES
  (100, 'Sabrina', 'F', 10000, 'Music'),
  (101, 'Emily',   'F',  9500, 'Finance'),
  (102, 'Michael', 'M', 12000, 'IT'),
  (103, 'Olivia',  'F', 11000, 'Marketing'),
  (104, 'James',   'M', 10500, 'Sales'),
  (105, 'Sophia',  'F',  9800, 'HR'),
  (106, 'William', 'M', 11500, 'Operations');

num_affected_rows,num_inserted_rows
7,7


In [0]:
%sql
-- Confirm that all seven seeded rows are present before any delete runs.
SELECT Emp_id, Emp_name, Gender, Salary, Dept
FROM employee_delta_del

Emp_id,Emp_name,Gender,Salary,Dept
100,Sabrina,F,10000,Music
101,Emily,F,9500,Finance
102,Michael,M,12000,IT
103,Olivia,F,11000,Marketing
104,James,M,10500,Sales
105,Sophia,F,9800,HR
106,William,M,11500,Operations


### Method 1: SQL

In [0]:
%sql
-- Delete a single row, addressing the table by its registered name.
DELETE FROM employee_delta_del
WHERE Emp_id = 100

num_affected_rows
1


### Method 2: Using Delta Location

In [0]:
%sql
-- Delete a single row, addressing the Delta table by its storage path instead of its name.
DELETE FROM delta.`/FileStore/tables/delta3/createtables`
WHERE Emp_id = 101

num_affected_rows
1


In [0]:
%sql
-- Verify the two SQL deletes: Emp_id 100 and 101 should no longer be listed.
SELECT Emp_id, Emp_name, Gender, Salary, Dept
FROM employee_delta_del

Emp_id,Emp_name,Gender,Salary,Dept
102,Michael,M,12000,IT
103,Olivia,F,11000,Marketing
104,James,M,10500,Sales
105,Sophia,F,9800,HR
106,William,M,11500,Operations


### Method 3: Spark SQL

In [0]:
# Run the DELETE statement from Python through the SparkSession.
# The call stays the cell's last expression so the returned DataFrame is echoed.
spark.sql(
    "DELETE FROM employee_delta_del WHERE Emp_id=102"
)

Out[10]: DataFrame[num_affected_rows: bigint]

### Method 4: PySpark DeltaTable Instance - Spark SQL Predicate

In [0]:
# Obtain a DeltaTable handle by table name; later cells reuse `deltatable`.
deltatable = DeltaTable.forName(spark, "employee_delta_del")

# The delete predicate is given as a SQL-formatted string.
deltatable.delete("Emp_id= 103")

### Multiple conditions using SQL Predicate

In [0]:
# Several conditions can be combined in one SQL predicate string;
# only rows satisfying both conditions are deleted.
deltatable.delete(
    "Emp_id= 104 and Gender='M'"
)

In [0]:
%sql
-- Verify the Python-side deletes: only Emp_id 105 and 106 should remain.
SELECT Emp_id, Emp_name, Gender, Salary, Dept
FROM employee_delta_del

Emp_id,Emp_name,Gender,Salary,Dept
105,Sophia,F,9800,HR
106,William,M,11500,Operations


In [0]:
from pyspark.sql.functions import col

# The predicate can also be a Column expression instead of a SQL string.
deltatable.delete(col("Emp_id") == 106)

In [0]:
%sql
-- Final check: after all deletes only Emp_id 105 should remain.
SELECT Emp_id, Emp_name, Gender, Salary, Dept
FROM employee_delta_del

Emp_id,Emp_name,Gender,Salary,Dept
105,Sophia,F,9800,HR
