# EXCEPT COLUMNS

## Dataframes

In [0]:
# Sample employee rows. Each tuple follows the column order declared in
# schema_employees: (employee_id, name, dept, title, salary, city, experience).
data_employees = [
    (1, 'Alice', 'HR', 'Manager', 50000, 'New York', 5),
    (2, 'Bob', 'IT', 'Developer', 70000, 'San Francisco', 3),
    (3, 'Charlie', 'Finance', 'Analyst', 65000, 'Chicago', 4),
    (4, 'Diane', 'Marketing', 'Specialist', 55000, 'Los Angeles', 2),
    (5, 'Eve', 'Sales', 'Executive', 60000, 'Miami', 6),
    (6, 'Frank', 'IT', 'Developer', 72000, 'Austin', 3),
    (7, 'Grace', 'HR', 'Assistant', 48000, 'Boston', 2),
    (8, 'Henry', 'Finance', 'Manager', 80000, 'Seattle', 7),
    (9, 'Irene', 'Marketing', 'Coordinator', 53000, 'Denver', 4),
    (10, 'Jack', 'Sales', 'Executive', 62000, 'San Diego', 5),
    (11, 'Karen', 'HR', 'Manager', 52000, 'Atlanta', 5),
    (12, 'Liam', 'IT', 'Support', 50000, 'San Francisco', 3),
    (13, 'Mia', 'Finance', 'Analyst', 64000, 'Chicago', 4),
    (14, 'Noah', 'Marketing', 'Specialist', 56000, 'Los Angeles', 2),
    (15, 'Olivia', 'Sales', 'Executive', 61000, 'Miami', 6),
    (16, 'Paul', 'IT', 'Developer', 73000, 'Austin', 3),
    (17, 'Quinn', 'HR', 'Assistant', 49000, 'Boston', 2),
    (18, 'Ruth', 'Finance', 'Manager', 81000, 'Seattle', 7),
    (19, 'Sam', 'Marketing', 'Coordinator', 54000, 'Denver', 4),
    (20, 'Tina', 'Sales', 'Executive', 63000, 'San Diego', 5)
]

# DDL-style schema string for the rows above.
# salary is INTEGER (not STRING) to match the integer values in the data, so
# that ordering and comparisons on the column are numeric, not lexicographic.
schema_employees = "employee_id INTEGER, name STRING, dept STRING, title STRING, salary INTEGER, city STRING, experience INTEGER"

In [0]:
# Build the employees DataFrame from the in-memory rows and the DDL schema
# string, then print a preview of it.
df_employees = spark.createDataFrame(data_employees, schema=schema_employees)
df_employees.show()

+-----------+-------+---------+-----------+------+-------------+----------+
|employee_id|   name|     dept|      title|salary|         city|experience|
+-----------+-------+---------+-----------+------+-------------+----------+
|          1|  Alice|       HR|    Manager| 50000|     New York|         5|
|          2|    Bob|       IT|  Developer| 70000|San Francisco|         3|
|          3|Charlie|  Finance|    Analyst| 65000|      Chicago|         4|
|          4|  Diane|Marketing| Specialist| 55000|  Los Angeles|         2|
|          5|    Eve|    Sales|  Executive| 60000|        Miami|         6|
|          6|  Frank|       IT|  Developer| 72000|       Austin|         3|
|          7|  Grace|       HR|  Assistant| 48000|       Boston|         2|
|          8|  Henry|  Finance|    Manager| 80000|      Seattle|         7|
|          9|  Irene|Marketing|Coordinator| 53000|       Denver|         4|
|         10|   Jack|    Sales|  Executive| 62000|    San Diego|         5|
|         11

In [0]:
# Sample department rows, one tuple per department:
# (dept, num_employees, budget, headquarters).
data_departments = [
    ("HR", 120, 500000, "New York"),
    ("IT", 150, 1200000, "San Francisco"),
    ("Finance", 100, 750000, "Chicago"),
    ("Marketing", 80, 600000, "Los Angeles"),
    ("Sales", 130, 850000, "Miami"),
    ("Support", 60, 400000, "Austin"),
    ("Operations", 90, 700000, "Seattle"),
    ("Legal", 50, 300000, "Denver"),
    ("R&D", 110, 950000, "Boston"),
    ("Customer Service", 70, 450000, "San Diego"),
]

# DDL-style schema string, assembled one field per line for readability.
schema_departments = ", ".join([
    "dept STRING",
    "num_employees INTEGER",
    "budget INTEGER",
    "headquarters STRING",
])

In [0]:
# Build the departments DataFrame from the in-memory rows and the DDL schema
# string, then print a preview of it.
df_departments = spark.createDataFrame(data_departments, schema=schema_departments)
df_departments.show()

+----------------+-------------+-------+-------------+
|            dept|num_employees| budget| headquarters|
+----------------+-------------+-------+-------------+
|              HR|          120| 500000|     New York|
|              IT|          150|1200000|San Francisco|
|         Finance|          100| 750000|      Chicago|
|       Marketing|           80| 600000|  Los Angeles|
|           Sales|          130| 850000|        Miami|
|         Support|           60| 400000|       Austin|
|      Operations|           90| 700000|      Seattle|
|           Legal|           50| 300000|       Denver|
|             R&D|          110| 950000|       Boston|
|Customer Service|           70| 450000|    San Diego|
+----------------+-------------+-------+-------------+



## TempView

In [0]:
# Register both DataFrames as temporary views so the %sql cells can query
# them by name ("employees" and "departments").
for view_name, frame in (
    ("employees", df_employees),
    ("departments", df_departments),
):
    frame.createOrReplaceTempView(view_name)

## Normal Select

In [0]:
%sql
-- Baseline for comparison: return every column of the employees view.
SELECT *
FROM employees;

employee_id,name,dept,title,salary,city,experience
1,Alice,HR,Manager,50000,New York,5
2,Bob,IT,Developer,70000,San Francisco,3
3,Charlie,Finance,Analyst,65000,Chicago,4
4,Diane,Marketing,Specialist,55000,Los Angeles,2
5,Eve,Sales,Executive,60000,Miami,6
6,Frank,IT,Developer,72000,Austin,3
7,Grace,HR,Assistant,48000,Boston,2
8,Henry,Finance,Manager,80000,Seattle,7
9,Irene,Marketing,Coordinator,53000,Denver,4
10,Jack,Sales,Executive,62000,San Diego,5


## SQL 

In [0]:
%sql
-- Return every employees column except name, city and experience.
SELECT * EXCEPT (name, city, experience)
FROM employees;

employee_id,dept,title,salary
1,HR,Manager,50000
2,IT,Developer,70000
3,Finance,Analyst,65000
4,Marketing,Specialist,55000
5,Sales,Executive,60000
6,IT,Developer,72000
7,HR,Assistant,48000
8,Finance,Manager,80000
9,Marketing,Coordinator,53000
10,Sales,Executive,62000


In [0]:
%sql
-- EXCEPT can be applied per table alias in a join.
-- e.dept is excluded, so the only dept column in the result is d.dept.
SELECT
  e.* EXCEPT (employee_id, dept, salary),
  d.* EXCEPT (num_employees)
FROM employees AS e
INNER JOIN departments AS d
  ON e.dept = d.dept

name,title,city,experience,dept,budget,headquarters
Charlie,Analyst,Chicago,4,Finance,750000,Chicago
Henry,Manager,Seattle,7,Finance,750000,Chicago
Mia,Analyst,Chicago,4,Finance,750000,Chicago
Ruth,Manager,Seattle,7,Finance,750000,Chicago
Alice,Manager,New York,5,HR,500000,New York
Grace,Assistant,Boston,2,HR,500000,New York
Karen,Manager,Atlanta,5,HR,500000,New York
Quinn,Assistant,Boston,2,HR,500000,New York
Bob,Developer,San Francisco,3,IT,1200000,San Francisco
Frank,Developer,Austin,3,IT,1200000,San Francisco


## Spark

In [0]:
from pyspark.sql.functions import col

In [0]:
# DataFrame equivalent of SQL's `* EXCEPT (...)`: keep every column whose name
# is not in the excluded set, preserving the original column order.
# The loop variable is deliberately not called `col`, so it does not shadow
# the `col` function imported from pyspark.sql.functions.
# NOTE(review): df_departments.drop("budget", "headquarters") should give the
# same result more directly — confirm before switching.
excluded_columns = {"budget", "headquarters"}
df_departments.select([
    column_name
    for column_name in df_departments.columns
    if column_name not in excluded_columns
]).show()

+----------------+-------------+
|            dept|num_employees|
+----------------+-------------+
|              HR|          120|
|              IT|          150|
|         Finance|          100|
|       Marketing|           80|
|           Sales|          130|
|         Support|           60|
|      Operations|           90|
|           Legal|           50|
|             R&D|          110|
|Customer Service|           70|
+----------------+-------------+

