In [1]:
import pandas as pd
import numpy as np
import sqlite3

In [2]:
# Open the SQLite database file used by the whole notebook and obtain the
# cursor (`cur`) through which every later cell runs its SQL.
cnn = sqlite3.connect("data30.db")
cur = cnn.cursor()

In [4]:
# Create the Department lookup table.
# IF NOT EXISTS keeps this cell re-runnable: the database is a file on disk,
# so a plain CREATE TABLE raises "table Department already exists" on every
# run after the first one.
cur.execute('''
CREATE TABLE IF NOT EXISTS Department (
    DepartmentID INT PRIMARY KEY,
    DepartmentName VARCHAR(50)
);
''')

<sqlite3.Cursor at 0x1b1b931a340>

In [5]:
# Create the Employee table; each employee points at a Department row through
# DepartmentID.
# IF NOT EXISTS keeps this cell re-runnable: the database is a file on disk,
# so a plain CREATE TABLE raises "table Employee already exists" on every run
# after the first one.
# NOTE(review): SQLite enforces FOREIGN KEY constraints only when they are
# enabled on the connection (PRAGMA foreign_keys) - confirm whether
# enforcement is wanted here.
cur.execute('''
CREATE TABLE IF NOT EXISTS Employee (
    EmployeeID INT PRIMARY KEY,
    Name VARCHAR(100),
    Job VARCHAR(50),
    Salary INT,
    DepartmentID INT,
    FOREIGN KEY (DepartmentID) REFERENCES Department(DepartmentID)
);
''')

<sqlite3.Cursor at 0x1b1b931a340>

In [6]:
# Seed the four departments.
# OR IGNORE skips any row whose DepartmentID is already present, so re-running
# the cell does not fail on the primary key.
cur.execute('''
INSERT OR IGNORE INTO Department (DepartmentID, DepartmentName) VALUES
(1, 'Sales'),
(2, 'Marketing'),
(3, 'Finance'),
(4, 'Human Resources');
''')

# Commit explicitly: the INSERT runs inside a transaction, and uncommitted
# rows are lost when the connection is closed or the kernel restarts.
cur.connection.commit()

<sqlite3.Cursor at 0x1b1b931a340>

In [7]:
# Seed the ten sample employees.
# OR IGNORE skips any row whose EmployeeID is already present, so re-running
# the cell does not fail on the primary key.
cur.execute('''
INSERT OR IGNORE INTO Employee (EmployeeID, Name, Job, Salary, DepartmentID) VALUES
(1, 'John Doe', 'Sales Representative', 50000, 1),
(2, 'Jane Smith', 'Marketing Specialist', 60000, 2),
(3, 'Mike Johnson', 'Financial Analyst', 70000, 3),
(4, 'Emily Davis', 'HR Manager', 55000, 4),
(5, 'Robert Brown', 'Sales Representative', 75000, 1),
(6, 'Alice White', 'Marketing Specialist', 80000, 2),
(7, 'Daniel Lee', 'Senior Financial Analyst', 90000, 3),
(8, 'Olivia Miller', 'HR Manager', 70000, 4),
(9, 'William Turner', 'Sales Representative', 55000, 1),
(10, 'Sophia Martin', 'Financial Analyst', 60000, 2);
''')

# Commit explicitly: the INSERT runs inside a transaction, and uncommitted
# rows are lost when the connection is closed or the kernel restarts.
cur.connection.commit()

<sqlite3.Cursor at 0x1b1b931a340>

In [7]:
# Read back every Department row and show it as a DataFrame whose columns are
# named after the fields reported by the cursor.
result = cur.execute('''
SELECT * FROM Department;
''').fetchall()
department_df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(department_df)

# Same again for the Employee table.
result = cur.execute('''
SELECT * FROM Employee;
''').fetchall()
employee_df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(employee_df)

Unnamed: 0,DepartmentID,DepartmentName
0,1,Sales
1,2,Marketing
2,3,Finance
3,4,Human Resources


Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,1,John Doe,Sales Representative,50000,1
1,2,Jane Smith,Marketing Specialist,60000,2
2,3,Mike Johnson,Financial Analyst,70000,3
3,4,Emily Davis,HR Manager,55000,4
4,5,Robert Brown,Sales Representative,75000,1
5,6,Alice White,Marketing Specialist,80000,2
6,7,Daniel Lee,Senior Financial Analyst,90000,3
7,8,Olivia Miller,HR Manager,70000,4
8,9,William Turner,Sales Representative,55000,1
9,10,Sophia Martin,Financial Analyst,60000,2


# Single Row Sub-Query

### A single-row subquery returns exactly one row, so the outer query can compare against it with a single-value operator such as =, < or >.

### Display the information about employees whose salary is less than the maximum salary

#### Sub-Query

In [8]:
# Inner query on its own: the single highest salary in the Employee table.
result = cur.execute('''
SELECT MAX(salary) 
FROM Employee;
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,MAX(salary)
0,90000


#### Main-Query

In [9]:
# Outer query: every employee whose salary is strictly below the maximum
# salary computed by the nested SELECT.
result = cur.execute('''
SELECT *
FROM Employee
WHERE salary < (
    SELECT MAX(salary) 
    FROM Employee
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,1,John Doe,Sales Representative,50000,1
1,2,Jane Smith,Marketing Specialist,60000,2
2,3,Mike Johnson,Financial Analyst,70000,3
3,4,Emily Davis,HR Manager,55000,4
4,5,Robert Brown,Sales Representative,75000,1
5,6,Alice White,Marketing Specialist,80000,2
6,8,Olivia Miller,HR Manager,70000,4
7,9,William Turner,Sales Representative,55000,1
8,10,Sophia Martin,Financial Analyst,60000,2


### Display the information about employees who have the same job as 'John Doe'

#### Sub-Query

In [10]:
# Inner query on its own: the job held by the employee named 'John Doe'.
result = cur.execute('''
SELECT Job
FROM Employee
WHERE Name = 'John Doe'
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,Job
0,Sales Representative


#### Main-Query

In [11]:
# Outer query: every employee whose Job equals the job returned by the nested
# SELECT for 'John Doe'.
result = cur.execute('''
SELECT *
FROM Employee
WHERE Job = (
    SELECT Job
    FROM Employee
    WHERE Name = 'John Doe'
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,1,John Doe,Sales Representative,50000,1
1,5,Robert Brown,Sales Representative,75000,1
2,9,William Turner,Sales Representative,55000,1


# Pairwise Comparison Sub-Query

### A pairwise comparison sub-query compares several columns together as one tuple: a row matches only when its combination of values equals the combination returned by a single row of the sub-query.

### Display the Information of those Employees whose EmployeeID and Salary match those of Employees with the Job title 'Sales Representative'

#### Sub-Query

In [12]:
# Inner query on its own: the (EmployeeID, Salary) pairs of all
# 'Sales Representative' employees.
result = cur.execute('''
SELECT EmployeeID, Salary
FROM Employee
WHERE Job = 'Sales Representative'
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID,Salary
0,1,50000
1,5,75000
2,9,55000


#### Main-Query

In [12]:
# Outer query: keep the employees whose (EmployeeID, Salary) pair appears, as
# a pair, among the rows returned by the nested SELECT.
result = cur.execute('''
SELECT *
FROM Employee
WHERE (EmployeeID, Salary) IN (
    SELECT EmployeeID, Salary
    FROM Employee
    WHERE Job = 'Sales Representative'
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,1,John Doe,Sales Representative,50000,1
1,5,Robert Brown,Sales Representative,75000,1
2,9,William Turner,Sales Representative,55000,1


### Display the Information of those Employees whose Name and Salary match those of Employees with the DepartmentID '3'

#### Sub-Query

In [14]:
# Inner query on its own: the (Name, Salary) pairs of the employees in
# department 3.
result = cur.execute('''
SELECT Name, Salary
FROM Employee
WHERE DepartmentID = 3
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,Name,Salary
0,Mike Johnson,70000
1,Daniel Lee,90000


#### Main-Query

In [15]:
# Outer query: keep the employees whose (Name, Salary) pair appears, as a
# pair, among the rows returned by the nested SELECT for department 3.
result = cur.execute('''
SELECT *
FROM Employee
WHERE (Name, Salary) IN (
    SELECT Name, Salary
    FROM Employee
    WHERE DepartmentID = 3
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,3,Mike Johnson,Financial Analyst,70000,3
1,7,Daniel Lee,Senior Financial Analyst,90000,3


# Non-Pairwise Comparison Sub-Query

### A Non-Pairwise Comparison Subquery is a subquery in which each column is compared independently against its own subquery, so the matching values do not have to come from the same row.

### Display the Information of those Employees whose EmployeeID and Salary match those of Employees with the Job title 'Sales Representative'

#### Sub-Query

In [16]:
# First inner query on its own: the EmployeeID of every
# 'Sales Representative'.
result = cur.execute('''
SELECT EmployeeID
FROM Employee
WHERE Job = 'Sales Representative'
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID
0,1
1,5
2,9


In [17]:
# Second inner query on its own: the Salary of every 'Sales Representative'.
result = cur.execute('''
SELECT Salary
FROM Employee
WHERE Job = 'Sales Representative'
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,Salary
0,50000
1,75000
2,55000


#### Main-Query

In [18]:
# Outer query: EmployeeID and Salary are each tested against their own nested
# SELECT, and both IN conditions must hold for a row to be kept.
result = cur.execute('''
SELECT *
FROM Employee
WHERE EmployeeID IN (
    SELECT EmployeeID
    FROM Employee
    WHERE Job = 'Sales Representative'
)
AND Salary IN (
    SELECT Salary
    FROM Employee
    WHERE Job = 'Sales Representative'
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,1,John Doe,Sales Representative,50000,1
1,5,Robert Brown,Sales Representative,75000,1
2,9,William Turner,Sales Representative,55000,1


### Display the Information of those Employees whose Name and Salary match those of Employees with the DepartmentID '3'

#### Sub-Query

In [19]:
# First inner query on its own: the names of the employees in department 3.
result = cur.execute('''
SELECT Name
FROM Employee
WHERE DepartmentID = 3
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,Name
0,Mike Johnson
1,Daniel Lee


In [20]:
# Second inner query on its own: the salaries of the employees in
# department 3.
result = cur.execute('''
SELECT Salary
FROM Employee
WHERE DepartmentID = 3
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,Salary
0,70000
1,90000


#### Main-Query

In [21]:
# Outer query: Name and Salary are each tested against their own nested
# SELECT for department 3, and both IN conditions must hold.
result = cur.execute('''
SELECT *
FROM Employee
WHERE Name IN (
    SELECT Name
    FROM Employee
    WHERE DepartmentID = 3
)
AND Salary IN (
    SELECT Salary
    FROM Employee
    WHERE DepartmentID = 3
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,3,Mike Johnson,Financial Analyst,70000,3
1,7,Daniel Lee,Senior Financial Analyst,90000,3


# Multiple Row Sub-Query

### A multiple-row subquery is a subquery whose inner query produces multi-row results

### Display the information of the Employees who have the job 'HR Manager'

#### Sub-Query

In [22]:
# Inner query on its own: the EmployeeID of every 'HR Manager'.
# The job title is written with single quotes because that is the SQL string
# literal syntax; a double-quoted "HR Manager" is an identifier in standard
# SQL and only works in SQLite through a legacy fallback that can be disabled.
cur.execute('''
SELECT EmployeeID
FROM Employee
WHERE Job = 'HR Manager'
''')

result = cur.fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(result, columns=[item[0] for item in cur.description])
display(df)

Unnamed: 0,EmployeeID
0,4
1,8


#### Main-Query

In [23]:
# Outer query: every employee whose EmployeeID is in the multi-row result of
# the nested SELECT (the IDs of all 'HR Manager' employees).
# The job title is written with single quotes because that is the SQL string
# literal syntax; a double-quoted "HR Manager" is an identifier in standard
# SQL and only works in SQLite through a legacy fallback that can be disabled.
cur.execute('''
SELECT *
FROM Employee
WHERE EmployeeID IN (
    SELECT EmployeeID
    FROM Employee
    WHERE Job = 'HR Manager'
);
''')

result = cur.fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(result, columns=[item[0] for item in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,4,Emily Davis,HR Manager,55000,4
1,8,Olivia Miller,HR Manager,70000,4


# Scalar Sub-Query

### A Scalar Sub-Query is a Sub-Query that always returns exactly one row and one column.

### Display the employees whose salary is more than the average salary earned by all employees

#### Sub-Query

In [24]:
# Inner query on its own: the average salary over the whole Employee table.
result = cur.execute('''
SELECT AVG(Salary)
FROM Employee;
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,AVG(Salary)
0,66500.0


#### Main-Query

In [25]:
# Outer query: every employee paid strictly more than the overall average
# salary computed by the nested SELECT.
result = cur.execute('''
SELECT * 
FROM Employee
WHERE Salary > (
    SELECT AVG(Salary)
    FROM Employee
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,3,Mike Johnson,Financial Analyst,70000,3
1,5,Robert Brown,Sales Representative,75000,1
2,6,Alice White,Marketing Specialist,80000,2
3,7,Daniel Lee,Senior Financial Analyst,90000,3
4,8,Olivia Miller,HR Manager,70000,4


### Display the employees with a salary greater than the salary of employee 'Mike Johnson'

#### Sub-Query

In [27]:
# Inner query on its own: the salary of the employee named 'Mike Johnson'.
result = cur.execute('''
SELECT Salary
FROM Employee
WHERE Name = 'Mike Johnson'
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,Salary
0,70000


#### Main-Query

In [28]:
# Outer query: every employee paid strictly more than the salary returned by
# the nested SELECT for 'Mike Johnson'.
result = cur.execute('''
SELECT *
FROM Employee
WHERE Salary > (
    SELECT Salary
    FROM Employee
    WHERE Name = 'Mike Johnson'
);
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[field[0] for field in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,5,Robert Brown,Sales Representative,75000,1
1,6,Alice White,Marketing Specialist,80000,2
2,7,Daniel Lee,Senior Financial Analyst,90000,3


# Inline View Sub-Query

### An inline view is a sub-query in the FROM clause. You select data from it as you would from a table.
### It is used within the FROM clause of a SELECT statement

### Display the names and salaries of employees with the job title 'HR Manager' from the 'Employee' table, sorted in descending order based on their salaries

#### Sub-Query

In [29]:
# Inner query on its own: name and salary of every 'HR Manager'.
result = cur.execute('''
SELECT Name, Salary
FROM Employee
WHERE Job = 'HR Manager';
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,Name,Salary
0,Emily Davis,55000
1,Olivia Miller,70000


#### Main-Query

In [30]:
# Outer query: select from the nested SELECT in the FROM clause as if it were
# a table, ordering its rows by salary from highest to lowest.
result = cur.execute('''
SELECT *
FROM (
    SELECT Name, Salary
    FROM Employee
    WHERE Job = 'HR Manager'
)
ORDER BY SALARY DESC;
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,Name,Salary
0,Olivia Miller,70000
1,Emily Davis,55000


# Correlated Sub-Query

### A Sub-Query that references columns of the Outer-Query is called a Correlated Sub-Query; it is evaluated once for each row of the Outer-Query.

### Find Employees with Salaries Greater Than Their Department Average:

In [8]:
# The nested SELECT refers to the outer row (e1.DepartmentID), so the average
# it returns is the average of that row's own department. Only employees paid
# strictly more than that average are kept.
result = cur.execute('''
SELECT *
FROM Employee e1
WHERE Salary > (
    SELECT AVG(Salary)
    FROM Employee e2
    WHERE e2.DepartmentID = e1.DepartmentID
)
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,5,Robert Brown,Sales Representative,75000,1
1,6,Alice White,Marketing Specialist,80000,2
2,7,Daniel Lee,Senior Financial Analyst,90000,3
3,8,Olivia Miller,HR Manager,70000,4


### Find Employees with Salaries Greater Than or Equal To Their Job's Average:

In [32]:
# The nested SELECT refers to the outer row (e1.Job), so the average it
# returns is the average over employees sharing that row's job. The >=
# comparison keeps employees at or above that average, which includes anyone
# who is the only holder of their job.
result = cur.execute('''
SELECT *
FROM Employee e1
WHERE Salary >= (
    SELECT AVG(Salary)
    FROM Employee e2
    WHERE e2.Job = e1.Job
)
''').fetchall()

# Label the fetched rows with the column names reported by the cursor.
df = pd.DataFrame(data=result, columns=[col[0] for col in cur.description])
display(df)

Unnamed: 0,EmployeeID,Name,Job,Salary,DepartmentID
0,3,Mike Johnson,Financial Analyst,70000,3
1,5,Robert Brown,Sales Representative,75000,1
2,6,Alice White,Marketing Specialist,80000,2
3,7,Daniel Lee,Senior Financial Analyst,90000,3
4,8,Olivia Miller,HR Manager,70000,4
