In [None]:
# This cell imports the Python libraries needed for the tutorial (pandas, SQLAlchemy and PyMongo)
import pandas as pd
import sqlalchemy  
import pymongo 

# Practical skills development Tutorial - Databases

### 17th November 2025

#### Mary Garvey

## Answers

In [None]:
# Set up the PostgreSQL environment.
# Load the SQL magic extension first so that %sql / %%sql can be used below, then
# run the shared initialisation notebook.
# NOTE(review): sql_init.ipynb presumably defines DB_CONNECTION_STRING, which the
# next cell reads - confirm.
%load_ext sql
%run sql_init.ipynb

In [None]:
DB_CONNECTION=sqlalchemy.create_engine(DB_CONNECTION_STRING)
%sql DB_CONNECTION

Most relational database management systems have a set of data dictionary tables (metadata) that hold useful information, such as which tables or views you have access to. PostgreSQL keeps this metadata in its system catalog (the `pg_catalog` schema), which includes a view called `pg_tables` that can be queried using SQL to find out what tables you have.

In [None]:
%%sql
/* Confirm the cellphone tables still exist by looking them up in pg_tables */
SELECT *
FROM pg_catalog.pg_tables
WHERE tablename LIKE 'cellphone%';

If this returns no rows, please run the TM351_SQL_Tutorial notebook first.

You can also check what other tables you have. By default the query will return the data dictionary tables, so let's remove them from the query:

In [None]:
%%sql
/* List every table except those in the two data dictionary schemas */
SELECT *
FROM pg_catalog.pg_tables
WHERE schemaname NOT IN ('pg_catalog', 'information_schema');

### Name that Query

**Query 1**

In [None]:
%%sql
/* every column of every row in cellphone_users */

SELECT *
FROM cellphone_users;

**Query 2**

In [None]:
%%sql
/* brand, model and release date of cellphones released later than 1 January 2022 */

SELECT
    brand,
    model,
    release_date
FROM cellphone_data
WHERE release_date > '2022-01-01';

**Query 3**

In [None]:
%%sql
/*  retrieve brand and price of all Samsung and OnePlus phones priced at £600 or more
    (the comparison is >=, so a phone costing exactly 600 is included).
    The parentheses make the price condition apply to both brands. */

SELECT brand, price 
FROM cellphone_data
WHERE (brand = 'Samsung' OR brand = 'OnePlus') 
AND price >= 600;

**Query 4**

In [None]:
%%sql
/*  retrieve brand and price of every Samsung phone (whatever its price),
    plus any OnePlus phone priced at £600 or more.
    Notice the subtle difference from the previous query: the parentheses now
    tie the price condition to the OnePlus test only. */

SELECT brand, price 
FROM cellphone_data
WHERE brand = 'Samsung' OR (brand = 'OnePlus' 
AND price >= 600);

**Query 5**

In [None]:
%%sql
/*  retrieve the average price for each brand, rounded to 2 decimal places,
    ordered from the highest average price to the lowest */
SELECT brand, ROUND(AVG(price),2) AS average_price FROM cellphone_data 
GROUP BY brand
ORDER BY average_price DESC;

**Query 6**

In [None]:
%%sql
/*  retrieve the average price for each brand; one row is returned per brand,
    but the brand column is not selected, so the averages are unlabelled */
SELECT AVG(price) FROM cellphone_data GROUP BY brand;

### Exercises: Why the error?

**Query 7**

In [None]:
%%sql
/* Deliberately left failing for the exercise: cellphone_id is not qualified with
   a table name or alias. Corrected versions are in the cells that follow. */
SELECT brand, model, rating 
FROM cellphone_data, cellphone_ratings 
WHERE cellphone_id = cellphone_id
AND rating < 3;

We can see from the error message that it does not like the `WHERE` line. This is because `cellphone_id` appears in both tables, so the database cannot tell which one is meant. Including the table name is optional when referring to a column, but if two or more tables in a query have a column with the same name, then you must include either the table name or a table alias to distinguish between the columns with the same names.

For example:

In [None]:
%%sql
/* Fix using table names: each cellphone_id is qualified with the table it comes from */
SELECT
    brand,
    model,
    rating
FROM
    cellphone_data,
    cellphone_ratings
WHERE cellphone_data.cellphone_id = cellphone_ratings.cellphone_id
  AND rating < 3;

Or use a table alias. 

In [None]:
%%sql
/* Fix using aliases: each table gets a short alias, which then qualifies cellphone_id */
SELECT
    brand,
    model,
    rating
FROM
    cellphone_data cd,
    cellphone_ratings cr
WHERE cd.cellphone_id = cr.cellphone_id
  AND rating < 3;

Note, once you give a table an alias, you must use the alias (not the original table name) whenever you qualify one of its columns, otherwise you will get a different error message:

In [None]:
%%sql
/* Deliberately left failing: cellphone_data has been given the alias cd, but the
   WHERE clause still qualifies the column with the original table name */
SELECT brand, model, rating 
FROM cellphone_data cd, cellphone_ratings cr 
WHERE cellphone_data.cellphone_id = cr.cellphone_id
    AND rating < 3;

**Query 8**

In [None]:
%%sql
/* Deliberately left failing for the exercise: cellphone_id in the SELECT list is
   not qualified, although both tables in the FROM clause have a column of that name */
SELECT cellphone_id, brand, model, rating 
FROM cellphone_data cd, cellphone_ratings cr
WHERE cd.cellphone_id = cr.cellphone_id
    AND rating > 7
ORDER BY rating desc;

This is similar to above - cellphone_id appears in the SELECT statement and also needs to be distinguished, in this case using the table alias.

In [None]:
%%sql
/* Query 8 corrected: cellphone_id in the SELECT list now carries the cd alias */
SELECT
    cd.cellphone_id,
    brand,
    model,
    rating
FROM
    cellphone_data cd,
    cellphone_ratings cr
WHERE cd.cellphone_id = cr.cellphone_id
  AND rating > 7
ORDER BY rating DESC;

Hmm, looks like there is an outlier there with the 18 - assuming the rating should be between 1 and 10!

**Query 9**

In [None]:
%%sql
/* Deliberately left failing for the exercise: model is selected alongside an
   aggregate, but the rows are grouped by operating_system rather than model */
SELECT model, AVG(price) FROM cellphone_data GROUP BY operating_system;

When mixing columns and aggregate functions, as seen in this query, every non-aggregated column in the `SELECT` list needs to appear in the `GROUP BY` clause. Here `model` is selected but the rows are grouped by `operating_system`, hence the error. In this case we want to average the prices by operating system, and let's round the prices to 2 decimal places:

In [None]:
%%sql
/* Query 9 corrected: select the grouping column and round each average to 2 decimal places */
SELECT
    operating_system,
    ROUND(AVG(price), 2) AS price_avg
FROM cellphone_data
GROUP BY operating_system;

**Query 10**

In [None]:
%%sql
/* phones whose release_date equals the earliest release_date in the table */
SELECT *
FROM cellphone_data
WHERE release_date = (
    SELECT MIN(release_date)
    FROM cellphone_data
);

**Query 11**

In [None]:
%%sql
/* phones whose release_date equals the latest release_date in the table */
SELECT *
FROM cellphone_data
WHERE release_date = (
    SELECT MAX(release_date)
    FROM cellphone_data
);

For dates MIN() returns the earliest date and MAX() the latest.

**Query 12**

In [None]:
%%sql
/* every phone belonging to a brand that has at least one rating above 8 */
SELECT *
FROM cellphone_data
WHERE brand IN (
    SELECT brand
    FROM
        cellphone_data cd,
        cellphone_ratings cr
    WHERE cd.cellphone_id = cr.cellphone_id
      AND rating > 8
);

Note, if the subquery can return more than one value, then you must use `IN` instead of `=` for the comparison.

### Final queries

These queries are based on a simple DEPT and EMP scenario (one department can employ many employees; each employee works for at most one department):

In [None]:
%%sql

/* Start from a clean slate so this cell can be re-run during the tutorial.
   Outside a tutorial you would not normally drop tables unless necessary.

   Emp has to be dropped before Dept.
   Why is this? (Hint, foreign key constraints)
*/

DROP TABLE IF EXISTS Emp;
DROP TABLE IF EXISTS Dept;

/* Dept and Emp schema: Emp.deptno references Dept */

CREATE TABLE Dept (
    deptno  NUMERIC(2)   PRIMARY KEY,
    dname   VARCHAR(20)
);

CREATE TABLE Emp (
    empno   CHAR(6)      PRIMARY KEY,
    ename   VARCHAR(20),
    dob     DATE,
    sal     DECIMAL(8,2),
    gender  CHAR(1)      CHECK (gender IN ('M','F')),
    deptno  NUMERIC(2)   REFERENCES Dept
);

In [None]:
%%sql
/* Insert some sample records.
   Columns are named explicitly so the statements do not depend on the order in
   which the columns were declared, and dates are written in ISO format
   (YYYY-MM-DD) to match the other date literals in this notebook.
   Departments go in first because Emp.deptno references Dept.
   Judith is deliberately left without a department (deptno is NULL). */
INSERT INTO Dept (deptno, dname) VALUES (10, 'Finance');
INSERT INTO Dept (deptno, dname) VALUES (20, 'Sales');
INSERT INTO Dept (deptno, dname) VALUES (30, 'Marketing');

INSERT INTO Emp (empno, ename, dob, sal, gender, deptno) VALUES ('1111', 'June',   '1973-08-18', 35000, 'F', 10);
INSERT INTO Emp (empno, ename, dob, sal, gender, deptno) VALUES ('2222', 'Fred',   '1981-04-22', 50500, 'M', 20);
INSERT INTO Emp (empno, ename, dob, sal, gender, deptno) VALUES ('3333', 'Tom',    '1985-07-11', 21570, 'M', 20);
INSERT INTO Emp (empno, ename, dob, sal, gender, deptno) VALUES ('4444', 'Judith', '1992-11-01', 44500, 'F', NULL);

COMMIT;

Which of the following SQL commands answer the question given:

List the empno, name and salary of employees in department 10

<pre>
1. SELECT * FROM Emp WHERE deptno = 10; /* correct department, but we only wanted 3 columns */
2. SELECT empno, ename, sal FROM Emp; /* correct columns, but will return all rows */
3. SELECT empno, ename, sal FROM Emp WHERE deptno = 10; /* correct - right columns and rows */
4. SELECT empno, name, salary FROM Emp WHERE deptno = 10; /* correct rows, but the columns are called ename and sal, not name and salary */
</pre>

In [None]:
%%sql
/* option 3: the three requested columns, for department 10 only */
SELECT
    empno,
    ename,
    sal
FROM Emp
WHERE deptno = 10;

Find the youngest employee:

<pre>
1. SELECT * FROM Emp WHERE dob = '1992-11-01'; /* this would generate the correct row based on the current data */
2. SELECT * FROM Emp WHERE empno = 4444; /* same idea as above (although empno is CHAR(6), so the value should be quoted: '4444'), but what happens if a new, younger, employee joins the company? */
3. SELECT * FROM Emp WHERE dob =
	(SELECT MAX(dob) FROM EMP); /* correct */
4. SELECT * FROM Emp WHERE dob =
	(SELECT MIN(dob) FROM EMP); 
    /* this would be the oldest - perhaps counterintuitive if you think max is the bigger number */
</pre>

In [None]:
%%sql
/* option 3: the youngest employee is the one with the latest date of birth */
SELECT *
FROM Emp
WHERE dob = (
    SELECT MAX(dob)
    FROM Emp
);

List the department number, name and employee names for everyone assigned a department:
<pre>
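1. SELECT * FROM Dept, Emp; /* join condition missing, so every department is paired with every employee */
2. SELECT deptno, dname, ename FROM Dept, Emp; /* join condition missing, and the unqualified deptno is ambiguous */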
3. SELECT deptno, dname, ename FROM Dept, Emp WHERE dept.deptno = emp.deptno; 
    /* join condition added, but what about that first deptno? */
4. SELECT dept.deptno, dname, ename FROM Dept, Emp WHERE dept.deptno = emp.deptno; /* correct */
</pre>

In [None]:
%%sql
/* option 4: join Dept to Emp on deptno, qualifying the deptno that is selected */
SELECT
    dept.deptno,
    dname,
    ename
FROM
    Dept,
    Emp
WHERE dept.deptno = emp.deptno;

List the average salary by department:

<pre>
1. SELECT deptno, AVG(sal) FROM Emp; /* need to use GROUP BY if mixing columns and group functions */
2. SELECT deptno, AVG(sal) FROM Emp GROUP BY deptno; /* correct */
3. SELECT deptno, AVG(sal) FROM Emp GROUP BY sal; /* need to GROUP BY the column in the SELECT statement */
4. SELECT AVG(sal) FROM Emp GROUP BY deptno; 
    /* would generate the same average result as query 2, but is meaningless without the deptno */
</pre>

In [None]:
%%sql
/* option 2: one average salary per department number */
SELECT
    deptno,
    AVG(sal)
FROM Emp
GROUP BY deptno;