In [None]:
# NOTES FOR MODULE 7

In [None]:
# 7.2.2 Create Tables in SQL

# Use our final ERD as a guide

# Create six tables, one for each CSV file.

# These table creation statements will be our first introduction to Structured Query Language. 

# A statement is a block of code that, when executed, sends a command to the database.

# Before we create the database, review the diagrams.

In [None]:
# Review Diagrams

# We are recreating the same tables shown in the diagram in our SQL database.

# With the help of the diagram, we know the structure of this table: two columns with their data types. 
# Also, the table is already named. All we need to do is transfer over the same information.

# Start by using...

In [None]:
# The Query Tool is pgAdmin's text editor, much like VSCode is for Python.

# So let's create a table.

# -- Creating tables for PH-EmployeeDB
# CREATE TABLE departments (
#     dept_no VARCHAR(4) NOT NULL,
#     dept_name VARCHAR(40) NOT NULL,
#     PRIMARY KEY (dept_no),
#     UNIQUE (dept_name)
# );

In [None]:
# CREATE TABLE is the syntax required to create a new table in SQL.

# departments is the name of the table and how it will be referenced in queries.

# So the table has been named, now the structure needs to be created. The content inside the parentheses is how we'll 
# do that.

# dept_no VARCHAR(4) NOT NULL, creates a column named "dept_no" that can hold up to four varying characters, while 
# NOT NULL tells SQL that no null fields will be allowed when importing data.

# There are times when we don't want a data field to be null. For example, the dept_no column is our primary key—each 
# row has a unique number associated with it. If we didn't have the NOT NULL constraint, then there's a chance that a 
# row (or more than one row) won't have a primary key associated with the data.

In [None]:
# What do you think would happen if one of the rows didn’t have a unique identifier?

# Not all of the data would be present in every query, which would skew analysis results and provide incomplete lists.

In [None]:
# dept_name VARCHAR(40) NOT NULL, creates a column similar to the dept_no, only the varying character count has a 
# maximum of 40.

# PRIMARY KEY (dept_no), means that the dept_no column is used as the primary key for this table.

# UNIQUE (dept_name) adds the unique constraint to the dept_name column.

# The unique constraint requires every value in that column to be unique. 
# This ensures that if the table is updated in the future, no department name will be duplicated.



In [None]:
# The closing parenthesis and semicolon signal that the SQL CREATE TABLE statement is complete. 

# Any code added after will need to be included in a new SQL statement. 
# A statement is a command that is set up with a certain syntax. 



In [None]:
# What does it mean when the NOT NULL constraint is applied?

# Null values are not allowed in the column.

In [None]:
# Execute the Code

# To save the table to the database, we need to execute the code. 

# In the toolbar of the pgAdmin webpage, find and click the lightning bolt symbol toward the right of the bar. 

# This button runs the code and saves our work to the database.

In [2]:
# Troubleshoot Error Messages

# Any error message will also appear in the same manner. Encountering errors will happen often and troubleshooting 
# them is a large part of being a developer. 

# Thankfully, each error message encountered will tell us why the error occurred. 
# This is great because it helps us, the developers, research and fix the problem.

In [3]:
# ERROR:  relation "departments" already exists
# SQL state: 42P07

In [None]:
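# This error occurs because a table named "departments" already exists: SQL data is persistent, and CREATE TABLE 
# will not overwrite an existing table if the same command is run again. 
# Once a table has been committed to a database, it is there until a different command is run to delete it 
# (for example, DROP TABLE departments;).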


In [None]:
# Data integrity is the quality of the data we're working with. Clean data will yield better results in analysis, 
# and maintaining the data integrity ensures greater accuracy and reliability.

# Dirty data is data that contains errors such as duplicates, undefined values (e.g., not a number, or NaN), or other 
# inconsistencies. Keeping undefined values out of required columns is why the NOT NULL constraint is in place.

In [None]:
# To avoid encountering this error, first highlight only the code block you want to run, then execute it. 
# This tells pgAdmin to run only that code.



In [None]:
# Create Additional Tables

# Create another table for Employees.

# How to start creating the table:

# CREATE TABLE employees (
#     emp_no INT NOT NULL,
#     birth_date DATE NOT NULL,
#     first_name VARCHAR NOT NULL,
#     last_name VARCHAR NOT NULL,
#     gender VARCHAR NOT NULL,
#     hire_date DATE NOT NULL,
#     PRIMARY KEY (emp_no)
# );

In [None]:
# Create one more together—this time with foreign keys included. 
# Add the following code to the bottom of your query editor:

# CREATE TABLE dept_manager (
#     dept_no VARCHAR(4) NOT NULL,
#     emp_no INT NOT NULL,
#     from_date DATE NOT NULL,
#     to_date DATE NOT NULL,
#     FOREIGN KEY (emp_no) REFERENCES employees (emp_no),
#     FOREIGN KEY (dept_no) REFERENCES departments (dept_no),
#     PRIMARY KEY (emp_no, dept_no)
# );

In [None]:
# Remember that foreign keys reference the primary key of other tables. In the two lines above we can see that:

# The FOREIGN KEY constraint tells Postgres that there is a link between two tables

# The parentheses following FOREIGN KEY specify which of the current table's columns is linked to another table

# REFERENCES table_name (column_name) tells Postgres which other table uses that column as a primary key

# The primary key is similar, but there are two columns listed this time instead of just one: together, emp_no and 
# dept_no form a single composite primary key.

In [None]:
# One thing to keep in mind when working with foreign keys is that data insertion will fail if the value used as 
# the foreign key isn't already present in the referenced table. 

# This is a "foreign key constraint" and in this case, it means that the new data needs a reference point 
# (such as dept_no or emp_no) to be successfully added to the table.



In [None]:
# Let's create another table for the data in salaries.csv

# CREATE TABLE salaries (
#   emp_no INT NOT NULL,
#   salary INT NOT NULL,
#   from_date DATE NOT NULL,
#   to_date DATE NOT NULL,
#   FOREIGN KEY (emp_no) REFERENCES employees (emp_no),
#   PRIMARY KEY (emp_no)
# );

In [None]:
# This code tells Postgres that our new table is named "salaries" and we'll have columns for the emp_no, salary, 
# from_date, and to_date. 

# We have also specified, with the NOT NULL constraint, that certain fields may not be left null, which is 
# important because we want this data to be present for every employee. 

# As a final step in table creation, we've also specified primary and foreign keys.



In [None]:
# Query for Confirmation

# Confirm the tables were created successfully by running a SELECT statement, which performs a query instead of 
# constructing anything.

# Think of it as asking the database a question. For example, say we want to know how many columns are in the 
# departments table. 
# How would we ask that particular question? We would create a SELECT statement, then run the code. 
# This is called "querying the database."

In [None]:
# How to query how many columns are in the departments table

# In the editor, after the table creation statements, type 

# SELECT * FROM departments;

# The SELECT statement tells Postgres that we're about to query the database.
# The asterisk tells Postgres that we're looking for every column in a table.
# FROM departments tells Postgres which table to search.
# The semicolon signifies the completion of the query.
# After executing the SELECT statement, pgAdmin will automatically show the result in the Data Output tab at the bottom of the page.