# What are the SQL keywords? #

SELECT: Retrieves data from one or more tables.
FROM: Specifies the table or tables from which to retrieve data in a SELECT statement.
WHERE: Filters the rows returned in a query based on a specified condition.
ORDER BY: Sorts the result set based on one or more columns.
GROUP BY: Groups rows that have the same values in specified columns into summary rows.
HAVING: Filters the results of a GROUP BY clause based on a specified condition.
INSERT INTO: Adds new rows to a table.
UPDATE: Modifies existing records in a table.
DELETE: Removes one or more rows from a table based on a specified condition.
CREATE TABLE: Defines a new table structure.
ALTER TABLE: Modifies an existing table structure.
DROP TABLE: Deletes an existing table and its data.
CREATE INDEX: Creates an index on one or more columns of a table, improving query performance.
DISTINCT: Filters out duplicate values from a result set.
JOIN: Combines rows from two or more tables based on a related column between them.
INNER JOIN, LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN: Different types of joins to retrieve data based on matching or non-matching rows.
UNION, UNION ALL: Combines the result sets of two or more SELECT statements; UNION removes duplicate rows, while UNION ALL keeps them.
AS: Renames a column or table using an alias.
LIKE: Used in a WHERE clause to search for a specified pattern in a column.
BETWEEN: Filters the result set to include only rows where a column value is between two specified values (both endpoints are included).

# General Format for SQL Query #

SELECT column1, column2, ...
FROM table_name
WHERE condition;

For example:
-- All columns of every teams row whose department is 'Data Collation'.
SELECT t.*
FROM teams AS t
WHERE t.department = 'Data Collation';

The above code selects all columns from the 'teams' table for rows where the department is 'Data Collation'.

# SQL Snowflake Exercises #

-- How many different CC_HOURS values occur in CALL_CENTER?

SELECT
    COUNT(DISTINCT cc.CC_HOURS) AS num_distinct_cc_hours
FROM SNOWFLAKE_SAMPLE_DATA.TPCDS_SF100TCL.CALL_CENTER AS cc;

-- Return the number of employees who work in the call centers listed in CALL_CENTER.
-- Each CALL_CENTER row is one call center, so COUNT(*) would count centers, not
-- people; the per-center headcount is stored in CC_EMPLOYEES and must be summed.
-- NOTE(review): CALL_CENTER also carries CC_REC_START_DATE / CC_REC_END_DATE, so the
-- same center may appear in several history rows; confirm whether only current rows
-- should be included.

SELECT COALESCE(SUM(CC_EMPLOYEES), 0) AS num_employees
FROM SNOWFLAKE_SAMPLE_DATA.TPCDS_SF100TCL.CALL_CENTER;

-- Return the unique number of employees in the CALL_CENTER table,
-- i.e. how many different headcount values appear across the call centers.
-- CALL_CENTER has no employee_id column; the employee figure is CC_EMPLOYEES.

SELECT COUNT(DISTINCT CC_EMPLOYEES) AS num_unique_employees
FROM SNOWFLAKE_SAMPLE_DATA.TPCDS_SF100TCL.CALL_CENTER;

-- Total row count of CALL_CENTER.
-- The table name is unqualified, so it resolves against the session's current schema.

SELECT
    COUNT(*) AS ROW_COUNT
FROM CALL_CENTER AS cc;

-- Return the number of CUSTOMER rows whose birth country is Cuba.
-- The TPC-DS column is c_birth_country (every CUSTOMER column carries the c_ prefix).
-- UPPER() makes the match independent of how the country name is cased in the data.
SELECT COUNT(*) AS num_customers_cuba
FROM CUSTOMER
WHERE UPPER(c_birth_country) = 'CUBA';

-- Create a summary of the number of customers by their birth countries,
-- largest group first. The TPC-DS column is c_birth_country, not birth_country.
-- Customers with a NULL birth country form their own group.

SELECT
    c_birth_country,
    COUNT(*) AS num_customers
FROM CUSTOMER
GROUP BY c_birth_country
ORDER BY num_customers DESC;

-- Rename the COUNT(*) column to number_of_distinct_customers_by_country and order in ascending order.
-- The TPC-DS column is c_birth_country, not birth_country.

SELECT
    c_birth_country,
    COUNT(*) AS number_of_distinct_customers_by_country
FROM CUSTOMER
GROUP BY c_birth_country
ORDER BY number_of_distinct_customers_by_country ASC;

-- Link INVENTORY to ITEM on the item surrogate key.
-- Neither table has a column literally named ITEM_SK: TPC-DS prefixes each column
-- with its table abbreviation, so the key is INV_ITEM_SK on INVENTORY and I_ITEM_SK on ITEM.

SELECT *
FROM INVENTORY AS inv
INNER JOIN ITEM AS i
    ON inv.INV_ITEM_SK = i.I_ITEM_SK;

-- Number of input rows, output rows after the join.
-- Comparing the two counts shows whether the join dropped or multiplied INVENTORY rows.

-- Input: rows in INVENTORY before joining.
SELECT COUNT(*) AS num_input_rows
FROM INVENTORY;

-- Output: rows produced by the join. The key columns are INV_ITEM_SK (INVENTORY)
-- and I_ITEM_SK (ITEM); there is no bare ITEM_SK column on either table.
SELECT COUNT(*) AS num_output_rows
FROM INVENTORY AS inv
INNER JOIN ITEM AS i
    ON inv.INV_ITEM_SK = i.I_ITEM_SK;

-- What would happen if you were to have multiple products with the same ITEM_SK?

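The join returns one output row for every matching pair, so an INVENTORY row whose ITEM_SK matched several ITEM rows would appear once per match. The output would then contain more rows than INVENTORY, and any aggregate computed over it (for example a total quantity) would be inflated.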

-- How many product items have the MIN price?
-- ITEM stores the price in i_current_price; there is no column named price.
-- Items with a NULL price are ignored by MIN and never match the equality.

SELECT COUNT(*) AS num_products_min_price
FROM ITEM
WHERE i_current_price = (SELECT MIN(i_current_price) FROM ITEM);

-- Find the products which have the MIN price and the lowest wholesale price.
-- ITEM's columns are i_current_price and i_wholesale_cost (not price / wholesale_price).
-- A row qualifies only if it holds BOTH minimums at once, so the result may be empty
-- when the cheapest item is not also the one with the lowest wholesale cost.

SELECT *
FROM ITEM
WHERE i_current_price = (SELECT MIN(i_current_price) FROM ITEM)
  AND i_wholesale_cost = (SELECT MIN(i_wholesale_cost) FROM ITEM);

# What is an Alias table? # 

In SQL, an alias table refers to a temporary name assigned to a table or a result set in a query. 
Aliases are used to provide a shorthand or alternative name for a table or a column, making the SQL code more readable and concise.

-- Template: once a table has an alias, columns are referenced through it.
SELECT a.column1, a.column2
FROM long_table_name AS a;

-- Example: cc and e stand in for the two table names.
-- CALL_CENTER has no cc_id column; its key is cc_call_center_sk.
-- NOTE(review): employees is a hypothetical table used only to illustrate a second
-- alias; it is not part of the TPC-DS sample schema, so this query is not runnable as-is.
SELECT
    cc.cc_call_center_sk,
    cc.cc_name,
    e.employee_id,
    e.employee_name
FROM SNOWFLAKE_SAMPLE_DATA.TPCDS_SF100TCL.CALL_CENTER AS cc
INNER JOIN employees AS e
    ON cc.cc_call_center_sk = e.call_center_id;

# Similarities between SQL and Python #

The WHERE clause.

SQL: SELECT column1, column2
FROM table_name
WHERE condition;

Python: result = [(row['column1'], row['column2']) for row in data if row['condition']]

GROUP BY

SQL: SELECT column, COUNT(*)
FROM table_name
GROUP BY column;

Python: from itertools import groupby
data.sort(key=lambda x: x['column'])  # groupby only merges adjacent rows, so sort by the key first
result = [(key, len(list(group))) for key, group in groupby(data, key=lambda x: x['column'])]

ORDER BY

SQL: SELECT column1, column2
FROM table_name
ORDER BY column1 ASC, column2 DESC;

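Python: by_column2_desc = sorted(data, key=lambda x: x['column2'], reverse=True)
result = sorted(by_column2_desc, key=lambda x: x['column1'])  # reverse= accepts a single flag, so sort by the secondary key first; the stable sort keeps that order within equal column1 values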

JOIN

SQL: SELECT t1.column1, t2.column2   -- t1 = table1, t2 = table2
FROM table1 t1
JOIN table2 t2 ON t1.key = t2.key;

Python: result = [(row1['column1'], row2['column2']) for row1 in table1 for row2 in table2 if row1['key'] == row2['key']]

# What summary statistics can you get using SQL? #

SELECT COUNT(*) FROM table_name;  =  Count of rows.

SELECT SUM(column_name) FROM table_name;  = sum of values in a chosen column.

SELECT AVG(column_name) FROM table_name;  = mean average of values in chosen column.

SELECT MIN(column_name) FROM table_name;  = smallest value in a column's range.

SELECT MAX(column_name) FROM table_name;  = largest value in a column's range.

SELECT DISTINCT column_name FROM table_name;  = unique values in a chosen column (duplicates are left out of the result; the table itself is unchanged).

SELECT STDDEV(column_name), VARIANCE(column_name) FROM table_name;  = Gives SD and variance of column.