<a href="https://colab.research.google.com/github/Ruoro/hello-world/blob/main/SQL_Cheat_Sheet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Load  sql server'

In [2]:
%load_ext sql

In [None]:
# DROP a tabble
%sql DROP TABLE IF EXISTS users


In [None]:
# CREATE A TABLE
%%sql

CREATE TABLE users (
  user_id INT,
  user_first_name VARCHAR(30) NOT NULL,
  user_last_name VARCHAR (40) NOT NULL,
  user_email_id VARCHAR (60) NOT NULL,
  user_email_validated BOOLEAN,
  user_password VARCHAR (200),
  user_role VARCHAR (1),
  is_active BOOLEAN,
  created_dt DATE DEFAULT CURRENT_DATE
)

In [3]:
# DROP a sequence 
%sql DROP SEQUENCE IF EXISTS users_user_id_seq

Environment variable $DATABASE_URL not set, and no connect string given.
Connection info needed in SQLAlchemy format, example:
               postgresql://username:password@hostname/dbname
               or an existing connection: dict_keys([])


In [4]:
%sql CREATE SEQUENCE user_name_id_seq

UsageError: Line magic function `%SQL` not found.


In [None]:
_# assign the sequence to the column 
%sql ALTER TABLE users ALTER COLUMN user_id SET DEFAULT nextval('users_userid_seq')

In [None]:
# To find out the information on the columns 
%%sql
SELECT * FROM information_schema.columns
WHERE table_name = 'users'
ORDER BY ordinal_position

In [None]:
# alter infomation on the column 
%%sql 
ALTER TABLE users
  ALTER COLUMN user_email_validated SET DEFAULT FALSE,
  ALTER COLUMN is_active SET DEFAULT FALSE
  
  //
   
  ADD COLUMN last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP

In [None]:
## Different types of constraint
# - NOT NULL
# CHECK
# FOREIGN KEY
# PRIMARY KEY
# UNIQUE

In [None]:
# code to check for the constraints defined in the sql table 

%%sql
SELECT table_catalog, 
    table_name,
    constraint_type,
    constraint_name
FROM information_schema.table_constraints
WHERE table_name = 'users'

In [None]:
# to add primary key constraint

%sql ALTER TABLE users ADD PRIMARY KEY (user_id)

In [None]:
# to drop a constraint
%sql ALTER TABLE users DROP CONSTRAINT user_pkey


In [None]:
# add constraint with users selected name 
%sql ALTER TABLE users ADD CONSTRAINT userpk PRIMARY KEY (user_id)


In [None]:
# To make suere the column only accepts the two inputs

%%sql

ALTER TABLE users
  ADD CHECK (user_role IN ('U', 'A'))

In [None]:
%%sql
CREATE TABLE user_logins ( 
    user_login_id SERIAL PRIMARY KEY, 
    user_id INT, 
    user_login_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    user_ip_addr VARCHAR(20) 

In [None]:
%%sql
ALTER TABLE user_logins 
  ADD FOREIGN KEY (user_id) 
  REFERENCES users(user_id) 

In [None]:
# to drop a table with constraints
%sql  DROP TABLE users CASCADE

In [None]:
%%sql
CREATE SEQUENCE users_user_id_seq

START WITH 7
MINVALUE 1
MAXVALUE 10000
INCREMENT BY 2

In [None]:
# ALTER SEQUENCE 
%%sql 
ALTER SEQUENCE test_seq
  INCREMENT BY 1 
  RESTART WITH 101 

In [None]:
# truncate is faster when deleting info from the table

In [None]:
## PARTITIONING 


Let s understand how we can take care of list partitioning of tables.
It is primarly used to create partitions based up on the values.

« Here are the steps involved in creating table using list partitioning strategy.

« Create table using PARTITION BY LIST 

« Add default and value specific partitions

» Validate by inserting data into the table 

« We can detach as well as drop the partitions from the table.

In [9]:
# Create a table with a partition of user role
# Partition by LIST
%%sql

CREATE TABLE users (
  user_id INT,
  user_first_name VARCHAR(30) NOT NULL,
  user_last_name VARCHAR (40) NOT NULL,
  user_email_id VARCHAR (60) NOT NULL,
  user_email_validated BOOLEAN,
  user_password VARCHAR (200),
  user_role VARCHAR (1),
  is_active BOOLEAN,
  created_dt DATE DEFAULT CURRENT_DATE
)PARTITION BY LIST user_role

SyntaxError: ignored

In [None]:
# Create the partition tables

%%sql

CREATE TABLE user_part_u
PARTITION OF user_part 
FOR VALUES IN ('A')

In [None]:
## RANGE PARTITIONING
%%sql

CREATE TABLE users_range_part (
  user_id INT,
  user_first_name VARCHAR(30) NOT NULL,
  user_last_name VARCHAR (40) NOT NULL,
  user_email_id VARCHAR (60) NOT NULL,
  user_email_validated BOOLEAN,
  user_password VARCHAR (200),
  user_role VARCHAR (1) NOT NULL DEFAULT 'U', -- U and A
  is_active BOOLEAN,
  created_dt DATE DEFAULT CURRENT_DATE
  last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (created_dt, user_id)
)PARTITION BY RANGE (created_dt)

In [None]:
# CREATE RANGE PARTIOTION OF DEFAULT VALUES
%%sql

CREATE TABLE users_range_part_default
PARTITION OF users_range_part DEFAULT

In [None]:
# RANGE PARTITION FOR RECORDS FROM 2016
%%sql

CREATE TABLE users_range_part_2016
PARTITION OF users_range_part 
FOR VALUES FROM ('2016-01-01') TO ('2016-12-31')

In [None]:
%%sql

CREATE TABLE users_range_part_2017
PARTITION OF users_range_part 
FOR VALUES FROM ('2017-01-01') TO ('2017-12-31')

In [None]:
%%sql

CREATE TABLE users_range_part_2018
PARTITION OF users_range_part 
FOR VALUES FROM ('2018-01-01') TO ('2018-12-31')

In [None]:
# ----NB
# USE MAIN TABLE TO INSERT DATA EVEN IN PARTITIONED TABLES
INSERT INTO users_range_part
  (user_first_name, user_last_name, user_email_id, created_dt)
VALUES 
 (ETC < ETC > ETC)

In [10]:
## DETACH PARTITIONS
%%sql
ALTER TABLE users_range_part
  DETATCH PARTITION users_range_part_2016

In [None]:
%%sql
ALTER TABLE users_range_part
  DETATCH PARTITION users_range_part_2017

In [15]:
# AUTOMATE CREATE PARTITION BY MONTH
import pandas as pd 
from pandas.tseries.offsets import MonthBegin, MonthEnd 

months = pd.date_range(start='1/1/2016', end='12/31/2016', freq="1M"),

for month in months:
  begin_date = month - MonthBegin(1) 
  end_date = month + MonthEnd(0) 
  print(str(month) [:7].replace('-', ''), end=':') 
  print(str(begin_date).split(' ')[0], end=':') 
  print(str(end_date).split(' ')[0])

Datetim:DatetimeIndex(['2016-01-01',:DatetimeIndex(['2016-01-31',


In [None]:
# PARTITION BY HARSH
# create table
 %%sql

CREATE TABLE users_harsh_part (
  user_id SERIAL,
  user_first_name VARCHAR(30) NOT NULL,
  user_last_name VARCHAR (40) NOT NULL,
  user_email_id VARCHAR (60) NOT NULL,
  user_email_validated BOOLEAN,
  user_password VARCHAR (200),
  user_role VARCHAR (1) NOT NULL DEFAULT 'U', --U and A
  is_active BOOLEAN,
  created_dt DATE DEFAULT CURRENT_DATE
  last_updated_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (user_id)
)PARTITION BY HARSH (user_id)

In [None]:
%%sql

CREATE TABLE users_harsh_part_0_of_8
PARTITION OF users_harsh_part 
FOR VALUES WITH (modulus 8, remainder 0)

In [None]:
%%sql

CREATE TABLE users_harsh_part_1_of_8
PARTITION OF users_harsh_part 
FOR VALUES WITH (modulus 8, remainder 1)

## all the way to 7 to get 8 paritions based on the modulus


### Usage Scenarios
Let us go through some of the usage scenarios with respect to partitioning.
> It is typically used to manage large tables so that the tables does not grow abnormally over a period of time.

> Partitioning is quite often used on top of log tables, reporting tables etc. %

> If a log table is partitioned and if we want to have data for 7 years, partitions older than 7 years can be quickly dropped.

> Dropping partitions to clean up huge chunk of data is much faster compared to running delete command on non partitioned table.

> For tables like order with limited set of satuses, we often use list paritioning based up on the status. It can be 2 partitions (CLOSED
orders and ACTIVE orders) or separate partition for each status. 

>> As most of the operations will be on Active Orders, this approach can significantly improve the performance.

> In case of log tables, where we might want to retain data for several years, we tend to use range partition on date column. If we use st |
partition, then we might end up in duplication of data unnecessarly.