# 1. Data types

# 1.1. Numeric types

In [None]:
INT, INTEGER
-- Range from -2,147,483,648 to 2,147,483,647
-- Unsigned range from 0 to 4,294,967,295

BIGINT

SMALLINT
-- -32,768 -> 32,768

In [None]:
DECIMAL(precision, scale), NUMERIC(precision, scale)
-- Exact numeric values with a specified precision and scale
-- - Scale: the numbers of digit after the point
-- - Precision: the total numbers of digit of the value

FLOAT, REAL, DOUBLE PRECISION
-- Approximate numeric values with optional precision
-- larger range than decimal

|Feature|FLOAT|DECIMAL|
|-|-|-|
|Storage|Approximate|Exact|
|Range|Large|Limited|
|Precision|Determined by significant digits|Determined by specified digits and decimal places|
|Performance|Generally faster|May be slower|
|Use cases|Scientific calculations, engineering data|Financial calculations, monetary values|

## 1.2. String types

In [None]:
CHAR(n)
-- Fixed-length character string with a specified length

VARCHAR(n)
-- Variable-length character string with a maximum length of n

TEXT
-- Variable-length character string with no specified maximum length

## 1.3. Date and Time types

In [None]:
DATE
-- Date values in YYYY-MM-DD format

In [None]:
-- Adding an integer to a date adds that many days.
-- The three literals are different spellings of the same calendar date.
-- NOTE(review): the M/D/YYYY literal presumably relies on the session
-- DateStyle being MDY — confirm before reusing elsewhere.
SELECT
    CAST('3/30/2023' AS date) + 1,
    CAST('2023-03-30' AS date) + 1,
    CAST('2023-3-30' AS date) + 1

In [None]:
TIME
-- Time values in HH:MM:SS format

TIMESTAMP
-- Date and time values in YYYY-MM-DD HH:MM:SS format

INTERVAL
-- A period of time

## 1.4. Other types

In [None]:
BOOLEAN
-- Represents true or false values

In [None]:
-- Binary types
BINARY(n)
-- Fixed-length binary string with a specified length

VARBINARY(n)
-- Variable-length binary string with a maximum length of n

BLOB, BYTEA
-- Binary large objects for storing binary data

In [None]:
ARRAY
-- An ordered collection of elements of the same data type

JSON, JSONB
-- JSON data type for storing JSON documents

XML
-- XML data type for storing XML documents

# 2. Manipulation

## 2.1. Type manipulation

### 2.1.1. Null

#### `coalesce` function

It takes two or more arguments and returns the first one that is not null

In [None]:
coalesce(num_orders,0)
coalesce(address,'Unknown')
coalesce(column_a,column_b)
coalesce(column_a,column_b,column_c)

#### `nullif` function

The nullif function compares two numbers, and if they are not equal, it returns the first number; if they are equal, the function returns null

In [None]:
nullif(6,7) → 6
nullif(6,6) → null 

### 2.1.2. Conversions and casting

#### `cast()` or `::`

In [None]:
cast (1234 as varchar) 
1234::varchar

#### `to_datatype` functions

The to_datatype function can take both a value or a format string

|function|purpose|
|-|-|
|to_char|converts other types to string|
|to_number|to numeric|
|to_date|converts other types to date, with specified date parts|
|to_timestamp|to date, with specified date and time parts|

Sometimes the database automatically converts a data type. This is called **type coercion**

In [None]:
to_date(date_column, 'DD/MM/YYYY')
-- YYYY: Year with four digits
-- MM: Month with two digits
-- DD: Day of the month with two digits
-- HH: Hour (24-hour format) with two digits
-- MI: Minute with two digits
-- SS: Second with two digits
-- AM/PM: AM or PM indicator

## 2.2. Date and time manipulation

#### Return current date/time

In [None]:
-- Ways to read the current date and/or time.
SELECT
    current_date,       -- date only
    current_time,       -- time only
    current_timestamp,  -- date and time
    now()               -- date and time

#### `date_trunc`

reduce details of timestamp

In [None]:
-- Truncate a timestamp to the start of its month.
-- The result is still of type timestamp.
SELECT date_trunc('month', CAST('2020-10-04 12:33:35' AS timestamp))

#### `to_char`

In [None]:
-- to_char formats a date/timestamp according to a template pattern.
-- Every column below is returned as a string.
SELECT
    to_char(current_date, 'YYYY-MM-DD'),
    to_char(current_timestamp, 'YYYY-MM-DD HH24:MI:SS'),
    to_char(current_date, 'month') AS MonthName,
    to_char(current_date, 'YYYY-MM') AS YearMonth

#### `date_part`, `extract`

In [None]:
-- date_part and extract both pull a single field out of a date/time value.
-- Every column below is returned as a number.
SELECT
    date_part('month', current_timestamp),
    extract('day' FROM current_timestamp),
    extract(month FROM current_timestamp),   -- the field name may be bare ...
    extract('month' FROM current_timestamp)  -- ... or quoted

### Date/time math

In [None]:
-- Difference between two dates, expressed in the unit given as the first argument.
-- NOTE(review): DATEDIFF is not a built-in PostgreSQL function; this
-- (unit, start, end) form presumably targets an engine such as Redshift or
-- Snowflake — confirm. In PostgreSQL, subtract one date from the other instead.
select DATEDIFF('day', '2023-01-01', '2023-12-31')

In [None]:
-- Return the number of days elapsed between two dates.
-- The number of days is inclusive of only one of the endpoints, and swapping
-- the operands flips the sign of the result.
-- Each column has its own alias so the result set has no duplicate column names.
SELECT date('2020-06-30') - date('2020-05-31') as days_elapsed
    ,date('2020-05-31') - date('2020-06-30') as days_elapsed_reversed

In [None]:
-- Add to / subtract from a date.
-- date +/- integer treats the integer as a number of days;
-- date + interval accepts any interval unit (days, months, hours, ...).
-- Every column has a distinct alias so each result can be referenced by name.
SELECT date('2020-06-01') + interval '7 days' as plus_7_days_interval -- highly recommended
    ,date('2020-06-01') + 7 as plus_7_days
    ,date('2020-06-01') - 7 as minus_7_days
    ,date('2020-06-01') + interval '2 month' as plus_2_months
    ,date('2020-06-01') + interval '2 hour' as plus_2_hours

In [None]:
-- Add/subtract an interval to/from a time, and subtract one time from another.
-- Each column has a distinct alias so the result set has no duplicate column names.
SELECT time '05:00' + interval '3 hours' as time_plus_3h
    ,time '05:00' - interval '3 hours' as time_minus_3h
    ,time '05:00' - time '03:00' as time_diff -- an interval of two hours, e.g. { "hour": 2 }

## 2.3. Numeric manipulations

#### `round` function

In [None]:
-- Round to three decimal places -> 1234.568
select round(1234.56789, 3);

## 2.4. Text manipulations

#### `replace` function

In [None]:
-- Replace every occurrence of '0' with an empty string -> 'i have 1 apples'
SELECT replace('i have 10 apples', '0', '')

#### `split_part` function

Split text with delimiter

In [None]:
-- split_part(text, delimiter, n) returns the n-th piece of the text after
-- splitting it on the delimiter.
-- Note that spaces in the text are retained unless they are specified as
-- part of the delimiter.
SELECT
    split_part('This is an example of an example string', 'an example', 1),  -- output: This is
    split_part('This is an example of an example string', 'an example', 2);  -- output: of

### `concat()` function

In [None]:
-- Two ways to concatenate values: the || operator and the concat() function.
SELECT
    'a' || 'b' || 1,
    concat('a', 'b', 1)

### `length()` function

In [None]:
-- Number of characters in the string -> 3
SELECT length('abc')

### `upper()`, `lower()`, `initcap()` function

In [None]:
-- Case conversion helpers.
SELECT
    upper('abc'),        -- 'ABC'
    lower('ABC'),        -- 'abc'
    initcap('abC ABC')   -- 'Abc Abc'

### Wildcards

In [None]:
like -- case-sensitive
ilike -- case-insensitive

In [None]:
-- LIKE wildcards: % = any number of characters, _ = exactly one character.

-- Find any clients who are an LLC
SELECT *
FROM client
WHERE client_name LIKE '%LLC';

SELECT *
FROM branch_supplier
WHERE supplier_name LIKE '%Label%';

-- Find any employee born in October
-- NOTE(review): presumably birth_day is stored (or compared) as text in
-- YYYY-MM-DD form — confirm against the table definition.
SELECT *
FROM employee
WHERE birth_day LIKE '____-10%';

-- Find any clients who are schools
SELECT *
FROM client
WHERE client_name LIKE '%School%';

-- Prefix a wildcard character with a backslash to treat it as a literal character
SELECT 'abc a%b abc' LIKE '%\%%';
SELECT 'abc a__b abc' LIKE '%\_\_%';

### Regex

In [None]:
-- (1) tilde ~ : the right-hand operand ("data" here) is a regular expression
SELECT 'The data is about UFOs' ~ 'data' AS comparison;

-- ~ is case-sensitive. For a case-insensitive match, similar to ILIKE,
-- use ~* (tilde followed by an asterisk).
SELECT 'The data is about UFOs' ~* 'DATA' AS comparison;

-- Put an exclamation point in front of either operator to negate it (!~ or !~*).
SELECT 'The data is about UFOs' !~ 'alligators' AS comparison;

In [None]:
-- (2) period . -> matches any single character
SELECT
    'The data is about UFOs' ~ '. data' AS comparison_1,  -- any character, a space, then 'data'
    'The data is about UFOs' ~ '.The' AS comparison_2     -- requires some character before 'The'

In [None]:
-- (3) asterisk * -> repeats the preceding item zero or more times
-- 'data *' is therefore 'data' followed by zero or more spaces.
-- % is a wildcard only for LIKE; inside a regex it is just a percent sign.
SELECT
    'The data is about UFOs' ~ 'data *' AS comparison_1,
    'The data is about UFOs' ~ 'data %' AS comparison_2

In [None]:
-- (4) brackets []
-- [ and ] (left and right brackets) enclose a set of characters, any one of
-- which must match.
SELECT 'The data is about UFOs' ~ '[Tt]he' as comparison_1
    ,'the data is about UFOs' ~ '[Tt]he' as comparison_2
    ,'tHe data is about UFOs' ~ '[Tt]he' as comparison_3
    ,'THE data is about UFOs' ~ '[Tt]he' as comparison_4
    ;  -- the statement must be terminated so the next SELECT parses on its own

-- (my explanation) the bracket matches any single one of the characters listed inside it
SELECT '1he data is about UFOs' ~ '[tT1]he';

-- match a pattern that includes a number
SELECT 'sighting lasted 8 minutes' ~ '[0123456789] minutes' as comparison;

-- a range of characters can be entered with a dash separator (-)
SELECT 'sighting lasted 8 minutes' ~ '[0-9] minutes' as comparison;

-- [0-9] Match any digit
-- [a-z] Match any lowercase letter
-- [A-Z] Match any uppercase letter
-- [A-Za-z0-9] Match any lower- or uppercase letter, or any digit
-- [A-z] Match any ASCII character from 'A' through 'z'; generally not used
--       because the range also includes the symbols that sit between the
--       uppercase and lowercase letters

-- If the desired pattern match contains more than one instance of a particular
-- value or type of value, one option is to include as many ranges as needed,
-- one after the other. For example, a three-digit number can be matched by
-- repeating the digit range three times:
SELECT 'driving on 495 south' ~ 'on [0-9][0-9][0-9]' as comparison;

-- Repeating a pattern multiple times. This is useful when you don't know
-- exactly how many times the pattern will repeat, but be careful to check the
-- results to make sure you don't accidentally return more matches than intended.
SELECT
    'driving on 495 south' ~ 'on [0-9]+' as comparison_1
    ,'driving on 1 south' ~ 'on [0-9]+' as comparison_2
    ,'driving on 38east' ~ 'on [0-9]+' as comparison_3
    ,'driving on route one' ~ 'on [0-9]+' as comparison_4
    ;

-- + Match the character set one or more times
-- * Match the character set zero or more times
-- ? Match the character set zero or one time
-- { } Match the character set the number of times specified between the
--     curly braces; for example, {3} matches exactly three times
-- { , } Match the character set any number of times in a range specified by
--     the comma-separated numbers between the curly braces; for example,
--     {3,5} matches between three and five times

In [None]:
-- (5) caret ^
-- Outside brackets, ^ anchors the match to the start of the string;
-- as the first character inside brackets (e.g. [^0-9]) it negates the set.
SELECT
    'driving on 495 south' ~ 'on [0-9]+' AS comparison_1,   -- unanchored pattern
    'driving on 495 south' ~ 'on ^[0-9]+' AS comparison_2,  -- ^ placed in the middle of the pattern
    'driving on 495 south' ~ '^on [0-9]+' AS comparison_3   -- the string would have to start with 'on '

In [None]:
-- (6) backslash \ -> escapes a special character so it is matched literally
SELECT
    '"Is there a report?" she asked' ~ '\?' AS comparison_1,   -- literal question mark
    'it was filed under ^51.' ~ '^[0-9]+' AS comparison_2,     -- unescaped ^ keeps its special meaning
    'it was filed under ^51.' ~ '\^[0-9]+' AS comparison_3     -- literal caret followed by digits

In [None]:
-- (7) match whitespace in regex
    -- \t -> tab
    -- \r -> carriage return (like pressing Enter)
    -- \n -> line feed, i.e. a new line (like pressing Shift + Enter)
    -- \s -> any whitespace character
    -- The multi-line literals below contain real line breaks; the last two
    -- inputs are single-line, with and without a space.
    SELECT
        'spinning
        flashing
        and whirling' ~ '\n' as comparison_1
        ,'spinning
        flashing
        and whirling' ~ '\s' as comparison_2
        ,'spinning flashing' ~ '\s' as comparison_3
        ,'spinning' ~ '\s' as comparison_4

In [None]:
-- (8) parentheses () -> group a sub-pattern so a quantifier applies to the whole group
-- Pattern: (two digits followed by one lowercase letter), repeated three times in a row.
SELECT
    'valid codes have the form 12a34b56c' ~ '([0-9]{2}[a-z]){3}' AS comparison_1,
    'the first code entered was 123a456c' ~ '([0-9]{2}[a-z]){3}' AS comparison_2,
    'the second code entered was 99x66y33z' ~ '([0-9]{2}[a-z]){3}' AS comparison_3

In [None]:
-- (9) \y -> word boundary; wrapping a pattern as \y...\y matches it only as a
-- whole word, from the beginning to the end of that word
SELECT
    'I was in my car going south toward my home' ~ '\ycar\y' AS comparison_1,
    'UFO scares cows and starts stampede breaking' ~ '\ycar\y' AS comparison_2,
    'I''m a carpenter and married father of 2.5 kids' ~ '\ycar\y' AS comparison_3,
    'It looked like a brown boxcar way up into the sky' ~ '\ycar\y' AS comparison_4
;

-- In this simple example, adding spaces before and after the word "car" would
-- give the same result. The benefit of \y is that it also picks up cases where
-- the word is at the beginning of a string and so has no leading space.
-- With the case-insensitive ~*, '\ycar\y' matches when "Car" is the first word,
-- but ' car ' does not.
SELECT
    'Car lights in the sky passing over the highway' ~* '\ycar\y' AS comparison_1,
    'Car lights in the sky passing over the highway' ~* ' car ' AS comparison_2
;

In [None]:
-- (10) \A and \Z -> \A matches only at the beginning of the entire string,
-- and \Z matches only at the end of the string.
    SELECT
        'Car lights in the sky passing over the highway' ~* '\Acar\y' as comparison_1
        ,'I was in my car going south toward my home' ~* '\Acar\y' as comparison_2
        ,'An object is sighted hovering in place over my car' ~* '\ycar\Z' as comparison_3
        ,'I was in my car going south toward my home' ~* '\ycar\Z' as comparison_4
        ;  -- the statement must be terminated before the next SELECT starts

    -- (note) regexp_like is a function alternative to the ~ operator
    -- NOTE(review): regexp_like is only available in newer PostgreSQL
    -- releases (15+) — confirm the server version before relying on it.
        SELECT regexp_like('The data is about UFOs','data') as comparison;

In [None]:
-- (11) finding and replacing with regex
    -- Preview the first 50 characters of descriptions whose preview contains
    -- "<digits> light" followed by s, a space, a comma, or a period.
    SELECT left(description,50)
        FROM ufo
        WHERE left(description,50) ~ '[0-9]+ light[s ,.]' limit 100
        ;
    
    -- split out only the needed texts -> use regex functions
    -- The [1] subscript takes the first element of the array that
    -- regexp_matches produces; rows are then counted per matched text.
    SELECT (regexp_matches(description,'[0-9]+ light[s ,.]'))[1]
        ,count(*)
        FROM ufo
        WHERE description ~ '[0-9]+ light[s ,.]'
        GROUP BY 1
        ORDER BY 2 desc
        ;
                    
    -- Same pattern applied to a literal string that contains several candidate matches.
    select (regexp_matches('i have 3 lights, 5 lights, 9 lights and 18 lights','[0-9]+ light[s ,.]'))[1]
        ;

    -- split out the numbers to find min, max
    -- The piece of the matched text before the first space is cast to int.
    SELECT min(split_part(matched_text,' ',1)::int) as min_lights
        ,max(split_part(matched_text,' ',1)::int) as max_lights
        FROM
        (
            SELECT (regexp_matches(description,'[0-9]+ light[s ,.]'))[1] as matched_text
            ,count(*)
            FROM ufo
            WHERE description ~ '[0-9]+ light[s ,.]'
            GROUP BY 1
        ) a
        ;
    
    -- replace variations of 'minutes'
    -- The duration is the text that follows 'Duration:' in the sighting report.
    SELECT duration
        ,(regexp_matches(duration,'\m[Mm][Ii][Nn][A-Za-z]*\y'))[1] as matched_minutes
        FROM
        (
            SELECT split_part(sighting_report,'Duration:',2) as duration
            ,count(*) as reports
            FROM ufo
            GROUP BY 1
        ) a
        ;
        -- \m -> the pattern starts at the beginning of a word
        -- differences between \y and \m
            -- \m matches only at the beginning of a word
            -- \y matches at either the beginning or the end of a word
    
    -- Show the matched word next to the text with that word replaced by 'min'.
    SELECT duration
        ,(regexp_matches(duration,'\m[Mm][Ii][Nn][A-Za-z]*\y'))[1] as matched_minutes
        ,regexp_replace(duration,'\m[Mm][Ii][Nn][A-Za-z]*\y','min') as replaced_text
        FROM
        (
            SELECT split_part(sighting_report,'Duration:',2) as duration
            ,count(*) as reports
            FROM ufo
            GROUP BY 1
        ) a
        ;
    
    -- standardize both 'minutes' and 'hours'
    -- The inner regexp_replace rewrites the minute variants to 'min'; the
    -- outer one then rewrites the hour variants to 'hr'.
    SELECT duration
        ,(regexp_matches(duration,'\m[Hh][Oo][Uu][Rr][A-Za-z]*\y'))[1] as matched_hour
        ,(regexp_matches(duration,'\m[Mm][Ii][Nn][A-Za-z]*\y'))[1] as matched_minutes
        ,regexp_replace(
            regexp_replace(duration,'\m[Mm][Ii][Nn][A-Za-z]*\y','min')
            ,'\m[Hh][Oo][Uu][Rr][A-Za-z]*\y','hr') as replaced_text
        FROM
        (
            SELECT split_part(sighting_report,'Duration:',2) as duration
            ,count(*) as reports
            FROM ufo
            GROUP BY 1
        ) a
        ;