# First some simple operations in Postgres 
Also see [SQL Style Guide](https://www.sqlstyle.guide/)



In [1]:
%load_ext sql
%sql sqlite:///:memory:

In [14]:
%%sql
CREATE TABLE
    IF NOT EXISTS test (name varchar, age INT);

In [None]:
INSERT INTO
    test (name, age)
VALUES
    ('John Doe', 30),
    ('Jane Smith', 25),
    ('Emily Johnson', 40);

Note that you can import csv in a query, using:

```sql
COPY table_name (column1, column2, column3, ...)
FROM '/path/to/your/file.csv'
DELIMITER ','
CSV HEADER;
```
Note that the table should already exist, and its schema should match the data in the CSV file.



In [12]:
%%sql
SELECT
    *
FROM
    test

name,age
John Doe,30
Jane Smith,25
Emily Johnson,40
Rosy,36


In [None]:
%%sql
SELECT
    column_name,
    data_type,
    udt_name
FROM
    INFORMATION_SCHEMA.COLUMNS
WHERE
    table_name = 'test'

in sqlite this is `sqlite_master` or `PRAGMA table_info(table_name);` 

see https://www.sqlite.org/pragma.html#toc

In [13]:
%%sql
PRAGMA table_info(test);

cid,name,type,notnull,dflt_value,pk
0,name,varchar,0,,0
1,age,INT,0,,0


To drill deeper into (user defined) data types, use `pg_type`

In [None]:
%%sql
SELECT
    typname,
    typcategory
FROM
    pg_type
WHERE
    typname = 'varchar';


## Extensions

In [5]:
%%sql
SELECT
    *
FROM
    pg_available_extensions CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;

SELECT
    levenshtein ('BOGUS', 'BORGOS');

RuntimeError: If using snippets, you may pass the --with argument explicitly.
For more details please refer: https://jupysql.ploomber.io/en/latest/compose.html#with-argument


Original error message from DB driver:
(sqlite3.OperationalError) near "CREATE": syntax error
[SQL: SELECT
    *
FROM
    pg_available_extensions CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

If you need help solving this issue, send us a message: https://ploomber.io/community



## INSERT INTO // VALUES should be the first thing to learn



In [6]:
%%sql
INSERT INTO test (name, age)
    VALUES ('Johannes2', 36)
    RETURNING name, age;

name,age
Johannes2,36


# Alongside INSERT, use this to UPDATE 

In [7]:
%%sql
UPDATE test
SET
    name = 'Rosy'
WHERE
    name = 'Johannes2' RETURNING name,
    age;

"STRING_AGG (name, ', ')"
"John Doe, Jane Smith, Emily Johnson, Rosy"


In [None]:
%%sql
SELECT
    STRING_AGG (name, ', ')
FROM
    test


# DELETE


In [None]:
%%sql
DELETE FROM test 
WHERE age >= 40;

## Create a temp table

useful if you can't make a new table due to permissions.

```sql
CREATE TEMP TABLE temporary AS
SELECT name, age
    FROM test

-- or (less common)    
SELECT name, age
INTO TEMP TABLE temporary
    FROM test

-- then to add rows
INSERT INTO temporary
SELECT name, age
FROM test
WHERE age BETWEEN 30 AND 40;
```



In [8]:
%%sql
DROP TABLE IF EXISTS temporary;


# Datacamp notes

Create bins: use trunc() 

In [9]:
%%sql
SELECT trunc('125.5', -2);

-- Create bins
WITH bins AS ( 
    SELECT generate_series(30, 60, 5) AS lower, 
        generate_series(35,65,5) AS upper
        ),
-- Subset data to tag of interest
    ebs AS ( 
        SELECT unanswered_count 
        FROM stackoverflow 
        WHERE tag='amazon-ebs' 
        )
-- Count values in each bin
SELECT lower, upper, count(unanswered_count)
    -- left join keeps all bins
    FROM bins
        LEFT JOIN ebs
            ON unanswered_count >= lower
            AND unanswered_count < upper
    -- Group by bin bounds to create the groups
    GROUP BY lower, upper
    ORDER BY lower;


RuntimeError: (sqlite3.OperationalError) no such function: trunc
[SQL: SELECT trunc('125.5', -2);]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
If you need help solving this issue, send us a message: https://ploomber.io/community



same can be done if you make 1 time-based series and then join the original table with a date_trunc.
```SQL 
LEFT JOIN sales
    ON hours=date_trunc('hour', date)
```

OR a lower and upper time-series and join `ON date >= lower AND date < upper`

# String operations


In [10]:
%%sql
SELECT split_part('a,bc,d', ',', 2);

SELECT substring('abcdef' FROM 2 FOR 3);

SELECT 'a' || 2 || 'cc'; -- or concat()

RuntimeError: (sqlite3.OperationalError) no such function: split_part
[SQL: SELECT split_part('a,bc,d', ',', 2);]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
If you need help solving this issue, send us a message: https://ploomber.io/community



/*markdown
## Common issue with string categories

Inputted as 'Aple', ' apple'...

|customer | fav_fruit|
|----------|-----------|
|349 | aple|
|874 | Apple|
|703 | apple|

...

## Strategy: recode > join
```SQL
-- Step 1
CREATE TEMP TABLE recode AS
SELECT DISTINCT fav_fruit AS original, -- original, messy values
fav_fruit AS standardized -- new standardized values
FROM fruit;

-- Step 2
-- All rows: lower case, remove white space on ends
UPDATE recode
SET standardized=trim(lower(original));
-- Specific rows: correct a misspelling
UPDATE recode
SET standardized='banana'
WHERE standardized LIKE '%nn%';
-- All rows: remove any s
UPDATE recode
SET standardized=rtrim(standardized, 's');

-- Step 3
SELECT standardized,
    count(*)
    FROM fruit
        LEFT JOIN recode
        ON fav_fruit=original
    GROUP BY standardized;
```


In [None]:
%%sql
SELECT date_trunc('month', now());

SELECT generate_series('2018-01-01',
                        '2018-01-15',
                        '2 days'::interval);

-- alternative to LIKE
SELECT title, description
FROM film
WHERE to_tsvector(title) @@ to_tsquery('elf');

## Correlation matrix

Note that the correlations are calculated/inserted by row. 
```SQL
DROP TABLE IF EXISTS correlations;

CREATE TEMP TABLE correlations AS
SELECT 'profits'::varchar AS measure,
       corr(profits, profits) AS profits,
       corr(profits, profits_change) AS profits_change,
       corr(profits, revenues_change) AS revenues_change
  FROM fortune500;

INSERT INTO correlations
SELECT 'profits_change'::varchar AS measure,
       corr(profits_change, profits) AS profits,
       corr(profits_change, profits_change) AS profits_change,
       corr(profits_change, revenues_change) AS revenues_change
  FROM fortune500;

INSERT INTO correlations
SELECT 'revenues_change'::varchar AS measure,
       corr(revenues_change, profits) AS profits,
       corr(revenues_change, profits_change) AS profits_change,
       corr(revenues_change, revenues_change) AS revenues_change
  FROM fortune500;

-- Select each column, rounding the correlations
SELECT measure, 
       round(profits::numeric, 2) AS profits,
       round(profits_change::numeric, 2) AS profits_change,
       round(revenues_change::numeric, 2) AS revenues_change
  FROM correlations;
  ```

|measure	|profits	|profits_change	|revenues_change|
|--|--|--|--|
|profits|	1.00	|0.02	|0.02|
|profits_change	|0.02	|1.00	|-0.09|
|revenues_change	|0.02	|-0.09	|1.00|
