In [2]:
import psycopg2 as pg
from psycopg2 import OperationalError
import pandas as pd
import numpy as np
import warnings
import os
from dotenv import load_dotenv
warnings.filterwarnings('ignore')
warnings.warn('DelftStack')
warnings.warn('Do not show this message')

In [11]:
load_dotenv()
password = os.getenv('DBPASSWORD')

In [12]:
connection = pg.connect(dbname = 'postgres',
                       host = 'localhost',
                       user = 'postgres',
                       password = password,
                       port = 5432)
cursor = connection.cursor()

# Схема БД

![](images/db_model.png)

Вывести распределение (количество) клиентов по сферам деятельности, отсортировав результат по убыванию количества.

In [22]:
cursor.execute('''
select job_industry_category, count(job_id)
from jobs
group by job_industry_category
order by count(job_id) DESC
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1
0,Manufacturing,149
1,,144
2,Financial Services,144
3,Health,128
4,Property,108
5,Retail,107
6,IT,100
7,Argiculture,67
8,Entertainment,53
9,Telecommunications,49


Найти сумму транзакций за каждый месяц по сферам деятельности, отсортировав по месяцам и по сфере деятельности.

In [34]:
cursor.execute('''
select EXTRACT(month FROM cast(transaction_date as TIMESTAMP)) as month,
job_industry_category,
COUNT(transaction_id)
from transaction
inner join customer on transaction.customer_id = customer.customer_id
inner join jobs on jobs.job_id = customer.job_id
group by month,job_industry_category
order by month,job_industry_category
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1,2
0,1,Argiculture,41
1,1,Entertainment,58
2,1,Financial Services,319
3,1,Health,261
4,1,IT,100
...,...,...,...
115,12,Manufacturing,299
116,12,Property,117
117,12,Retail,140
118,12,Telecommunications,26


Вывести количество онлайн-заказов для всех брендов в рамках подтвержденных заказов клиентов из сферы IT.

In [48]:
cursor.execute('''
select brand,
COUNT(transaction_id)
from transaction
inner join products on transaction.product_id = products.product_id
inner join customer on transaction.customer_id = customer.customer_id
inner join jobs on jobs.job_id = customer.job_id
where job_industry_category = 'IT'
and order_status = 'Approved'
and online_order = TRUE
group by brand
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1
0,OHM Cycles,88
1,WeareA2B,74
2,Trek Bicycles,66
3,Solex,105
4,Norco Bicycles,72
5,Giant Bicycles,135


Найти по всем клиентам сумму всех транзакций (list_price), максимум, минимум и количество транзакций, отсортировав результат по убыванию суммы транзакций и количества клиентов. Выполните двумя способами: используя только group by и используя только оконные функции. Сравните результат.
### group by - каждая строка будет содержать уникальные имя, фамилию и результат агрегирующей функции

In [54]:
cursor.execute('''
select first_name,last_name,
SUM(list_price), MAX(list_price), MIN(list_price), COUNT(transaction_id)
from transaction
inner join products on transaction.product_id = products.product_id
inner join customer on transaction.customer_id = customer.customer_id
group by first_name,last_name
ORDER BY COUNT(transaction_id) DESC, SUM(list_price) DESC
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1,2,3,4,5
0,Corabelle,,18932.18,1992.93,12.01,18
1,Frazer,Searston,19897.83,2005.66,290.62,14
2,Jillie,Fyndon,14540.49,1810.00,358.39,14
3,Hal,Braddon,14171.59,1873.97,60.34,14
4,Kristien,Robberts,16342.95,1977.36,60.34,13
...,...,...,...,...,...,...
3486,Hannie,Wodham,499.53,499.53,499.53,1
3487,Barney,Tilling,416.98,416.98,416.98,1
3488,Hinze,Bullent,290.62,290.62,290.62,1
3489,Leonora,Iglesia,230.91,230.91,230.91,1


### оконные функции выведутся все записи из transaction по каждому клиенту

In [67]:
cursor.execute('''
select first_name,last_name,
SUM(list_price) OVER(PARTITION BY customer.customer_id),
MAX(list_price) OVER(PARTITION BY customer.customer_id),
MIN(list_price) OVER(PARTITION BY customer.customer_id),
COUNT(transaction_id) OVER(PARTITION BY customer.customer_id)
from transaction
inner join products on transaction.product_id = products.product_id
inner join customer on transaction.customer_id = customer.customer_id
ORDER BY COUNT(transaction_id) OVER(PARTITION BY customer.customer_id) DESC,
SUM(list_price) OVER(PARTITION BY customer.customer_id) DESC
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1,2,3,4,5
0,Frazer,Searston,19897.83,2005.66,290.62,14
1,Frazer,Searston,19897.83,2005.66,290.62,14
2,Frazer,Searston,19897.83,2005.66,290.62,14
3,Frazer,Searston,19897.83,2005.66,290.62,14
4,Frazer,Searston,19897.83,2005.66,290.62,14
...,...,...,...,...,...,...
19992,Hannie,Wodham,499.53,499.53,499.53,1
19993,Barney,Tilling,416.98,416.98,416.98,1
19994,Hinze,Bullent,290.62,290.62,290.62,1
19995,Leonora,Iglesia,230.91,230.91,230.91,1


Найти имена и фамилии клиентов с минимальной/максимальной суммой транзакций за весь период (сумма транзакций не может быть null). Напишите отдельные запросы для минимальной и максимальной суммы.

In [76]:
# Минимальная сумма транзакций
cursor.execute('''
select first_name,last_name,
SUM(list_price)
from transaction
inner join products on transaction.product_id = products.product_id
inner join customer on transaction.customer_id = customer.customer_id
group by first_name,last_name
ORDER BY SUM(list_price) ASC
LIMIT 1
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1,2
0,Reyna,Braizier,100.35


In [77]:
# Максимальная сумма транзакций
cursor.execute('''
select first_name,last_name,
SUM(list_price)
from transaction
inner join products on transaction.product_id = products.product_id
inner join customer on transaction.customer_id = customer.customer_id
group by first_name,last_name
ORDER BY SUM(list_price) DESC
LIMIT 1
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1,2
0,Frazer,Searston,19897.83


Вывести только самые первые транзакции клиентов. Решить с помощью оконных функций.

In [89]:
cursor.execute('''
select distinct first_name, last_name,
FIRST_VALUE(transaction_id) OVER(PARTITION BY first_name, last_name ORDER BY transaction_date)
from transaction
inner join products on transaction.product_id = products.product_id
inner join customer on transaction.customer_id = customer.customer_id
ORDER BY first_name, last_name ASC
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1,2
0,Aarika,Magog,3870
1,Aarika,Van Vuuren,16606
2,Abagail,Tordiffe,933
3,Abba,Masedon,11296
4,Abbey,Murrow,7562
...,...,...,...
3486,Zollie,Exell,15183
3487,Zonnya,Fayter,2097
3488,Zonnya,Tolland,751
3489,Zorina,Bosomworth,13170


Вывести имена, фамилии и профессии клиентов, между транзакциями которых был максимальный интервал (интервал вычисляется в днях)

In [123]:
cursor.execute('''
with temp_table as (
    select first_name, last_name, job_title, transaction_date,
    LEAD(transaction_date) OVER(PARTITION BY first_name,
    last_name, job_title ORDER BY transaction_date ASC),
    LEAD(transaction_date) OVER(PARTITION BY first_name,last_name, job_title ORDER BY transaction_date ASC) -
    transaction_date as day_interval
    from transaction
    inner join products on transaction.product_id = products.product_id
    inner join customer on transaction.customer_id = customer.customer_id
    inner join jobs on customer.job_id = jobs.job_id
)
select distinct first_name, last_name, job_title, MAX(day_interval)
from temp_table
group by first_name, last_name, job_title
HAVING MAX(day_interval) IS NOT NULL
ORDER BY MAX(day_interval) DESC
LIMIT 1
''')
pd.DataFrame(cursor.fetchall())

Unnamed: 0,0,1,2,3
0,Susanetta,,Legal Assistant,357


In [122]:
connection.rollback()