# Основные операторы PostgreSQL

Необходимо создать таблицы по следующей структуре и выполнить ряд запросов  
  
![Если изображение не загрузилось, то схема таблицы в папке проекта](Таблица_customers.png)![Если изображение не загрузилось, то схема таблицы в папке проекта](Таблица_transactions.png)

## Подготовка

In [17]:
import pandas as pd
from sqlalchemy import create_engine, text

import os
from dotenv import load_dotenv

import warnings
warnings.filterwarnings("ignore")

# Load variables from the local .env file into the process environment.
# Without this call os.getenv() only sees variables that are already exported
# in the shell, so the credentials below would come back as None.
load_dotenv()

# PostgreSQL connection settings. The user and password are read from the environment (.env).
user = os.getenv("POSTGRES_USER")
password = os.getenv("POSTGRES_PASSWORD")
host = "localhost"
port = 5432
database = "db1"

# Create the SQLAlchemy engine used by every query in this notebook.
engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}")

# Helper for statements that return no rows (used here to create schemas and tables).
def execute_query(sql):
    """Execute one SQL statement, commit it, and print the outcome.

    Args:
        sql: SQL text to run; it is passed to ``text()`` unchanged.

    Returns:
        None. Success or failure is reported with ``print``; exceptions are
        caught and not propagated to the caller.
    """
    try:
        # Connecting inside the ``try`` means connection failures (bad
        # credentials, server unreachable) are reported the same way as
        # SQL errors instead of escaping as a raw traceback.
        with engine.connect() as connection:
            connection.execute(text(sql))
            connection.commit()
    except Exception as e:
        print(f"При выполнении запроса возникла ошибка: {e}")
    else:
        # Reached only when both execute() and commit() succeeded.
        print("Запрос успешно выполнен! 🎉🎉🎉")

Посмотрим, как выглядят данные

In [18]:
# Read the customers export; fields in the file are separated by semicolons.
customers = pd.read_csv("customer.csv", sep=";")
# Convert the date-of-birth column to datetimes.
customers = customers.assign(DOB=pd.to_datetime(customers["DOB"]))
customers

Unnamed: 0,customer_id,first_name,last_name,gender,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,1,Laraine,Medendorp,F,1953-10-12,Executive Secretary,Health,Mass Customer,N,Yes,060 Morning Avenue,2016,New South Wales,Australia,10
1,2,Eli,Bockman,Male,1980-12-16,Administrative Officer,Financial Services,Mass Customer,N,Yes,6 Meadow Vale Court,2153,New South Wales,Australia,10
2,3,Arlin,Dearle,Male,1954-01-20,Recruiting Manager,Property,Mass Customer,N,Yes,0 Holy Cross Court,4211,QLD,Australia,9
3,4,Talbot,,Male,1961-10-03,,IT,Mass Customer,N,No,17979 Del Mar Point,2448,New South Wales,Australia,4
4,5,Sheila-kathryn,Calton,Female,1977-05-13,Senior Editor,,Affluent Customer,N,Yes,9 Oakridge Court,3216,VIC,Australia,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,3996,Rosalia,Halgarth,Female,1975-08-09,VP Product Management,Health,Mass Customer,N,No,57042 Village Green Point,4511,QLD,Australia,6
3996,3997,Blanch,Nisuis,Female,2001-07-13,Statistician II,Manufacturing,High Net Worth,N,Yes,87 Crescent Oaks Alley,2756,NSW,Australia,10
3997,3998,Sarene,Woolley,U,NaT,Assistant Manager,IT,High Net Worth,N,No,8194 Lien Street,4032,QLD,Australia,7
3998,3999,Patrizius,,Male,1973-10-24,,Manufacturing,Affluent Customer,N,Yes,320 Acker Drive,2251,NSW,Australia,7


In [19]:
# Read the transactions export; fields in the file are separated by semicolons.
transactions = pd.read_csv("transaction.csv", sep=";")
transactions["transaction_date"] = pd.to_datetime(transactions["transaction_date"])

# The price columns use a decimal comma: swap it for a dot, then cast to float.
price_columns = ["list_price", "standard_cost"]
transactions[price_columns] = transactions[price_columns].replace({",": "."}, regex=True)
transactions = transactions.astype({column: "float" for column in price_columns})
transactions

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,1,2,2950,2017-02-25,False,Approved,Solex,Standard,medium,medium,71.49,53.62
1,2,3,3120,2017-05-21,True,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92
2,3,37,402,2017-10-16,False,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,4,88,3135,2017-08-31,False,Approved,Norco Bicycles,Standard,medium,medium,1198.46,381.10
4,5,78,787,2017-10-01,True,Approved,Giant Bicycles,Standard,medium,large,1765.30,709.48
...,...,...,...,...,...,...,...,...,...,...,...,...
19995,19996,51,1018,2017-06-24,True,Approved,OHM Cycles,Standard,high,medium,2005.66,1203.40
19996,19997,41,127,2017-11-09,True,Approved,Solex,Road,medium,medium,416.98,312.74
19997,19998,87,2284,2017-04-14,True,Approved,OHM Cycles,Standard,medium,medium,1636.90,44.71
19998,19999,6,2764,2017-07-03,False,Approved,OHM Cycles,Standard,high,medium,227.88,136.73


Создаём таблицы

In [20]:
# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE SCHEMA raises an
# error once the schema already exists.
execute_query("CREATE SCHEMA IF NOT EXISTS store_2")

Запрос успешно выполнен! 🎉🎉🎉


In [21]:
# Create the customers table with an explicit column schema and customer_id
# as the primary key.
# NOTE(review): PostgreSQL folds the unquoted identifier DOB to lowercase
# (dob), while the DataFrame loaded in this notebook names the column "DOB" —
# confirm the two are meant to line up.
sql = """
CREATE TABLE store_2.customers (
    customer_id SERIAL PRIMARY KEY,
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    gender VARCHAR(30),
    DOB DATE,
    job_title VARCHAR(50),
    job_industry_category VARCHAR(50),
    wealth_segment VARCHAR(50),
    deceased_indicator VARCHAR(50),
    owns_car VARCHAR(30),
    address VARCHAR(50),
    postcode VARCHAR(30),
    state VARCHAR(30),
    country VARCHAR(30),
    property_valuation INT4
)
"""

execute_query(sql)

Запрос успешно выполнен! 🎉🎉🎉


In [22]:
# Load the customers DataFrame into store_2.customers (the DataFrame index is not written).
# NOTE(review): if_exists="replace" makes pandas drop the existing table and
# recreate it from the DataFrame, so the column types and primary key declared
# by the CREATE TABLE statement are not kept.
customers.to_sql(name="customers", schema="store_2", con=engine, index=False, if_exists="replace")

# Sanity check: read back the first five rows.
pd.read_sql("SELECT * FROM store_2.customers LIMIT 5", con=engine)

Unnamed: 0,customer_id,first_name,last_name,gender,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,1,Laraine,Medendorp,F,1953-10-12,Executive Secretary,Health,Mass Customer,N,Yes,060 Morning Avenue,2016,New South Wales,Australia,10
1,2,Eli,Bockman,Male,1980-12-16,Administrative Officer,Financial Services,Mass Customer,N,Yes,6 Meadow Vale Court,2153,New South Wales,Australia,10
2,3,Arlin,Dearle,Male,1954-01-20,Recruiting Manager,Property,Mass Customer,N,Yes,0 Holy Cross Court,4211,QLD,Australia,9
3,4,Talbot,,Male,1961-10-03,,IT,Mass Customer,N,No,17979 Del Mar Point,2448,New South Wales,Australia,4
4,5,Sheila-kathryn,Calton,Female,1977-05-13,Senior Editor,,Affluent Customer,N,Yes,9 Oakridge Court,3216,VIC,Australia,9


In [23]:
# Create the transactions table with an explicit column schema and
# transaction_id as the primary key.
# NOTE(review): online_order is declared VARCHAR(30) here, but the Task 5
# query uses it as a bare boolean condition (`and online_order`) — verify
# which column type is actually intended.
sql = """
CREATE TABLE store_2.transactions (
    transaction_id SERIAL PRIMARY KEY,
    product_id Int4,
    customer_id Int4,
    transaction_date DATE,
    online_order VARCHAR(30),
    order_status VARCHAR(30),
    brand VARCHAR(30),
    product_line VARCHAR(30),
    product_class VARCHAR(30),
    product_size VARCHAR(30),
    list_price DECIMAL(10, 2),
    standard_cost DECIMAL(10, 2)
)
"""

execute_query(sql)

Запрос успешно выполнен! 🎉🎉🎉


In [24]:
# Load the transactions DataFrame into store_2.transactions (the DataFrame index is not written).
# NOTE(review): if_exists="replace" makes pandas drop the existing table and
# recreate it from the DataFrame, so the column types and primary key declared
# by the CREATE TABLE statement are not kept.
transactions.to_sql(name="transactions", schema="store_2", con=engine, index=False, if_exists="replace")

# Sanity check: read back the first five rows.
pd.read_sql("SELECT * FROM store_2.transactions LIMIT 5", con=engine)

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,1,2,2950,2017-02-25,False,Approved,Solex,Standard,medium,medium,71.49,53.62
1,2,3,3120,2017-05-21,True,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92
2,3,37,402,2017-10-16,False,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,4,88,3135,2017-08-31,False,Approved,Norco Bicycles,Standard,medium,medium,1198.46,381.1
4,5,78,787,2017-10-01,True,Approved,Giant Bicycles,Standard,medium,large,1765.3,709.48


## Задание 1. Вывести все уникальные бренды, у которых стандартная стоимость выше 1500 долларов

In [25]:
# Let pandas render up to 100 rows of a DataFrame before truncating the display.
pd.set_option('display.max_rows', 100)

# Task 1: unique brands that have at least one transaction whose standard_cost
# is strictly above 1500. The task says "выше 1500" (higher than), so the
# comparison is `>`; `>=` would also admit a cost of exactly 1500.
# `where 1 = 1` is an always-true placeholder so each real filter starts with `and`.
sql = """
select DISTINCT
	brand
from store_2.transactions as t
where 1 = 1
and standard_cost > 1500
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,brand
0,Solex
1,Giant Bicycles
2,OHM Cycles
3,Trek Bicycles


## Задание 2. Вывести все подтвержденные транзакции за период '2017-04-01' по '2017-04-09' включительно

In [26]:
# Task 2: every column of the transactions whose transaction_date falls in
# 2017-04-01..2017-04-09 (BETWEEN includes both bounds) and whose order_status
# is 'Approved'.
# `where 1 = 1` is an always-true placeholder so each real filter starts with `and`.
sql = """
select
	*
from store_2.transactions as t
where 1 = 1
and transaction_date between '2017-04-01' and '2017-04-09'
and order_status = 'Approved'
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,17,79,2426,2017-04-03,False,Approved,Norco Bicycles,Standard,medium,medium,1555.58,818.01
1,19,54,2268,2017-04-06,True,Approved,WeareA2B,Standard,medium,medium,1292.84,13.44
2,23,37,2001,2017-04-08,True,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,83,0,3398,2017-04-01,True,Approved,OHM Cycles,Standard,medium,medium,235.63,125.07
4,89,0,2682,2017-04-04,True,Approved,OHM Cycles,Road,high,large,12.01,7.21
...,...,...,...,...,...,...,...,...,...,...,...,...
526,19655,0,336,2017-04-09,True,Approved,Norco Bicycles,Standard,medium,medium,360.40,270.30
527,19853,7,3072,2017-04-02,False,Approved,Trek Bicycles,Road,low,medium,980.37,234.43
528,19899,57,325,2017-04-06,False,Approved,WeareA2B,Touring,medium,large,1890.39,260.14
529,19968,0,2751,2017-04-06,False,Approved,WeareA2B,Standard,medium,medium,60.34,45.26


## Задание 3. Вывести все профессии у клиентов из сферы IT или Financial Services, которые начинаются с фразы 'Senior'

In [27]:
# Task 3: distinct job titles of customers working in 'IT' or
# 'Financial Services' whose title starts with "senior".
# `~*` is PostgreSQL's case-insensitive regular-expression match and `^`
# anchors the pattern to the start of the string.
# The result column is aliased as "Senior'ы".
sql = """
select DISTINCT
	job_title as "Senior'ы"
from store_2.customers as c
where 1 = 1
and job_industry_category in ('IT', 'Financial Services')
and job_title ~* '^senior'
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,Senior'ы
0,Senior Cost Accountant
1,Senior Developer
2,Senior Editor
3,Senior Financial Analyst
4,Senior Quality Engineer
5,Senior Sales Associate


## Задание 4. Вывести все бренды, которые закупают клиенты, работающие в сфере Financial Services

In [28]:
# Task 4: distinct brands appearing in transactions of customers whose
# job_industry_category is 'Financial Services'.
# Transactions are joined to customers on customer_id; a NULL brand is
# reported with the placeholder label given to COALESCE.
sql = """
select distinct
	COALESCE(brand, 'Бренд не определён') as brand
from store_2.transactions
JOIN store_2.customers using (customer_id)
where 1 = 1
and job_industry_category = 'Financial Services'
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,brand
0,WeareA2B
1,Бренд не определён
2,Norco Bicycles
3,Solex
4,Giant Bicycles
5,OHM Cycles
6,Trek Bicycles


## Задание 5. Вывести 10 клиентов, которые оформили онлайн-заказ продукции из брендов 'Giant Bicycles', 'Norco Bicycles', 'Trek Bicycles'

In [29]:
# Task 5: top 10 customers ordered by their number of matching orders.
# Only online orders of the three listed brands pass the filter
# (`and online_order` uses the column directly as a boolean condition).
# The window count gives every joined row the per-customer number of matching
# transactions, and DISTINCT collapses those rows to one per customer.
# NOTE(review): customers with equal `orders` have no further sort key, so
# which of the tied customers make the LIMIT 10 cut is not pinned down.
sql = """
select DISTINCT
	c.*,
	count(transaction_id) over(partition by customer_id) as orders
from store_2.transactions as t
JOIN store_2.customers as c using (customer_id)
where 1 = 1
and online_order
and brand in ('Giant Bicycles', 'Norco Bicycles', 'Trek Bicycles')
order by orders desc
limit 10
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,customer_id,first_name,last_name,gender,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation,orders
0,2498,Rosana,Emmatt,Female,1978-02-02,Software Test Engineer III,Retail,High Net Worth,N,No,77410 Bluejay Street,3046,VIC,Australia,9,6
1,2841,Bibby,Carnson,Female,1963-01-31,Recruiting Manager,,Mass Customer,N,No,082 Twin Pines Avenue,4270,QLD,Australia,7,6
2,2877,Vaughan,Frank,Male,1967-11-01,Senior Sales Associate,Property,Affluent Customer,N,Yes,8750 Corben Trail,2089,NSW,Australia,11,6
3,115,Alberik,Mereweather,Male,1981-12-21,Account Executive,Health,Mass Customer,N,No,5814 Bunker Hill Street,3070,VIC,Australia,7,5
4,446,Marilee,Oosthout de Vree,Female,1978-07-02,Tax Accountant,IT,Affluent Customer,N,Yes,98 Scott Parkway,2096,NSW,Australia,10,5
5,464,Karel,Dimelow,Female,1958-03-23,,,Mass Customer,N,Yes,091 Farragut Crossing,3058,Victoria,Australia,9,5
6,637,Mercy,Wilsone,Female,1976-09-23,Health Coach I,Health,High Net Worth,N,No,049 Michigan Parkway,3337,Victoria,Australia,6,5
7,704,Lindsay,Acaster,Female,1998-12-08,Research Nurse,Health,Mass Customer,N,No,18939 Upham Hill,3551,Victoria,Australia,3,5
8,925,Nicko,Pembridge,Male,1978-08-07,Accountant IV,Manufacturing,Affluent Customer,N,No,10421 Cordelia Parkway,3188,VIC,Australia,12,5
9,1243,Robbert,Blakey,Male,1953-08-09,Research Assistant II,Retail,Mass Customer,N,No,90351 Duke Junction,4820,QLD,Australia,1,5


## Задание 6. Вывести всех клиентов, у которых нет транзакций

In [30]:
# Task 6: customers that have no rows in the transactions table.
# NOT EXISTS is used instead of NOT IN: if the subquery of a NOT IN ever
# yields a NULL customer_id, the predicate is never true and the query
# silently returns no customers at all. NOT EXISTS has no such pitfall.
sql = """
select
    c.*
from store_2.customers as c
where not exists (
    select 1
    from store_2.transactions as t
    where t.customer_id = c.customer_id
)
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,customer_id,first_name,last_name,gender,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,852,Andie,Bonney,Female,2000-11-04,Compensation Analyst,Financial Services,Affluent Customer,N,Yes,94 Anhalt Way,3139,VIC,Australia,7
1,869,Addia,Abels,Female,1984-03-11,Account Representative I,Financial Services,High Net Worth,N,Yes,02377 Maywood Trail,2287,NSW,Australia,7
2,1373,Shaylynn,Epsley,Female,1958-09-23,Director of Sales,Financial Services,Mass Customer,N,Yes,0 Grasskamp Pass,3170,VIC,Australia,10
3,2074,Roslyn,Rawdall,Female,1997-06-11,,Financial Services,Mass Customer,N,No,95483 Washington Junction,2505,NSW,Australia,9
4,2660,Hunt,Scollard,Male,1963-11-15,,Retail,High Net Worth,N,Yes,359 Briar Crest Road,2155,NSW,Australia,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502,3996,Rosalia,Halgarth,Female,1975-08-09,VP Product Management,Health,Mass Customer,N,No,57042 Village Green Point,4511,QLD,Australia,6
503,3997,Blanch,Nisuis,Female,2001-07-13,Statistician II,Manufacturing,High Net Worth,N,Yes,87 Crescent Oaks Alley,2756,NSW,Australia,10
504,3998,Sarene,Woolley,U,NaT,Assistant Manager,IT,High Net Worth,N,No,8194 Lien Street,4032,QLD,Australia,7
505,3999,Patrizius,,Male,1973-10-24,,Manufacturing,Affluent Customer,N,Yes,320 Acker Drive,2251,NSW,Australia,7


## Задание 7. Вывести всех клиентов из IT, у которых транзакции с максимальной стандартной стоимостью

In [31]:
# Task 7: customers from IT who have a transaction whose standard_cost equals
# the maximum standard_cost among transactions of IT customers.
# To compare against the maximum over all transactions instead, drop the
# `partition by` from the window function.
# The derived table carries an explicit alias (`costs`): PostgreSQL versions
# before 16 reject a subquery in FROM that has no alias.
sql = """
select DISTINCT ON (customer_id) -- Удаляем дубли, если у одного клиента несколько подходящих заказов
	*
from (
	select
		c.*,
		standard_cost,
		max(standard_cost) over(partition by job_industry_category) as max_cost_per_industry
	from store_2.transactions as t
	JOIN store_2.customers as c using (customer_id)
) as costs
where 1 = 1
and standard_cost = max_cost_per_industry
and job_industry_category = 'IT'
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,customer_id,first_name,last_name,gender,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation,standard_cost,max_cost_per_industry
0,34,Jephthah,Bachmann,U,1843-12-21,Legal Assistant,IT,Affluent Customer,N,No,90 Lawn Parkway,4805,QLD,Australia,4,1759.85,1759.85
1,893,Gibby,Fearnley,Male,1983-09-11,Geologist I,IT,Mass Customer,N,No,6382 Bayside Street,2153,NSW,Australia,10,1759.85,1759.85
2,975,Goldarina,Rzehorz,U,NaT,Automation Specialist IV,IT,Mass Customer,N,No,938 Monica Park,3173,VIC,Australia,9,1759.85,1759.85
3,1672,Sharla,Creebo,Female,1963-04-27,Design Engineer,IT,Affluent Customer,N,Yes,1 Morning Circle,2250,NSW,Australia,8,1759.85,1759.85
4,1773,Nickolas,Guittet,U,NaT,,IT,Mass Customer,N,Yes,835 West Hill,2118,NSW,Australia,11,1759.85,1759.85
5,1918,Devin,Sandeson,U,NaT,Staff Accountant II,IT,Affluent Customer,N,No,51763 Carey Place,3056,VIC,Australia,7,1759.85,1759.85
6,2913,Padraic,Bonnar,Male,1955-07-11,VP Quality Control,IT,Affluent Customer,N,Yes,937 Sloan Center,4118,QLD,Australia,6,1759.85,1759.85
7,3151,Thorn,Choffin,U,NaT,Senior Developer,IT,Affluent Customer,N,Yes,5323 Chive Avenue,2486,NSW,Australia,7,1759.85,1759.85
8,3473,Sanderson,Alloway,U,NaT,Analog Circuit Design manager,IT,Mass Customer,N,No,3 Roxbury Street,2261,NSW,Australia,7,1759.85,1759.85


## Задание 8. Вывести всех клиентов из сферы IT и Health, у которых есть подтвержденные транзакции за период '2017-07-07' по '2017-07-17'

In [32]:
# Task 8: customers from 'IT' or 'Health' that have at least one 'Approved'
# transaction dated 2017-07-07..2017-07-17 (BETWEEN includes both bounds).
# The subquery collects the matching customer_id values from transactions and
# the outer query returns the corresponding rows from customers.
sql = """
select
	*
from store_2.customers
where 1 = 1
and job_industry_category in ('IT', 'Health')
and customer_id in (
	select distinct
		customer_id
	from store_2.transactions
	where transaction_date between '2017-07-07' and '2017-07-17'
	and order_status = 'Approved'
)
"""

pd.read_sql(sql, con=engine)

Unnamed: 0,customer_id,first_name,last_name,gender,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,22,Deeanne,Durtnell,Female,1962-12-10,,IT,Mass Customer,N,No,52 Carey Alley,4740,QLD,Australia,5
1,28,Fee,Zellmer,Male,1973-09-30,Senior Quality Engineer,Health,Affluent Customer,N,Yes,2951 Petterle Place,2756,New South Wales,Australia,9
2,41,Basilius,Coupe,Male,1976-04-14,Food Chemist,Health,Mass Customer,N,No,2028 Lakewood Place,2480,New South Wales,Australia,3
3,47,Matthew,Jeaycock,Male,1992-10-22,Registered Nurse,Health,Affluent Customer,N,No,4853 Gulseth Avenue,4503,QLD,Australia,5
4,104,Odille,Panketh,Female,1978-06-06,Automation Specialist II,Health,Mass Customer,N,Yes,0410 Division Junction,2750,New South Wales,Australia,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,3255,Sutherlan,Truin,U,NaT,Engineer IV,IT,High Net Worth,N,No,12357 Arapahoe Parkway,4740,QLD,Australia,3
111,3288,Fair,Dewen,U,NaT,Engineer III,IT,High Net Worth,N,No,6 Golf Center,2042,NSW,Australia,10
112,3360,Joelie,Sherlaw,Female,1963-03-23,Quality Engineer,Health,Affluent Customer,N,No,62551 Del Mar Avenue,2300,NSW,Australia,6
113,3365,Karlens,Chaffyn,U,NaT,Engineer III,IT,Mass Customer,N,No,7234 Dawn Alley,4670,QLD,Australia,1
