In [None]:
from sqlalchemy import create_engine
import pandas as pd

engine = create_engine('postgresql://localhost:5432/mydatabase')

In [54]:
transactions = pd.read_csv('../transaction.csv', sep=';')
transactions['list_price'] = transactions['list_price'].str.replace(',', '.').astype('float')
transactions['standard_cost'] = transactions['standard_cost'].str.replace(',', '.').astype('float')
transactions['transaction_date'] = transactions['transaction_date'].apply(lambda x: pd.to_datetime(x, format='%d.%m.%Y'))
transactions.head()

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,1,2,2950,2017-02-25,False,Approved,Solex,Standard,medium,medium,71.49,53.62
1,2,3,3120,2017-05-21,True,Approved,Trek Bicycles,Standard,medium,large,2091.47,388.92
2,3,37,402,2017-10-16,False,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,4,88,3135,2017-08-31,False,Approved,Norco Bicycles,Standard,medium,medium,1198.46,381.1
4,5,78,787,2017-10-01,True,Approved,Giant Bicycles,Standard,medium,large,1765.3,709.48


In [55]:
transactions.to_sql('transactions', engine, if_exists='replace', index=False)

1000

In [None]:
customer = pd.read_csv('../customer.csv', sep=';')
customer.to_sql('customer', engine, if_exists='replace', index=False)
customer.head()

Unnamed: 0,customer_id,first_name,last_name,gender,DOB,job_title,job_industry_category,wealth_segment,deceased_indicator,owns_car,address,postcode,state,country,property_valuation
0,1,Laraine,Medendorp,F,1953-10-12,Executive Secretary,Health,Mass Customer,N,Yes,060 Morning Avenue,2016,New South Wales,Australia,10
1,2,Eli,Bockman,Male,1980-12-16,Administrative Officer,Financial Services,Mass Customer,N,Yes,6 Meadow Vale Court,2153,New South Wales,Australia,10
2,3,Arlin,Dearle,Male,1954-01-20,Recruiting Manager,Property,Mass Customer,N,Yes,0 Holy Cross Court,4211,QLD,Australia,9
3,4,Talbot,,Male,1961-10-03,,IT,Mass Customer,N,No,17979 Del Mar Point,2448,New South Wales,Australia,4
4,5,Sheila-kathryn,Calton,Female,1977-05-13,Senior Editor,,Affluent Customer,N,Yes,9 Oakridge Court,3216,VIC,Australia,9


Вывести все уникальные бренды, у которых стандартная стоимость выше 1500 долларов.

In [27]:
query = """
SELECT DISTINCT t.brand
FROM transactions AS t
WHERE t.standard_cost > 1500
"""

pd.read_sql_query(query,con=engine)

Unnamed: 0,brand
0,OHM Cycles
1,Trek Bicycles
2,Solex
3,Giant Bicycles


Вывести все подтвержденные транзакции за период '2017-04-01' по '2017-04-09' включительно.

In [None]:
query = """
SELECT t.*
FROM transactions AS t
WHERE 
    t.order_status = 'Approved'
    AND t.transaction_date BETWEEN '2017-04-01' AND '2017-04-09';
"""

pd.read_sql_query(query,con=engine)

Unnamed: 0,transaction_id,product_id,customer_id,transaction_date,online_order,order_status,brand,product_line,product_class,product_size,list_price,standard_cost
0,17,79,2426,2017-04-03,False,Approved,Norco Bicycles,Standard,medium,medium,1555.58,818.01
1,19,54,2268,2017-04-06,True,Approved,WeareA2B,Standard,medium,medium,1292.84,13.44
2,23,37,2001,2017-04-08,True,Approved,OHM Cycles,Standard,low,medium,1793.43,248.82
3,83,0,3398,2017-04-01,True,Approved,OHM Cycles,Standard,medium,medium,235.63,125.07
4,89,0,2682,2017-04-04,True,Approved,OHM Cycles,Road,high,large,12.01,7.21
...,...,...,...,...,...,...,...,...,...,...,...,...
526,19655,0,336,2017-04-09,True,Approved,Norco Bicycles,Standard,medium,medium,360.40,270.30
527,19853,7,3072,2017-04-02,False,Approved,Trek Bicycles,Road,low,medium,980.37,234.43
528,19899,57,325,2017-04-06,False,Approved,WeareA2B,Touring,medium,large,1890.39,260.14
529,19968,0,2751,2017-04-06,False,Approved,WeareA2B,Standard,medium,medium,60.34,45.26


Вывести все профессии у клиентов из сферы IT или Financial Services, которые начинаются с фразы 'Senior'.

In [57]:
query = """
SELECT DISTINCT c.job_title
FROM customer AS c
WHERE c.job_industry_category IN ('IT', 'Financial Services')
  AND c.job_title LIKE 'Senior%%';
"""

pd.read_sql_query(query, con=engine)

Unnamed: 0,job_title
0,Senior Cost Accountant
1,Senior Developer
2,Senior Editor
3,Senior Financial Analyst
4,Senior Quality Engineer
5,Senior Sales Associate


Вывести все бренды, которые закупают клиенты, работающие в сфере Financial Services

In [None]:
query = """
SELECT DISTINCT t.brand
FROM transactions AS t
JOIN customer AS c ON 
    t.customer_id = c.customer_id
WHERE 
    c.job_industry_category = 'Financial Services'
    AND t.brand IS NOT NULL
"""

pd.read_sql_query(query,con=engine)

Unnamed: 0,brand
0,OHM Cycles
1,Trek Bicycles
2,WeareA2B
3,Solex
4,Norco Bicycles
5,Giant Bicycles


Вывести 10 клиентов, которые оформили онлайн-заказ продукции из брендов 'Giant Bicycles', 'Norco Bicycles', 'Trek Bicycles'.

In [58]:
query = """
SELECT c.customer_id,
       c.first_name,
       c.last_name
FROM customer AS c
JOIN transactions AS t ON 
    c.customer_id = t.customer_id
WHERE 
    t.online_order = TRUE
    AND t.brand IN ('Giant Bicycles', 'Norco Bicycles', 'Trek Bicycles')
LIMIT 10;
"""

pd.read_sql_query(query,con=engine)

Unnamed: 0,customer_id,first_name,last_name
0,3120,Lauree,O'Donnell
1,787,Norma,Batrim
2,2339,Damien,Haddeston
3,1243,Robbert,Blakey
4,3002,Tracey,Verdun
5,2666,Lyle,Eager
6,3368,Tanya,Boddis
7,2810,Malvin,Burchill
8,2003,Lilli,Hargey
9,2448,Lorri,Iston


Вывести всех клиентов, у которых нет транзакций.

In [59]:
query = """
SELECT c.customer_id,
       c.first_name,
       c.last_name
FROM customer AS c
LEFT JOIN transactions AS t ON 
    c.customer_id = t.customer_id
WHERE t.customer_id IS NULL
"""

pd.read_sql_query(query,con=engine)

Unnamed: 0,customer_id,first_name,last_name
0,852,Andie,Bonney
1,869,Addia,Abels
2,1373,Shaylynn,Epsley
3,2074,Roslyn,Rawdall
4,2660,Hunt,Scollard
...,...,...,...
502,3996,Rosalia,Halgarth
503,3997,Blanch,Nisuis
504,3998,Sarene,Woolley
505,3999,Patrizius,


Вывести всех клиентов из IT, у которых транзакции с максимальной стандартной стоимостью

In [None]:
query = """
with max_st_cost AS (
    SELECT MAX(standard_cost)
    FROM transactions
)
SELECT c.customer_id,
       c.first_name,
       c.last_name
FROM customer AS c
JOIN transactions AS t ON 
    c.customer_id = t.customer_id
WHERE 
    c.job_industry_category = 'IT'
    AND t.standard_cost = (select * from max_st_cost)

"""

pd.read_sql_query(query,con=engine)

Unnamed: 0,customer_id,first_name,last_name
0,3473,Sanderson,Alloway
1,893,Gibby,Fearnley
2,3151,Thorn,Choffin
3,34,Jephthah,Bachmann
4,2913,Padraic,Bonnar
5,1918,Devin,Sandeson
6,1672,Sharla,Creebo
7,975,Goldarina,Rzehorz
8,1773,Nickolas,Guittet


Вывести всех клиентов из сферы IT и Health, у которых есть подтвержденные транзакции за период '2017-07-07' по '2017-07-17'.

In [61]:
query = """
SELECT DISTINCT c.customer_id,
       c.first_name,
       c.last_name
FROM customer AS c
JOIN transactions AS t ON 
    c.customer_id = t.customer_id
WHERE 
    c.job_industry_category IN ('IT', 'Health')
    AND t.order_status = 'Approved'
    AND t.transaction_date BETWEEN '2017-07-07' AND '2017-07-17';
"""

pd.read_sql_query(query,con=engine)

Unnamed: 0,customer_id,first_name,last_name
0,22,Deeanne,Durtnell
1,28,Fee,Zellmer
2,41,Basilius,Coupe
3,47,Matthew,Jeaycock
4,104,Odille,Panketh
...,...,...,...
110,3255,Sutherlan,Truin
111,3288,Fair,Dewen
112,3360,Joelie,Sherlaw
113,3365,Karlens,Chaffyn
