Для данного задания используется СУБД Postgres.

In [9]:
import psycopg2
import csv
from getpass import getpass

In [10]:
# Connection parameters later unpacked into psycopg2.connect().
# The user name and password are prompted for at run time; getpass() reads
# the password without echoing it.
DB_CONFIGS = dict(
    host='localhost',
    dbname='postgres',
    user=input('Enter Postgres username: '),
    password=getpass('Enter Postgres password: '),
)

Enter Postgres username: postgres
Enter Postgres password: ········


In [13]:
# Open one connection for the whole notebook.  No cursor is created here:
# every cell that talks to the database opens (and closes) its own cursor
# with `with conn.cursor() as cur:`, so a cursor created at this point would
# never be used or closed.
conn = psycopg2.connect(**DB_CONFIGS)

Создаем в базе данных таблицу, заполняем ее данными из csv-файла.

Для удобства создаем представление, в котором вместо даты используется месяц закупки.

Такое представление корректно только в том случае, если все данные относятся к одному году. Если данные охватывают несколько лет, логика ломается: например, за декабрем 2020 года следует январь 2021 года, но в представлении эти месяцы будут выглядеть как 12 и 1, и запросы не будут считать их соседними.

In [14]:
# Rebuild the `orders` table from orders.csv and recreate the `purchases`
# view as a single transaction.
#
# `with conn:` commits when the block finishes normally and rolls back if
# anything inside raises, so a failed load does not leave the connection in
# an aborted transaction for the cells that follow.  It does NOT close the
# connection.
with conn:
    with conn.cursor() as cur:
        # CASCADE also drops objects that depend on the table (e.g. the view).
        cur.execute("drop table if exists orders cascade")
        # purchase_date is stored as text; the view below parses it with the
        # 'MM/DD/YYYY' format.
        cur.execute('''
            create table orders(
                client_id integer,
                purchase_date varchar(15)
            )
        ''')

        # newline='' lets the csv module handle line endings itself.
        with open('orders.csv', 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # Skip the header row (tolerates an empty file).
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; they carry no
                    # data and would break the two-placeholder INSERT.
                    continue
                cur.execute("INSERT INTO orders VALUES (%s, %s)", row)

        # One view row per order, with the date reduced to its month number.
        cur.execute('drop view if exists purchases')
        cur.execute('''create view purchases as
            (select client_id, extract(month from to_date(purchase_date, 'MM/DD/YYYY'))::int as purchase_month
            from orders);
            ''')

Запросы к заданию.

In [19]:
# NOTE: all four queries compare bare month numbers taken from the
# `purchases` view, so they are only meaningful while the data stays within
# a single calendar year.

# New clients per month.  Within each client's rows ordered by month, lag()
# returns its default 0 only for the first row (real months are 1..12), so
# is_new is 1 exactly once per client - in the month of their first purchase.
query1 = '''
    select t.purchase_month, sum(t.is_new) as t_count
    from
    (select client_id, purchase_month,
            (case lag(purchase_month, 1, 0) over (partition by client_id order by purchase_month) when 0 then 1 else 0 end) as is_new
    from purchases) as t
    group by t.purchase_month
    order by t.purchase_month
    '''

# Per month, counts rows whose previous row for the same client is exactly
# one month earlier, i.e. the client also bought in the preceding month.
# lag() defaults to the current month, so a client's first row gives a
# difference of 0 and is not counted.
query2 = '''
    select t.purchase_month, sum(t.twice)
    from (select client_id, purchase_month,
           (case purchase_month - lag(purchase_month, 1, purchase_month) over (partition by client_id order by purchase_month) when 1 then 1 else 0 end) as twice
    from purchases) as t
    group by t.purchase_month
    order by t.purchase_month
'''

# Per month, counts rows whose previous row for the same client is more than
# one month earlier, i.e. the client came back after skipping at least one
# month.
query3 = '''
    select t.purchase_month, sum(t.returned)
    from (select client_id, purchase_month,
           (case when purchase_month - lag(purchase_month, 1, purchase_month) over (partition by client_id order by purchase_month) > 1 then 1 else 0 end) as returned
    from purchases) as t
    group by t.purchase_month
    order by t.purchase_month
'''

# For every month M present in the data, counts client rows from month M-1
# whose next row (lead(), default 0 when there is none) is neither in month M
# nor in the same month M-1 - i.e. clients who bought in M-1 but not in M.
# The `purchase_month != next` condition keeps only the last of a client's
# rows within M-1, so repeated purchases in one month are counted once.
# The left join keeps months with no such clients (count 0).
query4 = '''
    select t1.purchase_month, count(t2.client_id)
    from (select distinct purchase_month
    from purchases) as t1
    left join (select client_id, purchase_month,
           (lead(purchase_month, 1, 0) over (partition by client_id order by purchase_month)) as next
    from purchases) as t2
    on t1.purchase_month = (t2.purchase_month + 1) and (t2.next != t1.purchase_month) and (t2.purchase_month != t2.next)
    group by t1.purchase_month
    order by t1.purchase_month
'''

In [17]:
# Execute query1, collect its result set while the cursor is open, then
# print one result row per line.
with conn.cursor() as cur:
    cur.execute(query1)
    rows = cur.fetchall()

for row in rows:
    print(row)

(5, 100)
(6, 15)
(7, 0)
(8, 0)


In [18]:
# Execute query2, collect its result set while the cursor is open, then
# print one result row per line.
with conn.cursor() as cur:
    cur.execute(query2)
    rows = cur.fetchall()

for row in rows:
    print(row)

(5, 0)
(6, 79)
(7, 68)
(8, 0)


In [20]:
# Execute query3, collect its result set while the cursor is open, then
# print one result row per line.
with conn.cursor() as cur:
    cur.execute(query3)
    rows = cur.fetchall()

for row in rows:
    print(row)

(5, 0)
(6, 0)
(7, 5)
(8, 1)


In [21]:
# Execute query4, collect its result set while the cursor is open, then
# print one result row per line.
with conn.cursor() as cur:
    cur.execute(query4)
    rows = cur.fetchall()

for row in rows:
    print(row)

(5, 0)
(6, 21)
(7, 26)
(8, 73)


In [22]:
# All queries have been run; release the database connection.
conn.close()