# JDA Test Task  
### Камалетдинова Юлия  
--------------------------

## Задание 1

#### 1. Запрос для получения среднего по актуальным ценам

``` sql
-- Average of the current price of every product, where a product's current
-- price is the row with its latest date_start.
-- The subquery yields one (product, latest date_start) pair per product; the
-- join matches on product as well as date, so an outdated row of another
-- product that happens to share a price/date pair is never counted.
SELECT AVG(a.price)
FROM prices a
INNER JOIN (
    SELECT product, MAX(date_start) AS date_start
    FROM prices
    GROUP BY product
) b ON a.product = b.product AND a.date_start = b.date_start;
```

#### 2. Стоимости и соответствующие даты для продукта `product`

In [1]:
import os
import sqlite3

In [2]:
# File name of the SQLite database.
DB_FILE = 'DB.db'
# Database path: DB_FILE inside the current working directory, resolved once
# at the moment this assignment runs (a later chdir does not change it).
DEFAULT_PATH = os.path.join(os.getcwd(), DB_FILE)

In [3]:
def db_connect(db_path=DEFAULT_PATH):
    '''
    Open a SQLite connection.

    Args:
        db_path: path to the database file; defaults to DEFAULT_PATH.
    Returns:
        the open sqlite3 connection; closing it is left to the caller.
    '''
    return sqlite3.connect(db_path)

In [4]:
def get_product_data(product: str) -> tuple:
    '''
    Get price history for the product from `prices` DB table.

    Args:
        product: the product name.
    Returns:
        tuple of dates and prices corresponding to
        product ordered by ascending date:
        ([date_start1, date_start2, ...], [price1, price2, ...]).
        When the product has no rows, ([], []) is returned so the
        result can always be unpacked into two lists.
    '''
    conn = db_connect()
    try:
        cur = conn.cursor()
        # Trailing space after `?` keeps the two literals from fusing
        # into `?ORDER BY` when Python concatenates them.
        cur.execute("SELECT date_start, price FROM prices WHERE product = ? "
                    "ORDER BY date_start ASC",
                    (product,))
        rows = cur.fetchall()
    finally:
        # Each call opens its own connection, so close it explicitly.
        conn.close()

    if not rows:
        # zip(*[]) would yield an empty tuple instead of two empty lists.
        return [], []

    dates, prices = zip(*rows)
    return list(dates), list(prices)

In [5]:
# Example: price history of product 'A'.
get_product_data('A')

(['1970-01-01',
  '2010-01-21',
  '2010-08-17',
  '2010-11-02',
  '2012-08-27',
  '2012-12-06',
  '2013-01-11',
  '2013-05-02',
  '2013-05-08',
  '2013-11-01',
  '2014-08-30',
  '2015-12-29',
  '2016-05-04',
  '2016-08-14',
  '2016-09-28',
  '2017-04-01'],
 [297.79,
  164.68,
  558.58,
  127.08,
  352.13,
  0.35,
  100.61,
  120.5,
  260.19,
  1747.84,
  59.84,
  118.27,
  550.62,
  339.35,
  210.19,
  111.23])

## Задание 2

#### 1. Запрос для создания таблицы `revenue`

``` sql
-- Build `revenue` as a copy of every `sales` row plus a computed `revenue`
-- column: the sold amount multiplied by the price in effect on the sale date.
CREATE TABLE revenue
    AS SELECT s.*,
            -- Price in effect: the same product's price row with the latest
            -- date_start that is not after the sale date. If no such row
            -- exists the subquery yields NULL, and so does the product.
            (SELECT p.price
             FROM prices p
             WHERE p.date_start <= s.date AND p.product = s.product
             ORDER BY p.date_start DESC
             LIMIT 1
            ) * s.amount AS revenue
        FROM sales s;
```

#### 2. Функция для расчёта выборочного стандартного отклонения `revenue`

In [6]:
import numpy as np

In [14]:
def get_ssd(prod_name: str) -> float:
    '''
    Computes sample standard deviation of revenue by product name.

    Args:
        prod_name: the product name.
    Returns:
        sample standard deviation (ddof=1) of the `revenue` column over
        the product's rows in the `revenue` table. NaN is returned when
        the product has fewer than two rows, because the sample standard
        deviation is undefined in that case.
    '''
    conn = db_connect()
    try:
        cur = conn.cursor()
        cur.execute("SELECT revenue FROM revenue WHERE product = ?;",
                    (prod_name,))
        rows = cur.fetchall()
    finally:
        # Each call opens its own connection, so close it explicitly.
        conn.close()

    # Every fetched row is a 1-tuple holding the revenue value.
    rev = [row[0] for row in rows]

    if len(rev) < 2:
        # With ddof=1 the divisor would be <= 0; answer NaN directly
        # instead of failing on an empty result or emitting a warning.
        return np.float64(np.nan)

    return np.std(rev, ddof=1)

In [15]:
# Example: sample standard deviation of revenue for product 'B'.
print(get_ssd('B'))

33465.93838814805


## Задание 3

In [9]:
from sklearn.linear_model import LinearRegression

In [12]:
def lin_reg(product: str, date_start: str, date_end: str) -> tuple:
    """
    Trains Least Squares Regression model y = kx + b.

    x is the whole number of days between `date_start` and the row's
    date, y is the row's `amount`; rows come from the `revenue` table.

    Args:
        product: the product name.
        date_start: starting date in `YYYY-MM-DD` format
        date_end: ending date in `YYYY-MM-DD` format
    Returns:
        coefficients k, b of the lm model as NumPy arrays:
        k has shape (1, 1), b has shape (1,).
    Raises:
        ValueError: if no rows match the product and date range.

    Note:
        Both bounds are exclusive: only rows with
        date_start < date < date_end are used.
    """
    conn = db_connect()
    try:
        cur = conn.cursor()
        cur.execute("SELECT julianday(date) - julianday(?),"
                    "amount FROM revenue WHERE date > ? AND "
                    "date < ? AND product = ?;",
                    (date_start, date_start, date_end, product,))
        rows = cur.fetchall()
    finally:
        # Each call opens its own connection, so close it explicitly.
        conn.close()

    if not rows:
        # Fail with an explicit message rather than an opaque IndexError.
        raise ValueError(
            f"no rows for product {product!r} strictly between "
            f"{date_start} and {date_end}"
        )

    # Transform query data: split (day_offset, amount) rows into columns
    days, amounts = zip(*rows)
    # julianday() differences are floats; truncate to whole days
    x = np.array([int(day) for day in days]).reshape(-1, 1)
    y = np.array(amounts).reshape(-1, 1)

    # Train linear model
    model = LinearRegression()
    model.fit(x, y)

    return model.coef_, model.intercept_

In [13]:
# Example: fit amount against day offset for product 'Q' between 2019-01-01 and 2019-03-31.
lin_reg(product='Q', date_start='2019-01-01', date_end='2019-03-31')

(array([[-0.16513156]]), array([28.95062696]))