In [2]:
# To handle data
import numpy as np
import pandas as pd

# To make visualizations
import hvplot.pandas
import panel as pn; pn.extension()
from panel.template import DarkTheme

from psycopg2 import OperationalError
from psycopg2.extras import DictCursor # NamedTupleCursor
import psycopg2

In [3]:
def create_connection(db_name, db_user, db_password, db_host, db_port):
    """Open a psycopg2 connection to a PostgreSQL database.

    Args:
        db_name: Name of the database to connect to.
        db_user: User name used for authentication.
        db_password: Password for ``db_user``.
        db_host: Host of the database server.
        db_port: Port of the database server.

    Returns:
        The open connection, or ``None`` when psycopg2 raises
        ``OperationalError`` (the error is printed, not re-raised).
    """
    connect_kwargs = {
        "database": db_name,
        "user": db_user,
        "password": db_password,
        "host": db_host,
        "port": db_port,
    }
    try:
        connection = psycopg2.connect(**connect_kwargs)
    except OperationalError as e:
        # Best effort: report the failure and let the caller deal with None.
        print(f"The error '{e}' occurred")
        return None
    print("Connection to PostgreSQL DB successful")
    return connection

# Dynamics of annual sales in retail and catering

In [4]:
# Yearly sales totals for the overall "Retail and food services sales, total" category.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # Sum the monthly retail_sales rows per calendar year
        cursor.execute("""
SELECT date_part('year',sales_month) as sales_year, sum(sales) as sales
FROM retail_sales
WHERE kind_of_business = 'Retail and food services sales, total'
GROUP BY 1
ORDER BY 1
""")
        retail_and_catering_sales_volume = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(retail_and_catering_sales_volume)

Connection to PostgreSQL DB successful
[[1992.0, Decimal('2014102')], [1993.0, Decimal('2153095')], [1994.0, Decimal('2330235')], [1995.0, Decimal('2450628')], [1996.0, Decimal('2603794')], [1997.0, Decimal('2726131')], [1998.0, Decimal('2852956')], [1999.0, Decimal('3086990')], [2000.0, Decimal('3287537')], [2001.0, Decimal('3378906')], [2002.0, Decimal('3459077')], [2003.0, Decimal('3612457')], [2004.0, Decimal('3846605')], [2005.0, Decimal('4085746')], [2006.0, Decimal('4294359')], [2007.0, Decimal('4439733')], [2008.0, Decimal('4391580')], [2009.0, Decimal('4064476')], [2010.0, Decimal('4284968')], [2011.0, Decimal('4598302')], [2012.0, Decimal('4826390')], [2013.0, Decimal('5001763')], [2014.0, Decimal('5215656')], [2015.0, Decimal('5349487')], [2016.0, Decimal('5510186')], [2017.0, Decimal('5744810')], [2018.0, Decimal('6001623')], [2019.0, Decimal('6218002')], [2020.0, Decimal('6224399')]]


In [5]:
# Prepare the values for plotting: wrap the fetched rows in an array and
# convert each row in place (year -> NumPy integer, sales -> 32-bit float).
np_array_int = np.array(retail_and_catering_sales_volume)
for row in np_array_int:
    row[0], row[1] = np.int_(row[0]), np.float32(row[1])
print(np_array_int)

[[1992 2014102.0]
 [1993 2153095.0]
 [1994 2330235.0]
 [1995 2450628.0]
 [1996 2603794.0]
 [1997 2726131.0]
 [1998 2852956.0]
 [1999 3086990.0]
 [2000 3287537.0]
 [2001 3378906.0]
 [2002 3459077.0]
 [2003 3612457.0]
 [2004 3846605.0]
 [2005 4085746.0]
 [2006 4294359.0]
 [2007 4439733.0]
 [2008 4391580.0]
 [2009 4064476.0]
 [2010 4284968.0]
 [2011 4598302.0]
 [2012 4826390.0]
 [2013 5001763.0]
 [2014 5215656.0]
 [2015 5349487.0]
 [2016 5510186.0]
 [2017 5744810.0]
 [2018 6001623.0]
 [2019 6218002.0]
 [2020 6224399.0]]


In [7]:
# Label the converted array's two columns and preview the first rows
df_1 = pd.DataFrame(
    data=np_array_int,
    columns=['sales_year', 'sales'],
)
print(df_1.head(n=5))

  sales_year      sales
0       1992  2014102.0
1       1993  2153095.0
2       1994  2330235.0
3       1995  2450628.0
4       1996  2603794.0


In [8]:
# Bar chart of total sales per year
plot_1 = df_1.hvplot.bar(
    x='sales_year',
    y='sales',
    color='orange',
    grid=True,
    title='Dynamics of annual sales in retail and catering',
    shared_axes=False,
)
plot_1

# Dynamics of annual retail sales of book stores, sporting goods stores, and hobby, toy, and game stores

In [10]:
# Yearly sales per category for book stores, sporting goods stores and
# hobby, toy, and game stores.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # One row per (year, kind_of_business) with the summed sales
        cursor.execute("""
SELECT date_part('year',sales_month) as sales_year,kind_of_business,sum(sales) as sales
FROM retail_sales
WHERE kind_of_business in ('Book stores','Sporting goods stores','Hobby, toy, and game stores')
GROUP BY 1,2
ORDER BY 1,2
;
""")
        retail_sales_bs_spg_hs_toys_games = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(retail_sales_bs_spg_hs_toys_games)

Connection to PostgreSQL DB successful
[[1992.0, 'Book stores', Decimal('8327')], [1992.0, 'Hobby, toy, and game stores', Decimal('11251')], [1992.0, 'Sporting goods stores', Decimal('15583')], [1993.0, 'Book stores', Decimal('9108')], [1993.0, 'Hobby, toy, and game stores', Decimal('11651')], [1993.0, 'Sporting goods stores', Decimal('16791')], [1994.0, 'Book stores', Decimal('10107')], [1994.0, 'Hobby, toy, and game stores', Decimal('12850')], [1994.0, 'Sporting goods stores', Decimal('18825')], [1995.0, 'Book stores', Decimal('11196')], [1995.0, 'Hobby, toy, and game stores', Decimal('13714')], [1995.0, 'Sporting goods stores', Decimal('19869')], [1996.0, 'Book stores', Decimal('11905')], [1996.0, 'Hobby, toy, and game stores', Decimal('14502')], [1996.0, 'Sporting goods stores', Decimal('20810')], [1997.0, 'Book stores', Decimal('12742')], [1997.0, 'Hobby, toy, and game stores', Decimal('15021')], [1997.0, 'Sporting goods stores', Decimal('21167')], [1998.0, 'Book stores', Decimal(

In [11]:
# Prepare the values for plotting: the year (column 0) and the sales total
# (column 2) are converted to NumPy integers; the category name is left as is.
np_array_1 = np.array(retail_sales_bs_spg_hs_toys_games)
for row_idx in range(len(np_array_1)):
    np_array_1[row_idx, 0] = np.int_(np_array_1[row_idx, 0])
    np_array_1[row_idx, 2] = np.int_(np_array_1[row_idx, 2])
print(np_array_1)

[[1992 'Book stores' 8327]
 [1992 'Hobby, toy, and game stores' 11251]
 [1992 'Sporting goods stores' 15583]
 [1993 'Book stores' 9108]
 [1993 'Hobby, toy, and game stores' 11651]
 [1993 'Sporting goods stores' 16791]
 [1994 'Book stores' 10107]
 [1994 'Hobby, toy, and game stores' 12850]
 [1994 'Sporting goods stores' 18825]
 [1995 'Book stores' 11196]
 [1995 'Hobby, toy, and game stores' 13714]
 [1995 'Sporting goods stores' 19869]
 [1996 'Book stores' 11905]
 [1996 'Hobby, toy, and game stores' 14502]
 [1996 'Sporting goods stores' 20810]
 [1997 'Book stores' 12742]
 [1997 'Hobby, toy, and game stores' 15021]
 [1997 'Sporting goods stores' 21167]
 [1998 'Book stores' 13282]
 [1998 'Hobby, toy, and game stores' 15833]
 [1998 'Sporting goods stores' 22284]
 [1999 'Book stores' 14172]
 [1999 'Hobby, toy, and game stores' 16651]
 [1999 'Sporting goods stores' 23699]
 [2000 'Book stores' 14879]
 [2000 'Hobby, toy, and game stores' 16947]
 [2000 'Sporting goods stores' 25308]
 [2001 'Book

In [12]:
# Label the three columns of the converted array and preview the first rows
df_2 = pd.DataFrame(
    data=np_array_1,
    columns=['sales_year', 'kind_of_business', 'sales'],
)
print(df_2.head(n=5))

  sales_year             kind_of_business  sales
0       1992                  Book stores   8327
1       1992  Hobby, toy, and game stores  11251
2       1992        Sporting goods stores  15583
3       1993                  Book stores   9108
4       1993  Hobby, toy, and game stores  11651


In [13]:
# One line per kind_of_business, yearly sales on the y axis
plot_2 = df_2.hvplot.line(
    x='sales_year',
    y='sales',
    by='kind_of_business',
    grid=True,
    title='Dynamics of annual retail sales of bookstores, sports goods stores and hobby, toys and games stores',
    legend='top',
    shared_axes=False,
)

plot_2


# Dynamics of sales in women's and men's clothing stores by year

In [14]:
# Yearly sales of women's and men's clothing stores, pivoted into one column each.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # CASE expressions split the two categories into womens_sales / mens_sales
        cursor.execute("""
SELECT date_part('year',sales_month) as sales_year
,sum(case when kind_of_business = 'Women''s clothing stores' then sales end) as womens_sales
,sum(case when kind_of_business = 'Men''s clothing stores' then sales end) as mens_sales
FROM retail_sales
WHERE kind_of_business in ('Men''s clothing stores','Women''s clothing stores')
GROUP BY 1
ORDER BY 1
;
""")
        sales_w_m_clothing_year = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(sales_w_m_clothing_year)

Connection to PostgreSQL DB successful
[[1992.0, Decimal('31815'), Decimal('10179')], [1993.0, Decimal('32350'), Decimal('9962')], [1994.0, Decimal('30585'), Decimal('10032')], [1995.0, Decimal('28696'), Decimal('9315')], [1996.0, Decimal('28238'), Decimal('9546')], [1997.0, Decimal('27822'), Decimal('10069')], [1998.0, Decimal('28332'), Decimal('10196')], [1999.0, Decimal('29549'), Decimal('9667')], [2000.0, Decimal('31447'), Decimal('9507')], [2001.0, Decimal('31453'), Decimal('8625')], [2002.0, Decimal('31246'), Decimal('8112')], [2003.0, Decimal('32565'), Decimal('8249')], [2004.0, Decimal('34954'), Decimal('8566')], [2005.0, Decimal('37075'), Decimal('8737')], [2006.0, Decimal('38809'), Decimal('8844')], [2007.0, Decimal('40294'), Decimal('8772')], [2008.0, Decimal('38402'), Decimal('8351')], [2009.0, Decimal('36055'), Decimal('7353')], [2010.0, Decimal('37690'), Decimal('7285')], [2011.0, Decimal('40048'), Decimal('7860')], [2012.0, Decimal('41794'), Decimal('8272')], [2013.0, De

In [15]:
# Prepare the values for plotting: every column here is numeric, so the whole
# array is cast to the default NumPy integer type in a single step.
np_array_2 = np.array(sales_w_m_clothing_year)
np_array_int_2 = np_array_2.astype(np.int_)
print(np_array_int_2.dtype)

int64


In [16]:
# Label the integer array's columns and preview the first rows
df_3 = pd.DataFrame(
    data=np_array_int_2,
    columns=['sales_year', 'women_sales', 'mens_sales'],
)
print(df_3.head(n=5))

   sales_year  women_sales  mens_sales
0        1992        31815       10179
1        1993        32350        9962
2        1994        30585       10032
3        1995        28696        9315
4        1996        28238        9546


In [18]:
# Two lines (men's and women's yearly sales) on a shared chart
plot_3 = df_3.hvplot.line(
    x='sales_year',
    y=['mens_sales', 'women_sales'],
    grid=True,
    title="Dynamics of sales in women's and men's clothing stores\nBy year",
    legend='top',
    width=690,
    height=400,
    shared_axes=False,
)
plot_3

# Percentage of sales in men's and women's clothing stores from their total volume by month

In [19]:
# Monthly share of men's vs. women's clothing store sales in their combined total.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # The window sum per sales_month is the combined total of both categories;
        # pct_total is each category's percentage of that monthly total.
        cursor.execute("""
SELECT sales_month
,kind_of_business
,sales
,sum(sales) over (partition by sales_month) as total_sales
,sales * 100 / sum(sales) over (partition by sales_month) as pct_total
FROM retail_sales 
WHERE kind_of_business in ('Men''s clothing stores','Women''s clothing stores')
ORDER BY 1
;
""")
        pct_sales_m_w_month = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(pct_sales_m_w_month)

Connection to PostgreSQL DB successful
[[datetime.date(1992, 1, 1), "Men's clothing stores", Decimal('701'), Decimal('2574'), Decimal('27.2338772338772339')], [datetime.date(1992, 1, 1), "Women's clothing stores", Decimal('1873'), Decimal('2574'), Decimal('72.7661227661227661')], [datetime.date(1992, 2, 1), "Men's clothing stores", Decimal('658'), Decimal('2649'), Decimal('24.8395620989052473')], [datetime.date(1992, 2, 1), "Women's clothing stores", Decimal('1991'), Decimal('2649'), Decimal('75.1604379010947527')], [datetime.date(1992, 3, 1), "Men's clothing stores", Decimal('731'), Decimal('3134'), Decimal('23.3248245054243778')], [datetime.date(1992, 3, 1), "Women's clothing stores", Decimal('2403'), Decimal('3134'), Decimal('76.6751754945756222')], [datetime.date(1992, 4, 1), "Men's clothing stores", Decimal('816'), Decimal('3481'), Decimal('23.4415397874174088')], [datetime.date(1992, 4, 1), "Women's clothing stores", Decimal('2665'), Decimal('3481'), Decimal('76.5584602125825912'

In [20]:
# Prepare the values for plotting: only the percentage (column 4) is converted,
# to a 32-bit float; the other columns keep the types returned by the driver.
np_array_3 = np.array(pct_sales_m_w_month)
for row_idx in range(len(np_array_3)):
    np_array_3[row_idx, 4] = np.float32(np_array_3[row_idx, 4])
print(np_array_3)

[[datetime.date(1992, 1, 1) "Men's clothing stores" Decimal('701')
  Decimal('2574') 27.233877]
 [datetime.date(1992, 1, 1) "Women's clothing stores" Decimal('1873')
  Decimal('2574') 72.76612]
 [datetime.date(1992, 2, 1) "Men's clothing stores" Decimal('658')
  Decimal('2649') 24.839561]
 ...
 [datetime.date(2020, 11, 1) "Women's clothing stores" Decimal('2726')
  Decimal('2726') 100.0]
 [datetime.date(2020, 12, 1) "Men's clothing stores" Decimal('604')
  Decimal('4003') 15.088683]
 [datetime.date(2020, 12, 1) "Women's clothing stores" Decimal('3399')
  Decimal('4003') 84.911316]]


In [21]:
# Label the five columns of the converted array and preview the first rows
df_4 = pd.DataFrame(
    data=np_array_3,
    columns=['sales_month', 'kind_of_business', 'sales', 'total_sales', 'pct_total'],
)
print(df_4.head(n=5))

  sales_month         kind_of_business sales total_sales  pct_total
0  1992-01-01    Men's clothing stores   701        2574  27.233877
1  1992-01-01  Women's clothing stores  1873        2574  72.766121
2  1992-02-01    Men's clothing stores   658        2649  24.839561
3  1992-02-01  Women's clothing stores  1991        2649  75.160439
4  1992-03-01    Men's clothing stores   731        3134  23.324825


In [22]:
# One line per kind_of_business showing its monthly share of the combined total
plot_4 = df_4.hvplot.line(
    x='sales_month',
    y='pct_total',
    by='kind_of_business',
    grid=True,
    title="Percentage of sales in men's and women's clothing stores\nfrom their total volume by month",
    legend='top',
    width=690,
    height=400,
    shared_axes=False,
)
plot_4

# Percentage of sales of women's and men's clothing from the annual volume for 2019

In [23]:
# Each month's sales as a percentage of that category's yearly total.
# NOTE(review): despite the variable name, the query is not restricted to 2019;
# it returns every month available for the two categories.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # The window sum is partitioned by (year, kind_of_business), so
        # yearly_sales is the category's total for the row's calendar year.
        cursor.execute("""
SELECT sales_month, kind_of_business, sales
,sum(sales) over (partition by date_part('year',sales_month), kind_of_business) as yearly_sales
,sales * 100 / sum(sales) over (partition by date_part('year',sales_month), kind_of_business) as pct_yearly
FROM retail_sales 
WHERE kind_of_business in ('Men''s clothing stores','Women''s clothing stores')
ORDER BY 1,2
;
""")
        pct_sales_w_men_2019 = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(pct_sales_w_men_2019)

Connection to PostgreSQL DB successful
[[datetime.date(1992, 1, 1), "Men's clothing stores", Decimal('701'), Decimal('10179'), Decimal('6.8867275763827488')], [datetime.date(1992, 1, 1), "Women's clothing stores", Decimal('1873'), Decimal('31815'), Decimal('5.8871601445858872')], [datetime.date(1992, 2, 1), "Men's clothing stores", Decimal('658'), Decimal('10179'), Decimal('6.4642892229099126')], [datetime.date(1992, 2, 1), "Women's clothing stores", Decimal('1991'), Decimal('31815'), Decimal('6.2580543768662581')], [datetime.date(1992, 3, 1), "Men's clothing stores", Decimal('731'), Decimal('10179'), Decimal('7.1814520090382159')], [datetime.date(1992, 3, 1), "Women's clothing stores", Decimal('2403'), Decimal('31815'), Decimal('7.5530410183875530')], [datetime.date(1992, 4, 1), "Men's clothing stores", Decimal('816'), Decimal('10179'), Decimal('8.0165045682287062')], [datetime.date(1992, 4, 1), "Women's clothing stores", Decimal('2665'), Decimal('31815'), Decimal('8.3765519409083766'

In [24]:
# Prepare the values for plotting: only the yearly percentage (column 4) is
# converted, to a 32-bit float; the remaining columns are left untouched.
np_array_4 = np.array(pct_sales_w_men_2019)
for row_idx in range(len(np_array_4)):
    np_array_4[row_idx, 4] = np.float32(np_array_4[row_idx, 4])
print(np_array_4)

[[datetime.date(1992, 1, 1) "Men's clothing stores" Decimal('701')
  Decimal('10179') 6.886728]
 [datetime.date(1992, 1, 1) "Women's clothing stores" Decimal('1873')
  Decimal('31815') 5.8871603]
 [datetime.date(1992, 2, 1) "Men's clothing stores" Decimal('658')
  Decimal('10179') 6.464289]
 ...
 [datetime.date(2020, 11, 1) "Women's clothing stores" Decimal('2726')
  Decimal('26526') 10.27671]
 [datetime.date(2020, 12, 1) "Men's clothing stores" Decimal('604')
  Decimal('3681') 16.408585]
 [datetime.date(2020, 12, 1) "Women's clothing stores" Decimal('3399')
  Decimal('26526') 12.813843]]


In [25]:
# Label the five columns of the converted array and preview the first rows
df_5 = pd.DataFrame(
    data=np_array_4,
    columns=['sales_month', 'kind_of_business', 'sales', 'yearly_sales', 'pct_yearly'],
)
print(df_5.head(n=5))

  sales_month         kind_of_business sales yearly_sales pct_yearly
0  1992-01-01    Men's clothing stores   701        10179   6.886728
1  1992-01-01  Women's clothing stores  1873        31815    5.88716
2  1992-02-01    Men's clothing stores   658        10179   6.464289
3  1992-02-01  Women's clothing stores  1991        31815   6.258054
4  1992-03-01    Men's clothing stores   731        10179   7.181452


In [26]:
# The title promises 2019 only, but df_5 holds every month returned by the
# query, so restrict the rows to 2019 before plotting.
# pd.to_datetime handles the date objects stored in the sales_month column.
is_2019 = pd.to_datetime(df_5['sales_month']).dt.year == 2019
df_5_2019 = df_5[is_2019]

# One line per kind_of_business: monthly percentage of that category's 2019 total
plot_5 = df_5_2019.hvplot(kind='line',
        x='sales_month',
        y='pct_yearly',
        by='kind_of_business',
        grid=True,
        title='Percentage of sales of women\'s and men\'s clothing\nfrom the annual volume for 2019',
        legend='top',
        width=690,
        height=400,
        shared_axes=False)
plot_5

# Sales in men's and women's clothing stores, indexed to 1992 (percent change from the 1992 level)

In [27]:
# Yearly sales of men's and women's clothing stores expressed as the percent
# change from each category's first year in the data.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # The subquery aggregates sales per (year, category) up to the end of 2019;
        # first_value() picks each category's earliest year as the index base.
        # An explicit ORDER BY makes the row order deterministic for the line plot
        # (the original query left the order to the planner).
        cursor.execute("""
SELECT sales_year, kind_of_business, sales
,(sales / first_value(sales) over (partition by kind_of_business order by sales_year) - 1) * 100 as pct_from_index
FROM
(
        SELECT date_part('year',sales_month) as sales_year
        ,kind_of_business
        ,sum(sales) as sales
        FROM retail_sales
        WHERE kind_of_business in ('Men''s clothing stores','Women''s clothing stores')  and sales_month <= '2019-12-31'
GROUP BY 1,2
) a
ORDER BY kind_of_business, sales_year
;
""")
        sales_m_w_adjusted_1992 = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(sales_m_w_adjusted_1992)

Connection to PostgreSQL DB successful
[[1992.0, "Men's clothing stores", Decimal('10179'), Decimal('0E-20')], [1993.0, "Men's clothing stores", Decimal('9962'), Decimal('-2.13184006287454563300')], [1994.0, "Men's clothing stores", Decimal('10032'), Decimal('-1.44414972001178897700')], [1995.0, "Men's clothing stores", Decimal('9315'), Decimal('-8.48806366047745358100')], [1996.0, "Men's clothing stores", Decimal('9546'), Decimal('-6.21868552903035661700')], [1997.0, "Men's clothing stores", Decimal('10069'), Decimal('-1.08065625307004617300')], [1998.0, "Men's clothing stores", Decimal('10196'), Decimal('0.16701051183809804500')], [1999.0, "Men's clothing stores", Decimal('9667'), Decimal('-5.02996365065330582600')], [2000.0, "Men's clothing stores", Decimal('9507'), Decimal('-6.60182729148246389600')], [2001.0, "Men's clothing stores", Decimal('8625'), Decimal('-15.26672561155319776000')], [2002.0, "Men's clothing stores", Decimal('8112'), Decimal('-20.30651340996168582400')], [2003

In [28]:
# Prepare the values for plotting: only pct_from_index (column 3) is converted,
# to a 32-bit float; year, category and sales keep their fetched types.
np_array_5 = np.array(sales_m_w_adjusted_1992)
for row_idx in range(len(np_array_5)):
    np_array_5[row_idx, 3] = np.float32(np_array_5[row_idx, 3])
print(np_array_5)

[[1992.0 "Men's clothing stores" Decimal('10179') 0.0]
 [1993.0 "Men's clothing stores" Decimal('9962') -2.13184]
 [1994.0 "Men's clothing stores" Decimal('10032') -1.4441497]
 [1995.0 "Men's clothing stores" Decimal('9315') -8.488064]
 [1996.0 "Men's clothing stores" Decimal('9546') -6.2186856]
 [1997.0 "Men's clothing stores" Decimal('10069') -1.0806563]
 [1998.0 "Men's clothing stores" Decimal('10196') 0.16701052]
 [1999.0 "Men's clothing stores" Decimal('9667') -5.0299635]
 [2000.0 "Men's clothing stores" Decimal('9507') -6.601827]
 [2001.0 "Men's clothing stores" Decimal('8625') -15.266726]
 [2002.0 "Men's clothing stores" Decimal('8112') -20.306513]
 [2003.0 "Men's clothing stores" Decimal('8249') -18.960606]
 [2004.0 "Men's clothing stores" Decimal('8566') -15.846351]
 [2005.0 "Men's clothing stores" Decimal('8737') -14.166421]
 [2006.0 "Men's clothing stores" Decimal('8844') -13.115237]
 [2007.0 "Men's clothing stores" Decimal('8772') -13.822576]
 [2008.0 "Men's clothing stores

In [29]:
# Label the four columns of the converted array and preview the first rows
df_6 = pd.DataFrame(
    data=np_array_5,
    columns=['sales_year', 'kind_of_business', 'sales', 'pct_from_index'],
)
print(df_6.head(n=5))

  sales_year       kind_of_business  sales pct_from_index
0     1992.0  Men's clothing stores  10179            0.0
1     1993.0  Men's clothing stores   9962       -2.13184
2     1994.0  Men's clothing stores  10032       -1.44415
3     1995.0  Men's clothing stores   9315      -8.488064
4     1996.0  Men's clothing stores   9546      -6.218686


In [30]:
# One line per kind_of_business: percent change relative to the index year
plot_6 = df_6.hvplot.line(
    x='sales_year',
    y='pct_from_index',
    by='kind_of_business',
    grid=True,
    title="Sales in men's and women's clothing stores,\nbrought to 1992",
    legend='top',
    width=690,
    height=400,
    shared_axes=False,
)
plot_6


In [31]:
# Combine the four clothing-store charts into one layout, two charts per row
delta_2 = plot_3 + plot_4 + plot_5 + plot_6
delta_2 = delta_2.cols(2)

delta_2

# The difference between sales in women's and men's clothing stores by year

In [32]:
# Yearly gap between women's and men's clothing store sales (women's minus men's).
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # Months after 2019-12-01 are excluded by the WHERE clause
        cursor.execute("""
SELECT date_part('year',sales_month) as sales_year
,sum(case when kind_of_business = 'Women''s clothing stores' then sales end) 
 - sum(case when kind_of_business = 'Men''s clothing stores' then sales end) as womens_minus_mens
FROM retail_sales
WHERE kind_of_business in ('Men''s clothing stores'
,'Women''s clothing stores')
and sales_month <= '2019-12-01'
GROUP BY 1
ORDER BY 1
;
""")
        difference_sales_w_men_by_year = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(difference_sales_w_men_by_year)

Connection to PostgreSQL DB successful
[[1992.0, Decimal('21636')], [1993.0, Decimal('22388')], [1994.0, Decimal('20553')], [1995.0, Decimal('19381')], [1996.0, Decimal('18692')], [1997.0, Decimal('17753')], [1998.0, Decimal('18136')], [1999.0, Decimal('19882')], [2000.0, Decimal('21940')], [2001.0, Decimal('22828')], [2002.0, Decimal('23134')], [2003.0, Decimal('24316')], [2004.0, Decimal('26388')], [2005.0, Decimal('28338')], [2006.0, Decimal('29965')], [2007.0, Decimal('31522')], [2008.0, Decimal('30051')], [2009.0, Decimal('28702')], [2010.0, Decimal('30405')], [2011.0, Decimal('32188')], [2012.0, Decimal('33522')], [2013.0, Decimal('33117')], [2014.0, Decimal('32705')], [2015.0, Decimal('32255')], [2016.0, Decimal('32351')], [2017.0, Decimal('32362')], [2018.0, Decimal('32965')], [2019.0, Decimal('32880')]]


In [33]:
# Prepare the values for plotting: both columns are numeric, so the whole
# array is cast to the default NumPy integer type in a single step.
np_array_6 = np.array(difference_sales_w_men_by_year)
np_array_int_6 = np_array_6.astype(np.int_)
print(np_array_int_6.dtype)

int64


In [34]:
# Label the integer array's two columns and preview the first rows
df_7 = pd.DataFrame(
    data=np_array_int_6,
    columns=['sales_year', 'women_minus_mens'],
)
print(df_7.head(n=5))

   sales_year  women_minus_mens
0        1992             21636
1        1993             22388
2        1994             20553
3        1995             19381
4        1996             18692


In [35]:
# Semi-transparent area chart of the yearly women's-minus-men's sales gap
plot_7 = df_7.hvplot(
    kind='area',
    x='sales_year',
    y='women_minus_mens',
    alpha=0.5,
    color='orange',
    grid=True,
    title="The difference between sales in women's and men's clothing stores by year",
    shared_axes=False,
)
plot_7

# Monthly sales and 12-month moving average for women's clothing stores

In [36]:
# Monthly sales of women's clothing stores with a 12-month moving average.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # Self-join: for each month in `a`, `b` supplies that month plus the 11
        # months before it; avg(b.sales) is the moving average and records_count
        # shows how many months contributed. Rows start at 1993-01-01.
        cursor.execute("""
SELECT a.sales_month
,a.sales
,avg(b.sales) as moving_avg
,count(b.sales) as records_count
FROM retail_sales a
JOIN retail_sales b on a.kind_of_business = b.kind_of_business 
 and b.sales_month between a.sales_month - interval '11 months' 
 and a.sales_month
 and b.kind_of_business = 'Women''s clothing stores'
WHERE a.kind_of_business = 'Women''s clothing stores'
and a.sales_month >= '1993-01-01'
GROUP BY 1,2
ORDER BY 1
;
""")
        monthly_sales_moving_avg_w = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(monthly_sales_moving_avg_w)

Connection to PostgreSQL DB successful
[[datetime.date(1993, 1, 1), Decimal('2123'), Decimal('2672.0833333333333333'), 12], [datetime.date(1993, 2, 1), Decimal('2005'), Decimal('2673.2500000000000000'), 12], [datetime.date(1993, 3, 1), Decimal('2442'), Decimal('2676.5000000000000000'), 12], [datetime.date(1993, 4, 1), Decimal('2762'), Decimal('2684.5833333333333333'), 12], [datetime.date(1993, 5, 1), Decimal('2873'), Decimal('2694.6666666666666667'), 12], [datetime.date(1993, 6, 1), Decimal('2552'), Decimal('2705.3333333333333333'), 12], [datetime.date(1993, 7, 1), Decimal('2539'), Decimal('2719.1666666666666667'), 12], [datetime.date(1993, 8, 1), Decimal('2626'), Decimal('2716.5833333333333333'), 12], [datetime.date(1993, 9, 1), Decimal('2622'), Decimal('2721.7500000000000000'), 12], [datetime.date(1993, 10, 1), Decimal('2713'), Decimal('2718.2500000000000000'), 12], [datetime.date(1993, 11, 1), Decimal('2923'), Decimal('2716.3333333333333333'), 12], [datetime.date(1993, 12, 1), Decim

In [37]:
# Prepare the values for plotting: monthly sales (column 1) become NumPy
# integers and the moving average (column 2) a 32-bit float, row by row.
np_array_7 = np.array(monthly_sales_moving_avg_w)
for row in np_array_7:
    row[1], row[2] = np.int_(row[1]), np.float32(row[2])
print(np_array_7)

[[datetime.date(1993, 1, 1) 2123 2672.0833 12]
 [datetime.date(1993, 2, 1) 2005 2673.25 12]
 [datetime.date(1993, 3, 1) 2442 2676.5 12]
 ...
 [datetime.date(2020, 10, 1) 2634 2395.5833 12]
 [datetime.date(2020, 11, 1) 2726 2301.9167 12]
 [datetime.date(2020, 12, 1) 3399 2210.5 12]]


In [38]:
# Label the four columns of the converted array and preview the first rows
df_8 = pd.DataFrame(
    data=np_array_7,
    columns=['sales_month', 'sales', 'moving_avg', 'records_count'],
)
print(df_8.head(n=5))

  sales_month sales   moving_avg records_count
0  1993-01-01  2123  2672.083252            12
1  1993-02-01  2005      2673.25            12
2  1993-03-01  2442       2676.5            12
3  1993-04-01  2762  2684.583252            12
4  1993-05-01  2873  2694.666748            12


In [39]:
# Monthly sales and their moving average drawn as two lines on one chart
plot_8 = df_8.hvplot.line(
    x='sales_month',
    y=['sales', 'moving_avg'],
    grid=True,
    title="Monthly sales and 12-month moving average for women's clothing stores",
    legend='top',
    shared_axes=False,
)
plot_8

# Monthly sales and cumulative annual sales for women's clothing stores

In [40]:
# Monthly sales and running year-to-date totals for women's clothing stores.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # sales_ytd restarts every calendar year (window partitioned by year).
        # An explicit ORDER BY makes the row order deterministic for plotting
        # (the original query had none).
        cursor.execute("""
SELECT sales_month
,sales
,sum(sales) over (partition by date_part('year',sales_month) order by sales_month) as sales_ytd
FROM retail_sales
WHERE kind_of_business = 'Women''s clothing stores' AND sales_month BETWEEN %s AND %s
ORDER BY sales_month
;""",
        ('2016-01-01', '2020-12-01'))  # execute()'s second argument supplies the %s query parameters
        month_sales_sales_ytd_w_stores = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(month_sales_sales_ytd_w_stores)

Connection to PostgreSQL DB successful
[[datetime.date(2016, 1, 1), Decimal('2477'), Decimal('2477')], [datetime.date(2016, 2, 1), Decimal('2939'), Decimal('5416')], [datetime.date(2016, 3, 1), Decimal('3659'), Decimal('9075')], [datetime.date(2016, 4, 1), Decimal('3519'), Decimal('12594')], [datetime.date(2016, 5, 1), Decimal('3606'), Decimal('16200')], [datetime.date(2016, 6, 1), Decimal('3240'), Decimal('19440')], [datetime.date(2016, 7, 1), Decimal('3141'), Decimal('22581')], [datetime.date(2016, 8, 1), Decimal('3388'), Decimal('25969')], [datetime.date(2016, 9, 1), Decimal('3170'), Decimal('29139')], [datetime.date(2016, 10, 1), Decimal('3274'), Decimal('32413')], [datetime.date(2016, 11, 1), Decimal('3718'), Decimal('36131')], [datetime.date(2016, 12, 1), Decimal('4714'), Decimal('40845')], [datetime.date(2017, 1, 1), Decimal('2454'), Decimal('2454')], [datetime.date(2017, 2, 1), Decimal('2763'), Decimal('5217')], [datetime.date(2017, 3, 1), Decimal('3485'), Decimal('8702')], [da

In [41]:
# Prepare the values for plotting: monthly sales (column 1) become NumPy
# integers and the year-to-date total (column 2) a 32-bit float, row by row.
np_array_8 = np.array(month_sales_sales_ytd_w_stores)
for row_idx in range(len(np_array_8)):
    np_array_8[row_idx, 1] = np.int_(np_array_8[row_idx, 1])
    np_array_8[row_idx, 2] = np.float32(np_array_8[row_idx, 2])
print(np_array_8)

[[datetime.date(2016, 1, 1) 2477 2477.0]
 [datetime.date(2016, 2, 1) 2939 5416.0]
 [datetime.date(2016, 3, 1) 3659 9075.0]
 [datetime.date(2016, 4, 1) 3519 12594.0]
 [datetime.date(2016, 5, 1) 3606 16200.0]
 [datetime.date(2016, 6, 1) 3240 19440.0]
 [datetime.date(2016, 7, 1) 3141 22581.0]
 [datetime.date(2016, 8, 1) 3388 25969.0]
 [datetime.date(2016, 9, 1) 3170 29139.0]
 [datetime.date(2016, 10, 1) 3274 32413.0]
 [datetime.date(2016, 11, 1) 3718 36131.0]
 [datetime.date(2016, 12, 1) 4714 40845.0]
 [datetime.date(2017, 1, 1) 2454 2454.0]
 [datetime.date(2017, 2, 1) 2763 5217.0]
 [datetime.date(2017, 3, 1) 3485 8702.0]
 [datetime.date(2017, 4, 1) 3624 12326.0]
 [datetime.date(2017, 5, 1) 3616 15942.0]
 [datetime.date(2017, 6, 1) 3341 19283.0]
 [datetime.date(2017, 7, 1) 3199 22482.0]
 [datetime.date(2017, 8, 1) 3293 25775.0]
 [datetime.date(2017, 9, 1) 3122 28897.0]
 [datetime.date(2017, 10, 1) 3299 32196.0]
 [datetime.date(2017, 11, 1) 3774 35970.0]
 [datetime.date(2017, 12, 1) 4690 4

In [42]:
# Label the three columns of the converted array and preview the first rows
df_9 = pd.DataFrame(
    data=np_array_8,
    columns=['sales_month', 'sales', 'sales_ytd'],
)
print(df_9.head(n=5))

  sales_month sales sales_ytd
0  2016-01-01  2477    2477.0
1  2016-02-01  2939    5416.0
2  2016-03-01  3659    9075.0
3  2016-04-01  3519   12594.0
4  2016-05-01  3606   16200.0


In [43]:
# Top panel: monthly sales as a line
plot_a = df_9.hvplot.line(
    x='sales_month',
    y='sales',
    grid=True,
    shared_axes=False,
)

# Bottom panel: year-to-date totals as bars, x labels rotated by 60 degrees
plot_b = df_9.hvplot.bar(
    x='sales_month',
    y='sales_ytd',
    alpha=0.6,
    color='red',
    grid=True,
    shared_axes=False,
    rot=60,
)

# Stack the two panels in a single column
delta = plot_a + plot_b
delta = delta.cols(1)

delta

# Retail sales in bookstores: YoY difference and percentage change in YoY

In [44]:
# Book store sales per month with the absolute and percentage difference
# from the same month one year earlier.
# db_name, db_user, db_password, db_host and db_port are expected to be defined
# earlier in the notebook (not visible in this section).
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
if connection is None:
    # create_connection() prints the driver error and returns None on failure;
    # stop here with a clear message instead of an AttributeError on .cursor().
    raise RuntimeError("Could not connect to PostgreSQL; see the error printed above")

try:
    # The cursor context manager closes the cursor even if the query fails.
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # lag() is partitioned by calendar month, so the previous row is the
        # same month of the prior year.
        # NOTE(review): COALESCE substitutes the hard-coded values 208 and 26.32
        # wherever lag() has no prior row, so every first-year month reports the
        # same placeholder diff - confirm these fill-ins are intended.
        cursor.execute("""
SELECT sales_month, sales
,coalesce(sales - lag(sales) over (partition by date_part('month',sales_month) order by sales_month), 208) as absolute_diff
,coalesce((sales / lag(sales) over (partition by date_part('month',sales_month) order by sales_month) - 1) * 100, 26.32) as pct_diff
FROM retail_sales
WHERE kind_of_business = 'Book stores'
GROUP BY 1,2
ORDER BY 1
;
""")
        retail_sales_in_bookstores = cursor.fetchall()
finally:
    connection.close()  # always release the connection, success or failure
print(retail_sales_in_bookstores)

Connection to PostgreSQL DB successful
[[datetime.date(1992, 1, 1), Decimal('790'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 2, 1), Decimal('539'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 3, 1), Decimal('535'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 4, 1), Decimal('523'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 5, 1), Decimal('552'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 6, 1), Decimal('589'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 7, 1), Decimal('592'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 8, 1), Decimal('894'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 9, 1), Decimal('861'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 10, 1), Decimal('645'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 11, 1), Decimal('642'), Decimal('208'), Decimal('26.32')], [datetime.date(1992, 12, 1), Decimal('1165'), Decimal('208'), Decimal('26.32')], [datetim

In [45]:
# Replace the Decimal values returned by the query with NumPy scalars so the
# columns can be plotted. Row layout: [sales_month, sales, absolute_diff, pct_diff].
np_array_9 = np.array(retail_sales_in_bookstores)
column_casts = {1: np.float32, 2: np.int_, 3: np.float32}
for row in np_array_9:
    # Each `row` is a view into np_array_9, so the assignment updates it in place.
    for col, cast in column_casts.items():
        row[col] = cast(row[col])
print(np_array_9)

[[datetime.date(1992, 1, 1) 790.0 208 26.32]
 [datetime.date(1992, 2, 1) 539.0 208 26.32]
 [datetime.date(1992, 3, 1) 535.0 208 26.32]
 ...
 [datetime.date(2020, 10, 1) 455.0 -172 -27.432217]
 [datetime.date(2020, 11, 1) 496.0 -122 -19.7411]
 [datetime.date(2020, 12, 1) 900.0 -137 -13.211186]]


In [47]:
# Convert the book-store YoY array into a DataFrame.
# `np_array_9` is an object-dtype array (dates mixed with numbers), so pandas
# would otherwise keep every column as `object`. Cast the measures to numeric
# dtypes matching the scalar types chosen in the conversion cell
# (np.float32 / integer / np.float32); `sales_month` is left as-is.
df_10 = pd.DataFrame(
    np_array_9,
    columns=['sales_month', 'sales', 'absolute_diff', 'pct_diff'],
).astype({
    'sales': 'float32',
    'absolute_diff': 'int64',
    'pct_diff': 'float32',
})
print(df_10.head())

  sales_month  sales absolute_diff pct_diff
0  1992-01-01  790.0           208    26.32
1  1992-02-01  539.0           208    26.32
2  1992-03-01  535.0           208    26.32
3  1992-04-01  523.0           208    26.32
4  1992-05-01  552.0           208    26.32


In [48]:
# Monthly book-store sales (no explicit width is set for this chart).
plot_c = df_10.hvplot.line(
    x='sales_month',
    y='sales',
    height=400,
    grid=True,
    shared_axes=False,
)

# Year-over-year absolute difference in sales.
plot_d = df_10.hvplot.line(
    x='sales_month',
    y='absolute_diff',
    width=600,
    height=400,
    grid=True,
    shared_axes=False,
)

# Year-over-year percentage difference in sales.
plot_e = df_10.hvplot.line(
    x='sales_month',
    y='pct_diff',
    width=600,
    height=400,
    grid=True,
    shared_axes=False,
)

# Put the three charts into one layout and display it.
ampyla = plot_c + plot_d + plot_e

ampyla

# Retail sales in bookstores for 1992-1994, lined up by month

In [49]:
# Fetch book-store sales for 1992-1994 pivoted so that each row is a calendar
# month and each year has its own column.
connection = create_connection(db_name, db_user, db_password, db_host, db_port)
# create_connection() returns None when connecting fails; stop here with a clear
# message instead of an AttributeError on `None.cursor`.
if connection is None:
    raise RuntimeError("No PostgreSQL connection available; see the error printed above")

try:
    # The `with` block closes the cursor even if the query raises.
    # Alternative cursor type: cursor_factory=NamedTupleCursor (psycopg2.extras).
    with connection.cursor(cursor_factory=DictCursor) as cursor:
        # ORDER BY 1 makes the January..December row order explicit: GROUP BY
        # alone does not guarantee any ordering, and the line plot built from
        # this result connects the points in row order.
        # to_char(..., 'Month') blank-pads the month names (see the output).
        cursor.execute("""
            SELECT date_part('month',sales_month) as month_number
            ,to_char(sales_month,'Month') as month_name
            ,max(case when date_part('year',sales_month) = 1992 then sales end) as sales_1992
            ,max(case when date_part('year',sales_month) = 1993 then sales end) as sales_1993
            ,max(case when date_part('year',sales_month) = 1994 then sales end) as sales_1994
            FROM retail_sales
            WHERE kind_of_business = 'Book stores' and sales_month between '1992-01-01' and '1994-12-01'
            GROUP BY 1,2
            ORDER BY 1
            ;
        """)
        retail_sales_bookstores_1992_1994 = cursor.fetchall()
finally:
    # Always release the connection, whether or not the query succeeded.
    connection.close()
print(retail_sales_bookstores_1992_1994)

Connection to PostgreSQL DB successful
[[1.0, 'January  ', Decimal('790'), Decimal('998'), Decimal('1053')], [2.0, 'February ', Decimal('539'), Decimal('568'), Decimal('635')], [3.0, 'March    ', Decimal('535'), Decimal('602'), Decimal('634')], [4.0, 'April    ', Decimal('523'), Decimal('583'), Decimal('610')], [5.0, 'May      ', Decimal('552'), Decimal('612'), Decimal('684')], [6.0, 'June     ', Decimal('589'), Decimal('618'), Decimal('724')], [7.0, 'July     ', Decimal('592'), Decimal('607'), Decimal('678')], [8.0, 'August   ', Decimal('894'), Decimal('983'), Decimal('1154')], [9.0, 'September', Decimal('861'), Decimal('903'), Decimal('1022')], [10.0, 'October  ', Decimal('645'), Decimal('669'), Decimal('732')], [11.0, 'November ', Decimal('642'), Decimal('692'), Decimal('772')], [12.0, 'December ', Decimal('1165'), Decimal('1273'), Decimal('1409')]]


In [50]:
# Replace the Decimal sales figures with np.float32 scalars so they can be
# plotted. Row layout: [month_number, month_name, sales_1992, sales_1993, sales_1994].
np_array_10 = np.array(retail_sales_bookstores_1992_1994)
for row in np_array_10:
    # Each `row` is a view into np_array_10, so the assignment updates it in place.
    for col in (2, 3, 4):
        row[col] = np.float32(row[col])
print(np_array_10)

[[1.0 'January  ' 790.0 998.0 1053.0]
 [2.0 'February ' 539.0 568.0 635.0]
 [3.0 'March    ' 535.0 602.0 634.0]
 [4.0 'April    ' 523.0 583.0 610.0]
 [5.0 'May      ' 552.0 612.0 684.0]
 [6.0 'June     ' 589.0 618.0 724.0]
 [7.0 'July     ' 592.0 607.0 678.0]
 [8.0 'August   ' 894.0 983.0 1154.0]
 [9.0 'September' 861.0 903.0 1022.0]
 [10.0 'October  ' 645.0 669.0 732.0]
 [11.0 'November ' 642.0 692.0 772.0]
 [12.0 'December ' 1165.0 1273.0 1409.0]]


In [51]:
# Convert the month-by-year pivot into a DataFrame.
# `np_array_10` is an object-dtype array (numbers mixed with month-name
# strings), so pandas would otherwise keep every column as `object`. Cast the
# numeric columns explicitly so the month axis and the sales series are treated
# as numbers when plotted; `month_name` stays a string column.
df_11 = pd.DataFrame(
    np_array_10,
    columns=['month_number', 'month_name', 'sales_1992', 'sales_1993', 'sales_1994'],
).astype({
    'month_number': 'float64',
    'sales_1992': 'float32',
    'sales_1993': 'float32',
    'sales_1994': 'float32',
})
print(df_11.head())

  month_number month_name sales_1992 sales_1993 sales_1994
0          1.0  January        790.0      998.0     1053.0
1          2.0  February       539.0      568.0      635.0
2          3.0  March          535.0      602.0      634.0
3          4.0  April          523.0      583.0      610.0
4          5.0  May            552.0      612.0      684.0


In [52]:
# One line per year (1992, 1993, 1994) over the month number, so the yearly
# curves can be compared month by month; the legend is placed at the top.
plot_9 = df_11.hvplot.line(
    x='month_number',
    y=['sales_1992', 'sales_1993', 'sales_1994'],
    title='Retail sales in bookstores for 1992-1994, lined up by month',
    legend='top',
    grid=True,
    shared_axes=False,
)
plot_9

# Preparation of the dashboard

In [57]:
def _sized(plot):
    """Apply the dashboard's common 1350x400 size to a plot or layout via `.opts`."""
    return plot.opts(width=1350, height=400)


# Sidebar: short description of the project and its data source.
sidebar_panes = [
    pn.pane.Markdown('# About the project'),
    pn.pane.Markdown('### This project uses data available on [Census.gov](https://www.census.gov/retail/index.html#mrts). The data from this report is used as an economic indicator to understand consumer spending trends in the United States. The data cover the period from 1992 to 2020 and include both total sales and detailing by retail sales category. The project also uses different types of visualization, such as: [Panel](https://panel.holoviz.org/), [hvPlot](https://hvplot.holoviz.org/).'),
    pn.pane.Markdown('### You can also see other works placed in the repository on Github.'),
]

# Main area: one Markdown heading per topic followed by its charts.
main_panes = [
    pn.pane.Markdown('# Retail sales in the USA'),
    pn.Row(pn.Column(_sized(plot_1), _sized(plot_2), _sized(plot_9))),

    pn.pane.Markdown("# Sales in men's and women's clothing stores"),
    pn.Row(_sized(delta_2)),

    pn.Row(pn.Column(_sized(plot_7), _sized(plot_8))),

    pn.pane.Markdown("# Monthly sales and cumulative annual sales for women's clothing stores"),
    pn.Row(pn.Column(_sized(plot_a), _sized(plot_b))),

    pn.pane.Markdown('# Retail sales in bookstores: YoY difference and percentage change in YoY'),
    pn.Row(_sized(ampyla)),
]

# Assemble the dark-themed dashboard template.
template = pn.template.FastListTemplate(
    theme=DarkTheme,
    title='Time series queries',
    sidebar=sidebar_panes,
    main=main_panes,
    accent_base_color='#d78929',
    header_background='#d78929',
    sidebar_footer='<br><br><a href="https://github.com/DanilaPastukhov00">GitHub Repository</a>',
    main_max_width='100%',
)

# Mark the template as servable; the trailing `;` suppresses the cell output.
template.servable();

In [58]:
# Serve the dashboard from a local Panel server so that it is shown outside the
# notebook, in another window. As the output below shows, the call reports the
# server URL ("Launching server at ...") and returns the server object.
template.show()

Launching server at http://localhost:64939


<panel.io.server.Server at 0x128502150>