# Imports

In [3]:
# !pip install "dash[testing]"

Collecting dash[testing]
  Obtaining dependency information for dash[testing] from https://files.pythonhosted.org/packages/27/ad/7047095224013ec2ae37ba8ece5956773e7953c39a3af5aa20d821ed99aa/dash-2.17.1-py3-none-any.whl.metadata
  Downloading dash-2.17.1-py3-none-any.whl.metadata (10 kB)
Collecting dash-html-components==2.0.0 (from dash[testing])
  Obtaining dependency information for dash-html-components==2.0.0 from https://files.pythonhosted.org/packages/75/65/1b16b853844ef59b2742a7de74a598f376ac0ab581f0dcc34db294e5c90e/dash_html_components-2.0.0-py3-none-any.whl.metadata
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash[testing])
  Obtaining dependency information for dash-core-components==2.0.0 from https://files.pythonhosted.org/packages/00/9e/a29f726e84e531a36d56cff187e61d8c96d2cc253c5bcef9a7695acb7e6a/dash_core_components-2.0.0-py3-none-any.whl.metadata
  Downloading dash_core_components-2.0.0-py3-none-an

In [93]:
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px

# Reading csv files

In [35]:
# reading files
first_daily_sales = pd.read_csv('./data/daily_sales_data_0.csv')
second_daily_sales = pd.read_csv('./data/daily_sales_data_1.csv')
third_daily_sales = pd.read_csv('./data/daily_sales_data_2.csv')

In [38]:
# first transactions
print(first_daily_sales.shape)
first_daily_sales.head()

(13720, 5)


Unnamed: 0,product,price,quantity,date,region
0,pink morsel,$3.00,546,2018-02-06,north
1,pink morsel,$3.00,549,2018-02-06,south
2,pink morsel,$3.00,577,2018-02-06,east
3,pink morsel,$3.00,519,2018-02-06,west
4,gold morsel,$9.99,580,2018-02-06,north


In [39]:
# second transactions
print(second_daily_sales.shape)
second_daily_sales.head()

(13720, 5)


Unnamed: 0,product,price,quantity,date,region
0,pink morsel,$3.00,545,2019-06-11,north
1,pink morsel,$3.00,521,2019-06-11,south
2,pink morsel,$3.00,595,2019-06-11,east
3,pink morsel,$3.00,507,2019-06-11,west
4,gold morsel,$9.99,529,2019-06-11,north


In [40]:
# third day transactions
print(third_daily_sales.shape)
third_daily_sales.head()

(13720, 5)


Unnamed: 0,product,price,quantity,date,region
0,pink morsel,$3.00,526,2020-10-13,north
1,pink morsel,$3.00,546,2020-10-13,south
2,pink morsel,$3.00,505,2020-10-13,east
3,pink morsel,$3.00,561,2020-10-13,west
4,gold morsel,$9.99,553,2020-10-13,north


In [196]:
# all csv transactions combined
daily_sales = pd.concat([first_daily_sales, second_daily_sales, third_daily_sales])
print(daily_sales.shape)
daily_sales.head()

(41160, 5)


Unnamed: 0,product,price,quantity,date,region
0,pink morsel,$3.00,546,2018-02-06,north
1,pink morsel,$3.00,549,2018-02-06,south
2,pink morsel,$3.00,577,2018-02-06,east
3,pink morsel,$3.00,519,2018-02-06,west
4,gold morsel,$9.99,580,2018-02-06,north


# Data processing

In [197]:
# checking for null values, dtypes, and values consistencies
daily_sales.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41160 entries, 0 to 13719
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   product   41160 non-null  object
 1   price     41160 non-null  object
 2   quantity  41160 non-null  int64 
 3   date      41160 non-null  object
 4   region    41160 non-null  object
dtypes: int64(1), object(4)
memory usage: 1.9+ MB


In [198]:
daily_sales['product'].unique()

array(['pink morsel', 'gold morsel', 'magenta morsel',
       'chartreuse morsel', 'periwinkle morsel', 'vermilion morsel',
       'lapis morsel'], dtype=object)

In [199]:
daily_sales['price'] = daily_sales['price'].str.replace("$", "", regex=False).astype(float)
daily_sales['price'].unique()

array([3.  , 9.99, 2.5 , 1.25, 4.99, 1.99, 5.  ])

In [200]:
daily_sales['quantity'] = daily_sales['quantity'].astype(int)
daily_sales['quantity'].unique()

array([546, 549, 577, 519, 580, 530, 576, 522, 560, 583, 552, 543, 574,
       503, 506, 559, 533, 568, 556, 573, 544, 532, 585, 529, 566, 514,
       545, 512, 524, 594, 596, 537, 547, 501, 548, 578, 504, 561, 581,
       550, 518, 558, 536, 534, 567, 590, 515, 557, 600, 542, 535, 510,
       579, 511, 500, 520, 565, 586, 554, 505, 597, 584, 553, 588, 540,
       591, 525, 575, 509, 517, 523, 562, 569, 571, 599, 570, 521, 508,
       516, 589, 572, 551, 541, 531, 502, 507, 538, 564, 527, 598, 595,
       582, 526, 555, 593, 563, 528, 539, 513, 587, 592, 405, 439, 436,
       479, 414, 446, 408, 476, 450, 434, 470, 490, 431, 409, 466, 472,
       483, 480, 415, 440, 444, 460, 424, 430, 422, 429, 468, 478, 487,
       473, 484, 499, 489, 461, 454, 402, 445, 447, 417, 498, 467, 401,
       495, 464, 421, 418, 406, 400, 497, 491, 493, 458, 433, 404, 403,
       449, 425, 481, 496, 410, 482, 465, 416, 437, 459, 428, 438, 455,
       453, 413, 469, 474, 471, 419, 435, 448, 423, 488, 432, 40

In [201]:
daily_sales['date'] = pd.to_datetime(daily_sales['date'])
daily_sales['date'].unique()

array(['2018-02-06T00:00:00.000000000', '2018-02-07T00:00:00.000000000',
       '2018-02-08T00:00:00.000000000', ...,
       '2022-02-12T00:00:00.000000000', '2022-02-13T00:00:00.000000000',
       '2022-02-14T00:00:00.000000000'], dtype='datetime64[ns]')

In [202]:
daily_sales['region'].unique()

array(['north', 'south', 'east', 'west'], dtype=object)

In [203]:
daily_sales.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41160 entries, 0 to 13719
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   product   41160 non-null  object        
 1   price     41160 non-null  float64       
 2   quantity  41160 non-null  int32         
 3   date      41160 non-null  datetime64[ns]
 4   region    41160 non-null  object        
dtypes: datetime64[ns](1), float64(1), int32(1), object(2)
memory usage: 1.7+ MB


# Dashboard

In [205]:
# adding features to use later
daily_sales['profit'] = daily_sales['price'] * daily_sales['quantity']
daily_sales['year_month'] = daily_sales['date'].dt.to_period('M')
daily_sales['year'] = daily_sales['date'].dt.year

# filter for 'pink morsels'
pink_morsels_df = daily_sales[daily_sales['product'] == 'pink morsel']
pink_morsels_df.head()

Unnamed: 0,product,price,quantity,date,region,profit,year_month,year
0,pink morsel,3.0,546,2018-02-06,north,1638.0,2018-02,2018
1,pink morsel,3.0,549,2018-02-06,south,1647.0,2018-02,2018
2,pink morsel,3.0,577,2018-02-06,east,1731.0,2018-02,2018
3,pink morsel,3.0,519,2018-02-06,west,1557.0,2018-02,2018
28,pink morsel,3.0,529,2018-02-07,north,1587.0,2018-02,2018


In [215]:
# Initialize the Dash app
app = dash.Dash(__name__)

# Define the layout
app.layout = html.Div([
    html.H1("Pink Morsel Sales Dashboard"),
    dcc.Graph(id='profit-line-graph'),
    dcc.Graph(id='qty-line-graph'),
    dcc.Graph(id='profitability'),
    dcc.Store(id='pink-morsels-data', data=pink_morsels_df.reset_index(drop=True).to_json())
])


# Define callback to update graphs
@app.callback(
    [Output('profit-line-graph', 'figure'),
     Output('qty-line-graph', 'figure'),
     Output('profitability', 'figure')],
    [Input('pink-morsels-data', 'data')]
)
def update_graphs(pink_morsels_json):
    # Load the DataFrame from JSON
    pink_morsels_df = pd.read_json(pink_morsels_json)

    # Profit per Month
    pink_morsels_df['year_month'] = pink_morsels_df['year_month'].apply(lambda x: pd.to_datetime(f"{x['year']}-{x['month']}"))
    sales_line_fig = px.line(
        pink_morsels_df[['year_month', 'profit']].groupby('year_month')['profit'].mean().reset_index(), x='year_month', y='profit', title='Pink Morsel Profits Over Months',
        hover_data={'year_month': True, 'profit': True}
    )

    # Quantity Sold per Month
    qty_line_fig = px.line(
        pink_morsels_df[['year_month', 'quantity']].groupby('year_month')['quantity'].sum().reset_index(), x='year_month', y='quantity', title='Pink Morsel QTY. Sold Over Months',
        hover_data={'year_month': True, 'quantity': True}
    )

    # Profitability per region
    profitability_fig = px.bar(
        pink_morsels_df.groupby('region')['profit'].mean(),
        title='Profit per Region',
        barmode='group'
    )

    return sales_line_fig, qty_line_fig, profitability_fig


# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)