# Visualization using Python


In [1]:
# installing package
! pip install pandas


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# installing package
! pip install pandasql

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandasql
  Downloading pandasql-0.7.3.tar.gz (26 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pandasql
  Building wheel for pandasql (setup.py) ... [?25l[?25hdone
  Created wheel for pandasql: filename=pandasql-0.7.3-py3-none-any.whl size=26787 sha256=bf601eff4eba235931eeb763f116980c737b731ca7ca4261aa735c41d1ee3837
  Stored in directory: /root/.cache/pip/wheels/ed/8f/46/a383923333728744f01ba24adbd8e364f2cb9470a8b8e5b9ff
Successfully built pandasql
Installing collected packages: pandasql
Successfully installed pandasql-0.7.3


In [3]:
# installing package
! pip install plotly

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
# installing package
! pip install dash

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [6]:
# Library data manipulation
import pandas as pd
import pandasql as ps


# Library Data Visualization
import plotly.express as px
import plotly.graph_objects as go

# Library Dashboarding
import dash
from dash import html 
from dash import dcc 

# Callback (input + output)
from dash import Input
from dash import Output


### <font color='darkred'>B. Dataset</font>

Berikut ini tampilan data transaksi supermarket kita sepanjang tahun 2019, lengkap dengan masing-masing penjelasannya untuk tiap kolom.
* `order_id` : ID dari order/transaksi, 1 transaksi bisa terdiri dari beberapa produk, tetapi hanya dilakukan oleh 1 customer
* `order_date` : tanggal terjadinya transaksi
* `customer_id` : ID dari pembeli, bisa jadi dalam satu hari, 1 customer melakukan transaksi beberapa kali
* `city` : kota tempat toko terjadinya transaksi
* `province` : provinsi (berdasarkan city)
* `product_id` : ID dari suatu product yang dibeli
* `brand` : brand/merk dari product. Suatu product yang sama pasti memiliki brand yang sama
* `quantity` : Kuantitas/banyaknya product yang dibeli
* `item_price` : Harga dari 1 product (dalam Rupiah). Suatu product yang sama, bisa jadi memiliki harga yang berbeda saat dibeli


Data location:
https://storage.googleapis.com/dqlab-dataset/retail_raw_reduced.csv

In [7]:
# Load the raw retail transactions straight from the hosted CSV file.
DATASET_URL = 'https://storage.googleapis.com/dqlab-dataset/retail_raw_reduced.csv'
dataset = pd.read_csv(DATASET_URL)

In [8]:
# Display the loaded transactions frame (the notebook renders a truncated preview).
dataset

Unnamed: 0,order_id,order_date,customer_id,city,province,product_id,brand,quantity,item_price
0,1703458,2019-10-17,14004,Jakarta Selatan,DKI Jakarta,P1910,BRAND_J,10,740000
1,1706815,2019-10-24,17220,Jakarta Selatan,DKI Jakarta,P2934,BRAND_R,2,604000
2,1710718,2019-11-03,16518,Jakarta Utara,DKI Jakarta,P0908,BRAND_C,8,1045000
3,1683592,2019-08-19,16364,Jakarta Barat,DKI Jakarta,P0128,BRAND_A,4,205000
4,1702573,2019-10-16,15696,Jakarta Timur,DKI Jakarta,P2968,BRAND_R,2,4475000
...,...,...,...,...,...,...,...,...,...
4995,1724011,2019-12-01,12838,Tangerang,Banten,P3047,BRAND_R,2,450000
4996,1676302,2019-07-28,13833,Bogor,Jawa Barat,P0760,BRAND_C,3,1465000
4997,1706071,2019-10-23,16332,Jakarta Timur,DKI Jakarta,P1681,BRAND_H,4,747000
4998,1703620,2019-10-17,13055,Jakarta Barat,DKI Jakarta,P0757,BRAND_C,8,695000


### <font color='darkred'>C. Plotting with Plotly</font>

In [10]:
# Add a new column: GMV (gross merchandise value) = item price x quantity, per row.
dataset["gmv"] = dataset["item_price"].mul(dataset["quantity"])

# Preview the first rows to confirm the new column.
dataset.head()

Unnamed: 0,order_id,order_date,customer_id,city,province,product_id,brand,quantity,item_price,gmv
0,1703458,2019-10-17,14004,Jakarta Selatan,DKI Jakarta,P1910,BRAND_J,10,740000,7400000
1,1706815,2019-10-24,17220,Jakarta Selatan,DKI Jakarta,P2934,BRAND_R,2,604000,1208000
2,1710718,2019-11-03,16518,Jakarta Utara,DKI Jakarta,P0908,BRAND_C,8,1045000,8360000
3,1683592,2019-08-19,16364,Jakarta Barat,DKI Jakarta,P0128,BRAND_A,4,205000,820000
4,1702573,2019-10-16,15696,Jakarta Timur,DKI Jakarta,P2968,BRAND_R,2,4475000,8950000


In [11]:
# Add a new column: order month, taken as the first 7 characters of the
# order_date string (e.g. "2019-10-17" -> "2019-10").
dataset["order_month"] = dataset["order_date"].str[:7]

# Preview the first rows to confirm the new column.
dataset.head()

Unnamed: 0,order_id,order_date,customer_id,city,province,product_id,brand,quantity,item_price,gmv,order_month
0,1703458,2019-10-17,14004,Jakarta Selatan,DKI Jakarta,P1910,BRAND_J,10,740000,7400000,2019-10
1,1706815,2019-10-24,17220,Jakarta Selatan,DKI Jakarta,P2934,BRAND_R,2,604000,1208000,2019-10
2,1710718,2019-11-03,16518,Jakarta Utara,DKI Jakarta,P0908,BRAND_C,8,1045000,8360000,2019-11
3,1683592,2019-08-19,16364,Jakarta Barat,DKI Jakarta,P0128,BRAND_A,4,205000,820000,2019-08
4,1702573,2019-10-16,15696,Jakarta Timur,DKI Jakarta,P2968,BRAND_R,2,4475000,8950000,2019-10


In [12]:
# Total GMV per month, stored in a new dataframe that feeds the line charts.
# as_index=False keeps order_month as a regular column instead of the index.
monthly_agg_df = dataset.groupby("order_month", as_index=False)["gmv"].sum()
monthly_agg_df

Unnamed: 0,order_month,gmv
0,2019-07,3524041000
1,2019-08,4452923000
2,2019-09,3947002000
3,2019-10,6719937000
4,2019-11,6182229000
5,2019-12,8148235000


In [13]:
# Library Data Visualization
import plotly.express as px
import plotly.graph_objects as go


In [14]:
# Build the monthly GMV line chart with the graph_objects API:
# start from an empty figure, then attach a single scatter trace.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=monthly_agg_df["order_month"],
        y=monthly_agg_df["gmv"],
    )
)

# Chart title.
fig.update_layout(title="GMV Value in 2019")

# Render the figure.
fig.show()




# Using Express



In [15]:
# Same monthly GMV line chart, this time built in one call with plotly express.
fig = px.line(
    data_frame=monthly_agg_df,
    x="order_month",
    y="gmv",
    title="GMV Value in 2019",
)

# Render the figure.
fig.show()


#  **Update X & Y label**








In [16]:
# Monthly GMV line chart with a marker drawn on every data point.
# `markers` is a boolean flag; pass True rather than the string "True"
# (any non-empty string, including "False", would be treated as truthy).
fig = px.line(monthly_agg_df, x="order_month", y="gmv", title="GMV Value in 2019", markers=True)

# Style the axis titles (font size, family and colour) of the Y and X axes.
fig.update_yaxes(title_font=dict(size=22, family='arial', color='pink'))
fig.update_xaxes(title_font=dict(size=22, family='arial', color='purple'))

# Render the figure.
fig.show()


# Custom X & Y Ticks

In [17]:
# Monthly GMV line chart with a marker on every data point.
fig = px.line(
    monthly_agg_df,
    x="order_month",
    y="gmv",
    title="GMV Value in 2019",
    markers=True,
)

# Y axis: axis-title font plus three explicit tick positions.
fig.update_yaxes(
    title_font={"size": 22, "family": "arial", "color": "pink"},
    tickvals=[4000000000, 6000000000, 8000000000],
)

# X axis: axis-title font, tick labels rotated 45 degrees with their own font.
fig.update_xaxes(
    title_font={"size": 22, "family": "arial", "color": "purple"},
    tickangle=45,
    tickfont={"family": "arial", "color": "red", "size": 14},
)

# Render the figure.
fig.show()





# Styling grid lines

In [18]:
# Monthly GMV line chart with custom axis titles, ticks and grid lines.
# Every update_* call returns the figure, so the styling is chained.
fig = (
    px.line(monthly_agg_df, x="order_month", y="gmv", title="GMV Value in 2019", markers=True)
    # Y axis: title font, explicit tick positions, horizontal grid lines hidden.
    .update_yaxes(
        title_font={"size": 22, "family": "arial", "color": "pink"},
        tickvals=[4000000000, 6000000000, 8000000000],
        showgrid=False,
    )
    # X axis: title font, rotated tick labels, thick violet vertical grid lines.
    .update_xaxes(
        title_font={"size": 22, "family": "arial", "color": "purple"},
        tickangle=45,
        tickfont={"family": "arial", "color": "red", "size": 14},
        showgrid=True,
        gridwidth=2,
        gridcolor="violet",
    )
)

# Render the figure.
fig.show()





# Custom Line & Marker

In [19]:
# Monthly GMV line chart with a marker on every data point.
fig = px.line(
    monthly_agg_df,
    x="order_month",
    y="gmv",
    title="GMV Value in 2019",
    markers=True,
)

# Y axis: title font, explicit tick positions, horizontal grid lines hidden.
fig.update_yaxes(
    title_font={"size": 22, "family": "arial", "color": "pink"},
    tickvals=[4000000000, 6000000000, 8000000000],
    showgrid=False,
)

# X axis: title font, rotated tick labels, thick violet vertical grid lines.
fig.update_xaxes(
    title_font={"size": 22, "family": "arial", "color": "purple"},
    tickangle=45,
    tickfont={"family": "arial", "color": "red", "size": 14},
    showgrid=True,
    gridwidth=2,
    gridcolor="violet",
)

# Trace styling: thick brown line with large grey markers.
fig.update_traces(
    line={"color": "brown", "width": 3},
    marker={"color": "grey", "size": 10},
)

# Render the figure.
fig.show()





# C.1.2.5 Custom Title

In [20]:

# Monthly GMV line chart with every customisation applied: axis titles, ticks,
# grid lines, title styling and trace styling. Every update_* call returns the
# figure, so the styling is chained.
fig = (
    px.line(monthly_agg_df, x="order_month", y="gmv", title="GMV Value in 2019", markers=True)
    # Y axis: title font, explicit tick positions, horizontal grid lines hidden.
    .update_yaxes(
        title_font={"size": 22, "family": "arial", "color": "pink"},
        tickvals=[4000000000, 6000000000, 8000000000],
        showgrid=False,
    )
    # X axis: title font, rotated tick labels, thick violet vertical grid lines.
    .update_xaxes(
        title_font={"size": 22, "family": "arial", "color": "purple"},
        tickangle=45,
        tickfont={"family": "arial", "color": "red", "size": 14},
        showgrid=True,
        gridwidth=2,
        gridcolor="violet",
    )
    # Chart title: dark blue, size 18, horizontally centred (title_x=0.5).
    .update_layout(title_font={"size": 18, "color": "darkblue"}, title_x=0.5)
    # Trace styling: thick brown line with large grey markers.
    .update_traces(
        line={"color": "brown", "width": 3},
        marker={"color": "grey", "size": 10},
    )
)

# Render the figure.
fig.show()





In [21]:
# Build a new dataframe for the multi-line chart:
# keep only the rows from the two provinces of interest, then total GMV
# for every (month, city) pair.
monthly_agg_city_df = (
    dataset[dataset["province"].isin(["Jawa Timur", "Jawa Tengah"])]
    .groupby(["order_month", "city"])["gmv"]
    .sum()
    .reset_index()
)

In [22]:
# Preview the first rows of the per-month, per-city GMV totals.
monthly_agg_city_df.head()

Unnamed: 0,order_month,city,gmv
0,2019-07,Malang,139548000
1,2019-07,Purwokerto,102401000
2,2019-07,Semarang,54800000
3,2019-07,Surabaya,142528000
4,2019-07,Surakarta,95228000


In [23]:
# Multi-line chart: one GMV line per city, coloured by the `city` column.
# Every update_* call returns the figure, so the styling is chained.
fig = (
    px.line(monthly_agg_city_df, x="order_month", y="gmv", color="city")
    # Title: text, font and horizontal centring (title_x=0.5).
    .update_layout(
        title="GMV TOTAL PER CITY",
        title_font={"size": 22, "color": "darkorange"},
        title_x=0.5,
    )
    # Axis titles: same font on both the Y and X axes.
    .update_yaxes(title_font={"size": 20, "family": "calibri", "color": "violet"})
    .update_xaxes(title_font={"size": 20, "family": "calibri", "color": "violet"})
)

# Render the figure.
fig.show()
