## streamlit을 이용한 대쉬보드 그리기

### (1) 코드 함수 모듈화

In [12]:
# !pip install streamlit
# !pip install localtunnel
# !pip install "ipywidgets>=7, <8"

In [13]:
import pandas as pd
import numpy as np
import plotly.express as px

In [14]:
Olist = pd.read_csv("./Data/List of Orders.csv")
Detail = pd.read_csv("./Data/Order Details.csv")
data = Olist.merge(Detail, on = 'Order ID')
data.shape
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Order ID      1500 non-null   object 
 1   Order Date    1500 non-null   object 
 2   CustomerName  1500 non-null   object 
 3   State         1500 non-null   object 
 4   City          1500 non-null   object 
 5   Amount        1500 non-null   float64
 6   Profit        1500 non-null   float64
 7   Quantity      1500 non-null   int64  
 8   Category      1500 non-null   object 
 9   Sub-Category  1500 non-null   object 
dtypes: float64(2), int64(1), object(7)
memory usage: 117.3+ KB


In [15]:
data['Order Date'] = pd.to_datetime(data['Order Date'], format='%d-%m-%Y')
data['year'] = data['Order Date'].dt.year
data['month'] = data['Order Date'].dt.month
data['yearmonth'] = data['Order Date'].astype('str').str.slice(0, 7)

In [16]:
def load_data() : 
    Olist = pd.read_csv("./Data/List of Orders.csv")
    Detail = pd.read_csv("./Data/Order Details.csv")
    data = Olist.merge(Detail, on = 'Order ID')

    return data

def preproc():
    data['Order Date'] = pd.to_datetime(data['Order Date'], format='%d-%m-%Y')
    data['year'] = data['Order Date'].dt.year
    data['month'] = data['Order Date'].dt.month
    data['yearmonth'] = data['Order Date'].astype('str').str.slice(0, 7)

    return data

In [17]:
def line_chart(data, x, y, title) : 
    df = data.groupby(x).agg({y : 'sum'}).reset_index()
    fig = px.line(df, x=x, y=y, title=title)
    fig.show()

    return fig

# 함수 확인
fig1 = line_chart(data, 'yearmonth', 'Quantity', 'Sales Quantity by month')
fig2 = line_chart(data, 'yearmonth', 'Amount', 'Sales Amount by month')

In [18]:
def bar_chart(data, x, y, color=None):
    if color is not None:
        index = [x, color]
    else : 
        index = x

    df = data.pivot_table(index=index, values=y, aggfunc='sum').reset_index()
    fig = px.bar(df, x = x, y = y, color = color)
    fig.show()
    
    return fig

# 함수 확인
fig3 = bar_chart(data, 'Category', 'Quantity')
fig4 = bar_chart(data, 'yearmonth', 'Quantity', 'Category')

In [19]:
def heatmap(data, z, title) : 
    df = data.pivot_table(index = ['State', 'Sub-Category'], values=['Quantity', 'Amount', 'Profit'], aggfunc='sum').reset_index()
    fig = px.density_heatmap(df, x='State', y='Sub-Category', z=z, title=title)
    fig.show()

    return fig

fig5 = heatmap(data, 'Quantity', 'Quantity heat map')
fig6 = heatmap(data, 'Amount', 'Amount heat map')

### (2) streamlit을 이용한 대시보드 구현

In [20]:
%%writefile app.py

import streamlit as st
import plotly.express as px
import pandas as pd
import numpy as np

# 데이터 로드
@st.cache_data
def load_data() : 
    Olist = pd.read_csv("./Data/List of Orders.csv")
    Detail = pd.read_csv("./Data/Order Details.csv")
    data = Olist.merge(Detail, on = 'Order ID')

    return data

# 전처리
def preproc():
    data['Order Date'] = pd.to_datetime(data['Order Date'], format='%d-%m-%Y')
    data['year'] = data['Order Date'].dt.year
    data['month'] = data['Order Date'].dt.month
    data['yearmonth'] = data['Order Date'].astype('str').str.slice(0, 7)

    return data

# line chart
def line_chart(data, x, y, title) : 
    df = data.groupby(x).agg({y : 'sum'}).reset_index()
    fig = px.line(df, x=x, y=y, title=title)
    # fig.show()

    return df,fig

# bar chart
def bar_chart(data, x, y, color=None):
    if color is not None:
        index = [x, color]
    else : 
        index = x

    df = data.pivot_table(index=index, values=y, aggfunc='sum').reset_index()
    fig = px.bar(df, x = x, y = y, color = color)
    # fig.show()
    
    return fig

# heatmap
def heatmap(data, z, title) : 
    df = data.pivot_table(index = ['State', 'Sub-Category'], values=['Quantity', 'Amount', 'Profit'], aggfunc='sum').reset_index()
    fig = px.density_heatmap(df, x='State', y='Sub-Category', z=z, title=title)
    # fig.show()

    return fig

if __name__ == "__main__" :

    st.title('E-Commerce Data 분석')
    st.write('시각화 대시보드 만들기')

    # 데이터 로드
    data = load_data()
    # 데이터 전처리
    data = preproc()

st.subheader('월별 판매량 분석')
with st.form('form', clear_on_submit = True):
    col1, col2 = st.columns(2)
    submitted1 = col1.form_submit_button('판매량 그래프')
    submitted2 = col2.form_submit_button('매출액 그래프')
    if submitted1:
        df1, fig1 = line_chart(data, 'yearmonth', 'Quantity', 'Sales Quantity by month')
        st.dataframe(df1.T)
        st.plotly_chart(fig1, theme='streamlit', use_container_width=True)
    elif submitted2:
        df2, fig2 = line_chart(data, 'yearmonth', 'Amount', 'Sales Amount by month')
        st.dataframe(df2.T)
        st.plotly_chart(fig2, theme='streamlit', use_container_width=True)

st.subheader('품목별 판매량')
col1, col2 = st.columns(2)
with col1 : 
    col1.subheader('카테고리별 판매량')
    fig3 = bar_chart(data, 'Category', 'Quantity')
    st.plotly_chart(fig3, theme='streamlit', use_container_width=True)
with col2 : 
    col2.subheader('월별/카테고리별 누적 차트')
    fig4 = bar_chart(data, 'yearmonth', 'Quantity', 'Category')
    st.plotly_chart(fig4, theme='streamlit', use_container_width=True)

st.subheader('지역별 주력 판매 상품')
tab1, tab2 = st.tabs(['Quantity heat map', 'Amount heat map'])
with tab1:
    fig5 = heatmap(data, 'Quantity', 'Quantity heat map')
    st.plotly_chart(fig5, theme='streamlit', use_container_width=True)
with tab2:
    fig6 = heatmap(data, 'Amount', 'Amount heat map')
    st.plotly_chart(fig6, theme='streamlit', use_container_width=True)

Overwriting app.py


In [21]:
# !pip install watchdog

In [22]:
# run app.py
!streamlit run app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://192.168.0.17:8501[0m
[0m
2024-05-20 23:02:18.883 Serialization of dataframe to Arrow table was unsuccessful due to: ("Expected bytes, got a 'int' object", 'Conversion failed for column 0 with type object'). Applying automatic fixes for column types to make the dataframe Arrow-compatible.
2024-05-20 23:02:21.407 Serialization of dataframe to Arrow table was unsuccessful due to: ("Expected bytes, got a 'int' object", 'Conversion failed for column 0 with type object'). Applying automatic fixes for column types to make the dataframe Arrow-compatible.
2024-05-20 23:02:22.173 Serialization of dataframe to Arrow table was unsuccessful due to: ("Expected bytes, got a 'float' object", 'Conversion failed for column 0 with type object'). Applying automatic fixes for column types to make the dataframe Arrow-compatible.
2024-05-20 23: