In [1]:
!pip install streamlit

In [22]:
%%writefile dashboard.py

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
import datetime as dt
from babel.numbers import format_currency

def create_byproduct_df(df):
    """Count items per English product category, largest count first.

    Args:
        df: DataFrame containing the ``product_category_name_english`` and
            ``product_id`` columns.

    Returns:
        DataFrame with one row per category. The ``product_id`` column holds
        the number of non-null product ids in that category, sorted in
        descending order.
    """
    items_per_category = df.groupby("product_category_name_english")["product_id"].count()
    ranked = items_per_category.sort_values(ascending=False)
    return ranked.reset_index()

def create_bypayment_df(df):
    """Compute the mean payment value per payment type, highest mean first.

    Args:
        df: DataFrame containing the ``payment_type`` and ``payment_value``
            columns.

    Returns:
        DataFrame with one row per payment type. The ``payment_value`` column
        holds the average payment value, sorted in descending order.
    """
    mean_by_type = df.groupby(by="payment_type")["payment_value"].mean()
    ranked = mean_by_type.sort_values(ascending=False)
    return ranked.reset_index()

def customers_rating_df(df):
    """Summarise the ``review_score`` column.

    Args:
        df: DataFrame containing a ``review_score`` column.

    Returns:
        tuple: ``(customers_rating, max_score, customers_df)`` where
            * ``customers_rating`` is a Series of occurrence counts per score,
              sorted from most to least frequent,
            * ``max_score`` is the score value with the highest count,
            * ``customers_df`` is the raw ``review_score`` column.
    """
    customers_rating = df['review_score'].value_counts().sort_values(ascending=False)
    max_score = customers_rating.idxmax()
    customers_df = df['review_score']
    # The original returned the undefined name `customer_df`, which raised
    # NameError on every call.
    return (customers_rating, max_score, customers_df)

def create_rfm_df(df):
    """Build a Recency / Frequency / Monetary table with one row per customer.

    Args:
        df: DataFrame containing ``customer_id``, ``order_id``,
            ``order_purchase_timestamp`` (datetime or parseable strings) and
            ``price`` columns. The frame is not modified.

    Returns:
        DataFrame with the columns ``customer_id``, ``Recency`` (whole days
        between the customer's latest purchase and the fixed reference date
        2018-12-10), ``Frequency`` (count of non-null ``order_id`` values)
        and ``Monetary`` (sum of ``price``).
    """
    current_time = dt.datetime(2018, 12, 10)

    # Parse into a local Series instead of writing back into `df`, so the
    # caller's frame is left untouched.
    purchase_ts = pd.to_datetime(df['order_purchase_timestamp'])
    last_purchase = purchase_ts.groupby(df['customer_id']).max()

    per_customer = df.groupby('customer_id')
    recency = (current_time - last_purchase).dt.days
    frequency = per_customer['order_id'].count()
    monetary = per_customer['price'].sum()

    # Combine the Series by their customer_id index (not by position) so the
    # three metrics can never be paired with the wrong customer.
    rfm = (
        pd.DataFrame({
            'Recency': recency,
            'Frequency': frequency,
            'Monetary': monetary,
        })
        .rename_axis('customer_id')
        .reset_index()
    )
    return rfm[['customer_id', 'Recency', 'Frequency', 'Monetary']]

# Load the dashboard dataset; the path is relative to the directory from
# which `streamlit run` is launched.
all_df = pd.read_csv("proyek_analisis_data/submission/dashboard/main_data.csv")

# Parse the listed columns into pandas datetimes in one column-wise pass.
datetime_columns = ["order_approved_at"]
all_df[datetime_columns] = all_df[datetime_columns].apply(pd.to_datetime)

# Sidebar
# Show the company logo in the sidebar.
st.sidebar.image("https://github.com/dicodingacademy/assets/raw/main/logo.png")

# TODO: read start_date & end_date from a date_input widget (not implemented here).



# Per-category item counts, sorted from most to least sold. This frame feeds
# both the headline numbers and the two bar charts below; it was previously
# referenced without ever being defined, which raised NameError at start-up.
most_and_least_products_df = create_byproduct_df(all_df)

st.subheader("pembelian terbesar dan terkecil")
col1, col2 = st.columns(2)

# Headline numbers: the largest and smallest per-category item counts.
with col1:
    highest_product_sold = most_and_least_products_df['product_id'].max()
    st.markdown(f"Higest Number : **{highest_product_sold}**")

with col2:
    lowest_product_sold = most_and_least_products_df['product_id'].min()
    st.markdown(f"Lowest Number : **{lowest_product_sold}**")

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))

# First bar is highlighted, the remaining four are greyed out.
colors = ["#90CAF9", "#D3D3D3", "#D3D3D3", "#D3D3D3", "#D3D3D3"]

# Left panel: the five categories with the highest counts (the frame is
# already sorted in descending order).
sns.barplot(
    x="product_id",
    y="product_category_name_english",
    data=most_and_least_products_df.head(5),
    palette=colors,
    ax=ax[0],
)
ax[0].set_ylabel('')
ax[0].set_xlabel('')
ax[0].set_title("products with the highest sales", loc="center", fontsize=18)
ax[0].tick_params(axis='y', labelsize=15)

# Right panel: the five categories with the lowest counts, mirrored so the
# bars grow leftwards and the labels sit on the right-hand side.
sns.barplot(
    x="product_id",
    y="product_category_name_english",
    data=most_and_least_products_df.sort_values(by="product_id", ascending=True).head(5),
    palette=colors,
    ax=ax[1],
)
ax[1].set_ylabel('')
ax[1].set_xlabel('')
ax[1].invert_xaxis()
ax[1].yaxis.set_label_position("right")
ax[1].yaxis.tick_right()
ax[1].set_title("products with the lowest sales", loc="center", fontsize=18)
ax[1].tick_params(axis='y', labelsize=15)

plt.suptitle("most and least sold products", fontsize=20)
st.pyplot(fig)

Overwriting dashboard.py


In [14]:
!streamlit run dashboard.py 