# Bubble (by Matthew)

In [138]:
import pandas as pd
import plotly.graph_objects as go
import networkx as nx

# Load and preprocess the v2 sales fact table (semicolon-separated CSV).
df_salesv2 = pd.read_csv('csv/fact_sales_v2.csv', sep=';')
display(df_salesv2.sort_values(by='order_id').head())

# Keep only the columns used downstream, ordered by order_id.
df_salesv2 = df_salesv2[['order_id', 'order_date', 'product_id', 'order_item_name', 'total_sales']].sort_values(by='order_id')

# String cleaning is only valid when total_sales was parsed as text. The `.str`
# accessor raises AttributeError on a numeric column, so every string step
# lives under the same guard instead of running before it.
if not pd.api.types.is_numeric_dtype(df_salesv2['total_sales']):
    df_salesv2['total_sales'] = (
        df_salesv2['total_sales']
        # Literal backslash-zero -> "0"; regex=False pins the literal meaning
        # regardless of the pandas version's default for `regex`.
        .str.replace('\\0', '0', regex=False)
        # Drop "." (treated here as a thousands separator) ...
        .str.replace('.', '', regex=False)
        # ... and turn the decimal comma into a decimal point.
        .str.replace(',', '.', regex=False)
        .astype(float)
    )

display(df_salesv2.head(30))
df_salesv2.shape

Unnamed: 0,order_id,order_date,product_id,order_item_name,total_sales,order_item_type,product_qty,product_net_revenue,customer_id,first_name,last_name,email,city,state
196,1036,2025-05-08 22:36:36,565,Atasan Basic Wanita,2,line_item,1,62700,19,Marlene,Greally,mgreallyo@quantcast.com,Pontianak,KB
195,1037,2025-05-08 22:53:26,1020,This Is April Atasan Kemeja Lengan Panjang,8,line_item,1,329000,24,Eberto,Nern,enernp@weibo.com,Pontianak,KB
194,1038,2025-05-08 22:56:25,1019,AMOENE [MADE] Long Strap Dress Navy,8,line_item,1,550000,20,Roma,Langfat,rlangfatq@istockphoto.com,Kota Singkawang,KB
193,1039,2025-05-08 22:59:51,1015,Point One NINDY Khaki Tunik Dress,1,line_item,1,159000,25,Alexandros,Korneichik,akorneichikr@quantcast.com,Kota Mempawah,KB
192,1040,2025-05-08 23:02:06,1010,Hosofshopaholic - Darel Skirt,7,line_item,1,160000,26,Giuseppe,Gerard,ggerards@instagram.com,Kabupaten Sintang,KB


Unnamed: 0,order_id,order_date,product_id,order_item_name,total_sales
196,1036,2025-05-08 22:36:36,565,Atasan Basic Wanita,2.0
195,1037,2025-05-08 22:53:26,1020,This Is April Atasan Kemeja Lengan Panjang,8.0
194,1038,2025-05-08 22:56:25,1019,AMOENE [MADE] Long Strap Dress Navy,8.0
193,1039,2025-05-08 22:59:51,1015,Point One NINDY Khaki Tunik Dress,1.0
192,1040,2025-05-08 23:02:06,1010,Hosofshopaholic - Darel Skirt,7.0
191,1041,2025-05-08 23:10:21,983,ORO PANTS,4.0
190,1042,2025-05-08 23:12:14,1010,Hosofshopaholic - Darel Skirt,7.0
189,1043,2025-05-08 23:19:30,1002,This Is April Rok Midi Crochet Wanita,7.0
188,1044,2025-05-08 23:23:15,1002,This Is April Rok Midi Crochet Wanita,7.0
187,1045,2025-05-08 23:25:58,586,Baju Atasan Lengan Pendek Wanita,3.0


(197, 5)

In [139]:
# Load the v1 sales fact table and reduce it to (order_id, ip_address) pairs.
df_salesv1 = pd.read_csv('csv/fact_sales_v1.csv', sep=';')
display(df_salesv1.head(), df_salesv1.shape)

# Rows whose order_item_type is 'shipping' are excluded; only the two columns
# needed for the later join on order_id are kept.
not_shipping = df_salesv1['order_item_type'] != 'shipping'
df_salesv1 = df_salesv1.loc[not_shipping, ['order_id', 'ip_address']]
display(df_salesv1.head())
df_salesv1.shape

Unnamed: 0,order_id,order_date,ip_address,total_amount,customer_id,first_name,last_name,email,country,city,state,order_item_name,order_item_type,product_qty
0,1036,2025-05-08 22:36:36,118.99.118.196,6270000000000,62,Vaclav,Wedge,vwedge1s@typepad.com,ID,Malang,JI,Atasan Basic Wanita,line_item,1
1,1036,2025-05-08 22:36:36,118.99.118.196,6270000000000,62,Vaclav,Wedge,vwedge1s@typepad.com,ID,Malang,JI,Free shipping,shipping,1
2,1037,2025-05-08 22:53:26,118.99.118.196,32900000000000,63,Ysabel,Grimster,ygrimster1h@timesonline.co.uk,ID,Samarinda,KI,This Is April Atasan Kemeja Lengan Panjang,line_item,1
3,1037,2025-05-08 22:53:26,118.99.118.196,32900000000000,63,Ysabel,Grimster,ygrimster1h@timesonline.co.uk,ID,Samarinda,KI,Free shipping,shipping,1
4,1038,2025-05-08 22:56:25,118.99.118.196,55000000000000,64,Olive,O'Logan,oologanc@imgur.com,ID,Kota Pontianak,KB,AMOENE [MADE] Long Strap Dress Navy,line_item,1


(752, 14)

Unnamed: 0,order_id,ip_address
0,1036,118.99.118.196
2,1037,118.99.118.196
4,1038,118.99.118.196
6,1039,118.99.118.196
8,1040,118.99.118.196


(555, 2)

In [140]:
# Inner-join the v2 sales rows with the v1 (order_id, ip_address) pairs.
# Exact duplicate rows produced by the join are removed afterwards.
merge_bubble = (
    pd.merge(df_salesv2, df_salesv1, how='inner', on='order_id')
    .sort_values(by='order_id')
    .drop_duplicates()
)
display(merge_bubble.head(50))
merge_bubble.shape

Unnamed: 0,order_id,order_date,product_id,order_item_name,total_sales,ip_address
0,1036,2025-05-08 22:36:36,565,Atasan Basic Wanita,2.0,118.99.118.196
1,1037,2025-05-08 22:53:26,1020,This Is April Atasan Kemeja Lengan Panjang,8.0,118.99.118.196
2,1038,2025-05-08 22:56:25,1019,AMOENE [MADE] Long Strap Dress Navy,8.0,118.99.118.196
3,1039,2025-05-08 22:59:51,1015,Point One NINDY Khaki Tunik Dress,1.0,118.99.118.196
4,1040,2025-05-08 23:02:06,1010,Hosofshopaholic - Darel Skirt,7.0,118.99.118.196
5,1041,2025-05-08 23:10:21,983,ORO PANTS,4.0,118.99.118.196
6,1042,2025-05-08 23:12:14,1010,Hosofshopaholic - Darel Skirt,7.0,118.99.118.196
7,1043,2025-05-08 23:19:30,1002,This Is April Rok Midi Crochet Wanita,7.0,118.99.118.196
8,1044,2025-05-08 23:23:15,1002,This Is April Rok Midi Crochet Wanita,7.0,118.99.118.196
9,1045,2025-05-08 23:25:58,586,Baju Atasan Lengan Pendek Wanita,3.0,118.99.118.196


(197, 6)

In [143]:
# One row per product: product_id is exposed as `id`, and only the first row
# seen for each id is kept.
df_final = (
    merge_bubble[['product_id', 'order_item_name', 'total_sales']]
    .rename(columns={'product_id': 'id'})
    .drop_duplicates(subset=['id'], keep='first')
)

# Take the five products with the highest total_sales.
df_top5 = df_final.sort_values(by='total_sales', ascending=False).head(5)

# Bubble size scaling: relative to the largest value, mapped onto 40..140.
top_sales = df_top5['total_sales']
sizes = (top_sales / top_sales.max()) * 100 + 40

# Bubble positions: an edgeless graph with one node per product, laid out with
# a seeded spring layout so the positions are reproducible.
G = nx.Graph()
G.add_nodes_from(range(len(df_top5)))

pos = nx.spring_layout(G, k=0.5, seed=42)

x_pos = [pos[node][0] for node in range(len(df_top5))]
y_pos = [pos[node][1] for node in range(len(df_top5))]

def wrap_text(text, max_words=4):
    """Wrap ``text`` so that each line holds at most ``max_words`` words.

    The text is split on whitespace, grouped into consecutive chunks of
    ``max_words`` words, and the chunks are joined with the HTML line break
    ``<br>``.

    Args:
        text: String to wrap.
        max_words: Maximum number of words per line (default 4).

    Returns:
        The wrapped string; an empty string when ``text`` has no words.
    """
    words = text.split()
    lines = []
    for start in range(0, len(words), max_words):
        lines.append(" ".join(words[start:start + max_words]))
    return "<br>".join(lines)

# Build the Plotly figure: one marker per top-5 product, sized by `sizes` and
# coloured by total_sales.
hover_labels = [
    f"{name}<br>Total Sales: {total:,.0f}"
    for name, total in zip(df_top5['order_item_name'], df_top5['total_sales'])
]

bubble_marker = dict(
    size=sizes,
    color=df_top5['total_sales'],
    colorscale='Viridis',
    showscale=True,
    line=dict(width=2, color='DarkSlateGrey'),
    sizemode='diameter',
    opacity=0.7,
)

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=x_pos,
        y=y_pos,
        mode='markers+text',
        marker=bubble_marker,
        hoverinfo='text',
        hovertext=hover_labels,
    )
)

# One boxed label per bubble: the wrapped product name followed by the bold
# integer total, centred on the bubble without an annotation arrow.
annotations = [
    dict(
        x=x,
        y=y,
        text=f"{wrap_text(name)}<br><b>{int(total):,}</b>",
        showarrow=False,
        font=dict(color="black", size=12, family="Arial"),
        align="center",
        xanchor="center",
        yanchor="middle",
        bgcolor="white",
        bordercolor="gray",
        borderwidth=1,
        borderpad=4,
        opacity=0.8,
    )
    for x, y, name, total in zip(
        x_pos, y_pos, df_top5['order_item_name'], df_top5['total_sales']
    )
]

# Attach the labels and hide grid lines, zero lines and tick labels on both
# axes, since the coordinates only position the bubbles.
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)
fig.update_layout(
    annotations=annotations,
    title="Bubble Cloud Chart: Top 5 Produk by Total Penjualan",
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis),
    plot_bgcolor='white',
    height=600,
    width=800,
    margin=dict(l=20, r=20, t=60, b=20),
)

fig.show()