In [None]:
import pandas as pd
from datetime import datetime

# Load the combined orders/members export.
file_path = 'your_path/orders_members.csv'
orders_members = pd.read_csv(file_path)

# Silver-tier rows only. The explicit copy yields an independent frame so the
# column assignments below do not trigger SettingWithCopyWarning.
silver_members = orders_members[orders_members['membership_tier'] == '銀級會員'].copy()

# Timezone-aware cut-off: the first instant after the analysis year.
cutoff_date = pd.Timestamp('2024-01-01', tz='UTC')

# Parse both date columns as UTC-aware datetimes so they compare cleanly
# against the tz-aware timestamps used below.
silver_members['registered_at'] = pd.to_datetime(silver_members['registered_at'], utc=True)
silver_members['created_at'] = pd.to_datetime(silver_members['created_at'], utc=True)

# Silver members registered at least one year before the cut-off.
silver_members_one_year = silver_members[silver_members['registered_at'] <= cutoff_date - pd.DateOffset(years=1)]

# Keep one row per order_number. Rows with a missing order_number are all kept:
# pandas treats NaN values as duplicates of each other, so a plain
# drop_duplicates would collapse them into a single row.
# NOTE(review): presumably a missing order_number marks a member without any
# order (the later 'R' segment relies on such rows) -- confirm against the data.
repeated_order = silver_members_one_year.duplicated(subset='order_number')
missing_order = silver_members_one_year['order_number'].isna()
silver_members_one_year_unique_orders = silver_members_one_year[~repeated_order | missing_order]

# Analysis window. analysis_date is midnight at the start of 2023-12-31 and is
# the reference point handed to the segmentation functions later on.
analysis_date = pd.Timestamp('2023-12-31', tz='UTC')
start_date = pd.Timestamp('2023-01-01', tz='UTC')

# Customers with / without an order inside the window. The upper bound is
# "< cutoff_date" rather than "<= analysis_date" so that orders placed during
# 2023-12-31 (after midnight) are counted as in-window purchases.
order_time = silver_members_one_year_unique_orders['created_at']
one_year_orders = silver_members_one_year_unique_orders[(order_time >= start_date) & (order_time < cutoff_date)]
one_year_customers = one_year_orders['customer_id'].unique()
one_year_no_purchase_customers = silver_members_one_year_unique_orders[~silver_members_one_year_unique_orders['customer_id'].isin(one_year_customers)]['customer_id'].unique()

# Order count per customer, and the gap in whole days between each order and
# the same customer's previous order.
consumption_counts = one_year_orders.groupby('customer_id')['order_number'].count()
one_year_orders = one_year_orders.sort_values(['customer_id', 'created_at'])
one_year_orders['previous_order_date'] = one_year_orders.groupby('customer_id')['created_at'].shift(1)
one_year_orders['days_between_orders'] = (one_year_orders['created_at'] - one_year_orders['previous_order_date']).dt.days

# Mean gap per customer; customers with a single order have no gap and are
# removed by dropna(). The median of those means is the recency threshold.
average_shopping_cycle = one_year_orders.groupby('customer_id')['days_between_orders'].mean().dropna()
shopping_cycle_median = average_shopping_cycle.median()

print("銀級會員平均購買頻率（購物週期）的中位數:", shopping_cycle_median)

# 針對第一群顧客的分群
def categorize_active_customer(row, shopping_cycle_median, analysis_date):
    """Label a customer who ordered inside the analysis window.

    Args:
        row: Mapping-like record providing 'consumption_count' and
            'last_purchase_date'.
        shopping_cycle_median: Recency threshold, in days.
        analysis_date: Reference date; recency is the whole-day difference
            between it and the last purchase date.

    Returns:
        'N' - exactly one order, more recent than the threshold.
        'A' - more than one order, most recent one within the threshold.
        'L' - exactly one order, at or beyond the threshold.
        'P' - more than one order, most recent one at or beyond the threshold.
        'Other1' - anything else (order count neither 1 nor greater than 1,
        or a recency/threshold pair that satisfies neither comparison).
    """
    orders_in_window = row['consumption_count']
    last_order_at = row['last_purchase_date']

    # Pick the (recent, lapsed) label pair from the order count first; recency
    # is only computed once the count qualifies.
    if orders_in_window == 1:
        recent_label, lapsed_label = 'N', 'L'
    elif orders_in_window > 1:
        recent_label, lapsed_label = 'A', 'P'
    else:
        return 'Other1'

    idle_days = (analysis_date - last_order_at).days
    if idle_days < shopping_cycle_median:
        return recent_label
    if idle_days >= shopping_cycle_median:
        return lapsed_label
    # Neither comparison held (e.g. the threshold is NaN).
    return 'Other1'

# Per-customer summary of in-window activity. The three Series are aligned on
# their shared customer_id index; single-order customers are absent from
# average_shopping_cycle, so their 'shopping_cycle' value is NaN.
last_purchase_by_customer = one_year_orders.groupby('customer_id')['created_at'].max()
active_columns = {
    'consumption_count': consumption_counts,
    'shopping_cycle': average_shopping_cycle,
    'last_purchase_date': last_purchase_by_customer,
}
active_df = pd.DataFrame(active_columns)

# Label every active customer row by row; the extra positional arguments are
# forwarded to categorize_active_customer after the row itself.
active_df['category'] = active_df.apply(
    categorize_active_customer,
    axis=1,
    args=(shopping_cycle_median, analysis_date),
)

# 針對第二群顧客的分群
def categorize_inactive_customer(row, analysis_date):
    """Label a customer who placed no order inside the analysis window.

    Args:
        row: Mapping-like record providing 'last_purchase_date' and
            'total_orders'.
        analysis_date: Reference date used to measure time since the last
            purchase.

    Returns:
        'R' when total_orders is 0 (never purchased since registering),
        'S' when the last purchase is 364 or more whole days before
        analysis_date, otherwise 'Other2'.
    """
    last_order_at = row['last_purchase_date']

    # No order at all: the purchase date is not needed for this label.
    if row['total_orders'] == 0:
        return 'R'

    idle_days = (analysis_date - last_order_at).days
    return 'S' if idle_days >= 364 else 'Other2'

# Second group: customers without any order inside the analysis window.
inactive_customers_df = silver_members_one_year_unique_orders[silver_members_one_year_unique_orders['customer_id'].isin(one_year_no_purchase_customers)]
# Non-null order_number count per customer (count() ignores missing values).
total_orders = silver_members_one_year_unique_orders.groupby('customer_id')['order_number'].count()
# Most recent order timestamp per inactive customer.
inactive_df = inactive_customers_df.groupby('customer_id').agg({'created_at': 'max'}).reset_index()
inactive_df.columns = ['customer_id', 'last_purchase_date']
inactive_df['total_orders'] = inactive_df['customer_id'].map(total_orders)

inactive_df['category'] = inactive_df.apply(categorize_inactive_customer, axis=1, analysis_date=analysis_date)

# Stack both groups; both frames are indexed by customer_id at this point.
final_df = pd.concat([active_df, inactive_df.set_index('customer_id')], axis=0)

# 2022 totals per silver member: distinct orders, discounted amount, quantity.
silver_members_2022 = silver_members[silver_members['created_at'].dt.year == 2022]
order_count = silver_members_2022.groupby('customer_id')['order_number'].nunique().reset_index()
order_sum = silver_members_2022.groupby('customer_id')['items_total_dollar_after_discount'].sum().reset_index()
quantity_sum = silver_members_2022.groupby('customer_id')['quantity'].sum().reset_index()

# Combine the three 2022 aggregates into one frame.
summary = pd.merge(order_count, order_sum, on='customer_id')
summary = pd.merge(summary, quantity_sum, on='customer_id')

summary.columns = ['customer_id', 'total_orders_2022', 'total_amount_2022', 'total_quantity_2022']

# Attach the 2022 totals to the segmentation result. A left merge keeps every
# segmented customer; the default inner merge would silently drop customers
# with no 2022 order (for example everyone who never ordered). Their missing
# 2022 totals are recorded as zero.
final_df = final_df.reset_index()
final_df = final_df.merge(summary, on='customer_id', how='left')
totals_2022 = ['total_orders_2022', 'total_amount_2022', 'total_quantity_2022']
final_df[totals_2022] = final_df[totals_2022].fillna(0)

# Show the result.
display(final_df)

In [None]:
# Select the data to analyse and merge it together.
# Source files: orders, the item-category lookup, and the member table.
file_path = 'your_path/orders.csv'
item_category_path = "your_path/item_category.csv"

orders = pd.read_csv(file_path)
item_category_data = pd.read_csv(item_category_path)
members = pd.read_csv('your_path/member.csv')

# Attach the item-category columns to each order row by SKU, then each
# customer's membership tier. Both are left merges, so every order row is kept.
orders = pd.merge(orders, item_category_data, on='sku', how='left')
orders = pd.merge(orders, members[['customer_id', 'membership_tier']], on='customer_id', how='left')

# Columns carried into the analysis frame.
analysis_columns = [
    'customer_id', 'item_count', 'quantity', 'item_normal_price', 'membership_tier',
    'item_price_sale', 'items_total_dollar', 'discounted_price_dollars',
    'items_total_dollar_after_discount', 'item_category',
]

# Add each customer's segment label; customers absent from final_df get NaN.
merge_data = orders[analysis_columns].merge(
    final_df[['customer_id', 'category']],
    on='customer_id',
    how='left',
)


In [None]:
#對整體銀級會員進行折扣分析
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import font_manager
from matplotlib import font_manager as fm

# Configure a font that can render Chinese text in figures.
font_path = 'C:/Windows/Fonts/msjh.ttc'  # Microsoft JhengHei font file (Windows path)
font_prop = fm.FontProperties(fname=font_path)
plt.rcParams['font.family'] = font_prop.get_name()
plt.rcParams['axes.unicode_minus'] = False  # work around minus-sign rendering issues

# Order rows placed by silver-tier members (all segments).
silver = merge_data[merge_data['membership_tier'] == '銀級會員']
# Skip missing categories: NaN never compares equal to itself, so a NaN entry
# would select no rows and yield an all-NaN panel titled "nan heatmap".
item_categories = silver['item_category'].dropna().unique()

# Grid layout: three heatmaps per row, as many rows as needed.
n_cols = 3
n_rows = (len(item_categories) + n_cols - 1) // n_cols

if n_rows == 0:
    # Nothing to draw; avoid creating a zero-height figure.
    print('No item categories to plot for silver-tier members.')
else:
    fig = plt.figure(figsize=(n_cols * 6, n_rows * 4))

    for idx, category in enumerate(item_categories):
        # Pairwise correlation of discount amount, item count and quantity
        # within this item category.
        category_data = silver[silver['item_category'] == category]
        corr_matrix = category_data[['discounted_price_dollars', 'item_count', 'quantity']].corr()

        ax = fig.add_subplot(n_rows, n_cols, idx + 1)
        sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
        ax.set_title(f'{category} heatmap')

    fig.tight_layout()
    plt.show()

In [None]:
# Order rows from silver-tier members whose segment label is 'A'.
silver_A = merge_data[(merge_data['membership_tier'] == '銀級會員') & (merge_data['category'] == 'A')]
# Skip missing categories: NaN never compares equal to itself, so a NaN entry
# would select no rows and yield an all-NaN panel titled "nan heatmap".
item_categories_A = silver_A['item_category'].dropna().unique()

# Grid layout: three heatmaps per row, as many rows as needed.
n_cols = 3
n_rows = (len(item_categories_A) + n_cols - 1) // n_cols

if n_rows == 0:
    # Empty segment: nothing to draw; avoid creating a zero-height figure.
    print("No item categories to plot for silver-tier segment 'A'.")
else:
    fig = plt.figure(figsize=(n_cols * 6, n_rows * 4))

    for idx, category in enumerate(item_categories_A):
        # Pairwise correlation of discount amount, item count and quantity
        # within this item category.
        category_data = silver_A[silver_A['item_category'] == category]
        corr_matrix = category_data[['discounted_price_dollars', 'item_count', 'quantity']].corr()

        ax = fig.add_subplot(n_rows, n_cols, idx + 1)
        sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
        ax.set_title(f'{category} heatmap')

    fig.tight_layout()
    plt.show()

In [None]:
# Order rows from silver-tier members whose segment label is 'P'.
silver_P = merge_data[(merge_data['membership_tier'] == '銀級會員') & (merge_data['category'] == 'P')]
# Skip missing categories: NaN never compares equal to itself, so a NaN entry
# would select no rows and yield an all-NaN panel titled "nan heatmap".
item_categories_P = silver_P['item_category'].dropna().unique()

# Grid layout: three heatmaps per row, as many rows as needed.
n_cols = 3
n_rows = (len(item_categories_P) + n_cols - 1) // n_cols

if n_rows == 0:
    # Empty segment: nothing to draw; avoid creating a zero-height figure.
    print("No item categories to plot for silver-tier segment 'P'.")
else:
    fig = plt.figure(figsize=(n_cols * 6, n_rows * 4))

    for idx, category in enumerate(item_categories_P):
        # Pairwise correlation of discount amount, item count and quantity
        # within this item category.
        category_data = silver_P[silver_P['item_category'] == category]
        corr_matrix = category_data[['discounted_price_dollars', 'item_count', 'quantity']].corr()

        ax = fig.add_subplot(n_rows, n_cols, idx + 1)
        sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
        ax.set_title(f'{category} heatmap')

    fig.tight_layout()
    plt.show()

In [None]:
# Order rows from silver-tier members whose segment label is 'L'.
silver_L = merge_data[(merge_data['membership_tier'] == '銀級會員') & (merge_data['category'] == 'L')]
# Skip missing categories: NaN never compares equal to itself, so a NaN entry
# would select no rows and yield an all-NaN panel titled "nan heatmap".
item_categories_L = silver_L['item_category'].dropna().unique()

# Grid layout: three heatmaps per row, as many rows as needed.
n_cols = 3
n_rows = (len(item_categories_L) + n_cols - 1) // n_cols

if n_rows == 0:
    # Empty segment: nothing to draw; avoid creating a zero-height figure.
    print("No item categories to plot for silver-tier segment 'L'.")
else:
    fig = plt.figure(figsize=(n_cols * 6, n_rows * 4))

    for idx, category in enumerate(item_categories_L):
        # Pairwise correlation of discount amount, item count and quantity
        # within this item category.
        category_data = silver_L[silver_L['item_category'] == category]
        corr_matrix = category_data[['discounted_price_dollars', 'item_count', 'quantity']].corr()

        ax = fig.add_subplot(n_rows, n_cols, idx + 1)
        sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
        ax.set_title(f'{category} heatmap')

    fig.tight_layout()
    plt.show()

In [None]:
# Order rows from silver-tier members whose segment label is 'S'.
silver_S = merge_data[(merge_data['membership_tier'] == '銀級會員') & (merge_data['category'] == 'S')]
# Skip missing categories: NaN never compares equal to itself, so a NaN entry
# would select no rows and yield an all-NaN panel titled "nan heatmap".
item_categories_S = silver_S['item_category'].dropna().unique()

# Grid layout: three heatmaps per row, as many rows as needed.
n_cols = 3
n_rows = (len(item_categories_S) + n_cols - 1) // n_cols

if n_rows == 0:
    # Empty segment: nothing to draw; avoid creating a zero-height figure.
    print("No item categories to plot for silver-tier segment 'S'.")
else:
    fig = plt.figure(figsize=(n_cols * 6, n_rows * 4))

    for idx, category in enumerate(item_categories_S):
        # Pairwise correlation of discount amount, item count and quantity
        # within this item category.
        category_data = silver_S[silver_S['item_category'] == category]
        corr_matrix = category_data[['discounted_price_dollars', 'item_count', 'quantity']].corr()

        ax = fig.add_subplot(n_rows, n_cols, idx + 1)
        sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", vmin=-1, vmax=1, ax=ax)
        ax.set_title(f'{category} heatmap')

    fig.tight_layout()
    plt.show()