In [16]:
from pydantic import BaseModel
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules


# Cross-sell data (used here as an example)
df = pd.read_csv('./hayatomo_6000_oh.csv', index_col=0)

# The 'ppv_history' column is read back from the CSV as the quoted string form
# of a Python list; parse every cell into a real list.
import ast
df['ppv_history'] = df['ppv_history'].apply(ast.literal_eval)

# One list of item ids per row (slice the list, e.g. [:100], for a quick trial run)
transactions = df['ppv_history'].tolist()

def encode_transactions(transactions):
    """One-hot encode a collection of transactions.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable,
            mutually sortable item ids. Duplicate ids inside a transaction
            are collapsed.

    Returns:
        pandas.DataFrame with one row per transaction (in input order) and one
        column per distinct item id (sorted ascending). Each cell holds the
        int 1 if the item occurs in the transaction, else 0.
    """
    # Turn each basket into a set once: set membership is O(1), whereas
    # `item in list` rescans the basket for every column of every row.
    # Materialising here also lets one-shot iterables be encoded correctly.
    baskets = [set(transaction) for transaction in transactions]
    all_items = sorted(set().union(*baskets))
    rows = [[int(item in basket) for item in all_items] for basket in baskets]
    return pd.DataFrame(rows, columns=all_items)

def apriori_algorithm(transactions, min_support=0.02, min_confidence=0.1):
    """Mine association rules from transactions with the Apriori algorithm.

    Args:
        transactions: List of transactions, each a list of item ids.
        min_support: Minimum support an itemset needs to count as frequent.
        min_confidence: Minimum confidence a rule needs to be kept.

    Returns:
        The DataFrame produced by mlxtend's ``association_rules``, sorted by
        'support' in descending order.
    """
    df_encoded = encode_transactions(transactions)
    # mlxtend warns on non-bool input; the 0/1 ints map 1:1 onto False/True.
    frequent_itemsets = apriori(
        df_encoded.astype(bool), min_support=min_support, use_colnames=True
    )
    rules = association_rules(
        frequent_itemsets,
        metric="confidence",
        min_threshold=min_confidence,
        # num_itemsets is the number of transactions in the original data,
        # not the number of frequent itemsets that were found.
        num_itemsets=len(df_encoded),
    )
    return rules.sort_values(by='support', ascending=False)

def get_top_10_confidence_items(item_id, rules, top_n=None):
    """List the items associated with ``item_id``, highest confidence first.

    Only rules whose antecedent is exactly ``{item_id}`` and whose consequent
    is a single item are considered.

    Args:
        item_id: Item id to look up on the antecedent side of the rules.
        rules: DataFrame with 'antecedents' and 'consequents' columns holding
            item collections, and a numeric 'confidence' column.
        top_n: Maximum number of items to return. ``None`` (the default)
            returns every matching item, as before this parameter existed.

    Returns:
        List of consequent item ids ordered by descending confidence.
    """
    # Nothing to search; also avoids boolean-masking with an empty mask.
    if rules.empty:
        return []

    # Keep rules whose antecedent consists of item_id alone.
    antecedent_matches = rules['antecedents'].apply(
        lambda items: len(items) == 1 and item_id in items
    )
    ranked = rules[antecedent_matches].sort_values(by='confidence', ascending=False)

    # Rules with multi-item consequents are skipped, not truncated.
    associated_items = [
        next(iter(consequent))
        for consequent in ranked['consequents']
        if len(consequent) == 1
    ]

    if top_n is None:
        return associated_items
    return associated_items[:top_n]


# Item ids for which recommendations are generated
item_list = ['002', '003', '009', '403', '035', '036', '008', '001', '010', '020', '024', '026', '025', '052', '402', '5', '027', '028', '021', '022', '061', '055', '046', '004', '015', '016', '031', '032', '059', '049', '058', '610', '060', '041', '029', '006', '007', '012', '018', '019', '023', '051', '045']

# Mine the rules once: they depend only on `transactions`, not on the item
# being looked up, so re-running Apriori for every item repeats the same work.
rules = apriori_algorithm(transactions)

# Extract the recommended items for each product
recommendations = []
for il in item_list:
    top_items = get_top_10_confidence_items(il, rules)
    recommendations.append([il , top_items])

    print(il , top_items , len(top_items))

# Optional CSV export:
# pd.DataFrame(recommendations, columns=['Key', 'Value']).to_csv('basket.csv')



002 ['009', '005', '008', '016', '003', '017', '055', '018', '021', '024', '025', '020', '027', '403', '046', '026', '069', '031', '022', '029'] 20




003 ['005', '002', '009', '008', '001', '004', '010', '055', '014', '018'] 10




009 ['002', '005', '008', '017', '016', '018', '055', '025', '020', '024', '027', '021', '003', '046', '031', '022', '026', '029', '069', '015', '030', '033', '072', '023', '019'] 25




403 ['005', '002', '009', '008', '014', '001', '010', '055'] 8




035 ['005', '003'] 2




036 ['002', '009', '005'] 3




008 ['002', '009', '005', '016', '024', '026', '025', '055', '003', '019', '018', '031', '017', '021', '046', '027', '020', '069', '029', '015', '072', '022', '403', '060', '030', '001', '033', '058', '100', '610', '079', '011', '057', '053', '023', '006', '068', '091', '084', '066', '065'] 41




001 ['005', '003', '004', '002', '009', '008', '403', '012', '010'] 9




010 ['005', '003', '001', '403', '014'] 5




020 ['002', '009', '018', '005', '017', '016', '027', '008', '021', '025', '024', '046', '069', '055', '030', '022', '072', '029', '026', '060', '032', '033', '065', '019', '028', '051', '031', '015', '056'] 29




024 ['002', '009', '005', '008', '017', '025', '016', '046', '022', '021', '026', '027', '031', '069', '018', '029', '072', '030', '020', '023', '019', '033', '055', '028', '051', '032', '073', '065', '060', '003', '056', '057', '058', '084', '052'] 35




026 ['002', '008', '009', '005', '024', '025', '031', '027', '017', '022', '046', '072', '029', '016', '018', '021', '069', '033', '020', '019', '060', '030', '032', '084', '073'] 25




025 ['002', '009', '024', '027', '008', '017', '046', '005', '016', '026', '021', '029', '018', '020', '031', '069', '022', '030', '072', '033', '060', '065', '068', '055', '019', '057', '023', '032', '051', '028', '073', '053', '056', '079'] 34




052 ['002', '005', '009', '032', '017', '018', '024', '051', '069'] 9




402 ['009', '002', '005'] 3




5 [] 0




027 ['002', '009', '025', '005', '016', '017', '024', '021', '020', '026', '008', '029', '033', '018', '031', '069', '072', '046', '022', '030', '032', '028', '055', '065', '051', '060', '073', '019', '056', '058', '068', '023', '084'] 33




028 ['002', '009', '069', '027', '005', '017', '032', '024', '016', '033', '022', '030', '025', '072', '065', '020', '060', '018', '031', '029', '051', '046', '055', '021'] 24




021 ['002', '009', '016', '005', '017', '018', '024', '008', '027', '025', '020', '046', '055', '072', '022', '026', '031', '069', '019', '030', '029', '023', '033', '073', '003', '060', '028', '068', '057'] 29




022 ['002', '009', '024', '017', '005', '016', '026', '072', '025', '027', '046', '018', '008', '029', '069', '033', '021', '031', '020', '030', '065', '028', '032', '073', '056', '023', '060', '019', '051', '084'] 30




061 [] 0




055 ['002', '009', '005', '008', '016', '069', '003', '017', '018', '021', '020', '027', '046', '024', '025', '033', '072', '032', '015', '031', '030', '403', '028', '029'] 24




046 ['002', '009', '024', '005', '025', '017', '008', '031', '069', '016', '027', '026', '030', '021', '020', '022', '018', '029', '032', '055', '072', '057', '033', '060', '023', '051', '065', '073', '068', '058', '011', '028'] 32




004 ['005', '001', '003', '002'] 4




015 ['002', '009', '005', '008', '016', '020', '055', '017', '072'] 9




016 ['002', '009', '005', '017', '018', '008', '021', '024', '020', '027', '025', '022', '029', '069', '072', '055', '065', '046', '031', '030', '019', '023', '026', '068', '015', '033', '056', '028', '073', '060', '057', '003', '051', '032', '090', '085', '011'] 37




031 ['002', '009', '005', '008', '029', '024', '026', '017', '027', '025', '046', '016', '018', '033', '030', '021', '022', '069', '032', '072', '023', '065', '051', '020', '056', '055', '060', '073', '028', '019'] 30




032 ['002', '009', '017', '069', '046', '051', '005', '027', '072', '028', '033', '030', '031', '022', '024', '029', '025', '057', '055', '018', '020', '065', '052', '016', '026', '058', '060', '073', '023'] 29




059 [] 0




049 [] 0




058 ['002', '009', '008', '060', '072', '027', '017', '024', '046', '005', '032', '069'] 12




610 ['002', '009', '005', '008'] 4




060 ['002', '009', '005', '008', '072', '025', '046', '018', '017', '027', '069', '026', '065', '058', '016', '020', '024', '022', '021', '051', '031', '028', '029', '056', '066', '073', '032'] 27




041 [] 0




029 ['002', '009', '031', '016', '005', '025', '027', '024', '017', '008', '030', '026', '046', '069', '022', '033', '018', '021', '072', '032', '020', '065', '060', '028', '019', '056', '051', '055', '066'] 29




006 ['002', '009', '005', '008'] 4




007 ['005', '002'] 2




012 ['001'] 1




018 ['002', '009', '016', '005', '017', '020', '021', '008', '024', '027', '025', '031', '022', '046', '072', '055', '030', '029', '026', '069', '060', '065', '003', '033', '032', '023', '051', '056', '073', '068', '052', '028', '019', '057', '045'] 35




019 ['002', '008', '009', '017', '024', '005', '016', '021', '025', '026', '020', '027', '022', '018', '069', '031', '029'] 17




023 ['002', '009', '024', '017', '016', '046', '069', '021', '025', '031', '022', '005', '018', '027', '008', '030', '032'] 17




051 ['009', '002', '032', '017', '005', '027', '024', '046', '065', '025', '072', '069', '018', '030', '020', '022', '031', '016', '060', '028', '056', '057', '052', '029'] 24




045 ['002', '009', '018', '057'] 4


In [17]:
import json

# Reshape every [key, values] pair into a {'Key': ..., 'Value': ...} record
data_dict_list = [dict(Key=key, Value=values) for key, values in recommendations]

# Destination JSON file
json_filename = 'output_basket.json'

# ensure_ascii=False keeps non-ASCII text readable in the written file
with open(json_filename, mode='w', encoding='utf-8') as out_file:
    json.dump(
        data_dict_list,
        out_file,
        ensure_ascii=False,
        indent=4,
    )

print(f"データを '{json_filename}' に保存しました。")

データを 'output_basket.json' に保存しました。


In [2]:
# Display the parsed transaction lists for inspection.
# NOTE(review): this renders every transaction in the output, and the cell's
# execution count (In [2]) is lower than the cells above it, so it was run out
# of order — consider showing a slice such as transactions[:20] instead.
transactions

[['403', '005', '005', '403', '5', '005'],
 ['005', '002', '008', '017', '016', '103', '612', '124'],
 ['403', '403', '403'],
 ['014', '403', '002'],
 ['002', '011', '009', '002'],
 ['045', '624', '036', '002', '001', '021', '001', '653'],
 ['013', '018', '016', '403', '008', '016', '021', '613', '002'],
 ['403', '005', '014', '010'],
 ['005', '010', '403', '001'],
 ['005', '003', '072', '017', '058', '002', '072', '022', '090', '565', '086'],
 ['005', '003', '035'],
 ['003', '010', '001', '035', '512', '005'],
 ['002', '009', '562'],
 ['005', '003', '002', '020', '009', '031', '015', '017', '070', '622', '005'],
 ['002', '009', '005'],
 ['002', '009', '077', '055', '020'],
 ['105', '504', '120'],
 ['055', '402', '009'],
 ['009', '002', '003', '005', '018', '017', '016', '001', '055'],
 ['125', '033', '028', '026', '024', '022', '027', '131', '019'],
 ['111',
  '016',
  '104',
  '109',
  '017',
  '029',
  '117',
  '118',
  '110',
  '119',
  '123',
  '122',
  '056'],
 ['002', '652', '00