In [169]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import ast
from scipy.stats import beta
from collections import defaultdict

In [170]:
rating = pd.read_csv('../data/clean.csv')
view = pd.read_csv('../data/total_view.csv')

rating_outfit = set(rating.outfit_id.unique())
view_outfit = set(view.outfit_id.unique()) 

valid_outfit = list(rating_outfit.union(view_outfit))

rating_session = set(rating.session_id.unique())
view_session = set(view.session_id.unique()) 

valid_session = list(rating_session.union(view_session))

In [171]:
outfit = pd.read_csv('../data/check.csv')

In [172]:
rating['rating'] = 1 
view['rating'] = 0 

In [173]:
outfit.gpttag

0        ['조던', '스니커즈', '롱', '나이키', '패딩', '블랙', '데님', '...
1                                  ['블랙', '더플코트', '크로스 백']
2                    ['후드', '연청', '집업', '그레이', '데님', '팬츠']
3                                ['와이드 슬랙스', '블랙 컬러 블레이저']
4               ['다크 그레이 스커트', '레드 스니커즈', '블랙 컬러의 라이더 재킷']
                               ...                        
10424                           ['블랙', '셔츠', '그린', '조거팬츠']
10425           ['니들스', '스니커즈', '발렌시아가', 'HD', '데님', '팬츠']
10426                       ['페이즐리 패턴', '셔츠', '글래디에이터 샌들']
10427                          ['블랙', '미니 크로스 백', '데님 팬츠']
10428                                     ['워크 캡', '오버 롤']
Name: gpttag, Length: 10429, dtype: object

In [174]:
def removebubble(row):
    bubble={'컬러의','컬러','무신사','룩','매칭한','매칭','와'}
    new_tags=set(list(row).copy())
    return list(new_tags-bubble)

def tolist_withspace(row):
    return list(row.rstrip("]").lstrip("[").replace(" '","").replace(' "','').replace('"','').replace("'","").replace("컬러의 ",'').replace("컬러 ",'').split(","))
#return list(row['gpttag'].rstrip("]").lstrip("[").replace(' ','').replace('"','').replace("'","").replace("컬러의",'').replace("컬러",'').split(","))
def tolist(row):
    return list(row.rstrip("]").lstrip("[").replace('"','').replace("'","").replace("컬러의",'').replace("컬러",'').split(","))

In [175]:
outfit.gpttag = outfit.gpttag.apply(tolist)
# outfit.gpttag = outfit.gpttag.apply(tolist_withspace)
outfit.gpttag = outfit.gpttag.apply(removebubble)


In [176]:
outfit.gpttag

0        [ 나이키,  블랙,  패딩,  스커트, 조던,  롱,  스니커즈,  데님]
1                               [ 더플코트, 블랙,  크로스 백]
2                    [후드,  집업,  연청,  팬츠,  데님,  그레이]
3                              [와이드 슬랙스,  블랙  블레이저]
4               [다크 그레이 스커트,  레드 스니커즈,  블랙  라이더 재킷]
                            ...                    
10424                         [ 조거팬츠,  셔츠,  그린, 블랙]
10425           [ HD, 니들스,  팬츠,  스니커즈,  발렌시아가,  데님]
10426                    [ 셔츠, 페이즐리 패턴,  글래디에이터 샌들]
10427                       [ 데님 팬츠,  미니 크로스 백, 블랙]
10428                                 [ 오버 롤, 워크 캡]
Name: gpttag, Length: 10429, dtype: object

In [177]:
item_list = []
for items in outfit.gpttag:
    for item in items:
        if item not in item_list:
            item_list.append(item)

In [178]:
item2idx = {item:idx for idx, item in enumerate(item_list)}

In [179]:
len(item2idx)

5038

In [180]:
alphas = np.ones(len(item2idx))
betas = np.ones(len(item2idx))


In [181]:
total = pd.concat([rating, view], axis=0)
total = total.loc[total.isin({'session_id':valid_session})['session_id']]

In [182]:
total.drop_duplicates(subset=['session_id', 'outfit_id'], keep='first', inplace=True)

In [183]:
total.loc[total.rating==1]

Unnamed: 0,session_id,user_id,outfit_id,timestamp,rating,view_type
0,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83783,23-07-19 09:26:07,1,
1,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83800,23-07-19 09:26:10,1,
2,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83791,23-07-19 09:26:12,1,
3,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,84029,23-07-19 09:26:16,1,
4,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83706,23-07-19 09:26:18,1,
...,...,...,...,...,...,...
1694,22af75f5-cabe-46e8-90b3-08613327f389,,85210,2023-07-21 09:18:46.024456,1,
1695,22af75f5-cabe-46e8-90b3-08613327f389,,83291,2023-07-21 09:18:48.784861,1,
1696,22af75f5-cabe-46e8-90b3-08613327f389,,91601,2023-07-21 09:20:03.409300,1,
1697,22af75f5-cabe-46e8-90b3-08613327f389,,90043,2023-07-21 09:20:13.284585,1,


In [184]:
outfit.gpttag = outfit.gpttag.apply(lambda x: [item2idx[item] for item in x])


In [185]:
temp = pd.merge(total, outfit, on='outfit_id',how='left')
temp.head()

Unnamed: 0,session_id,user_id,outfit_id,timestamp,rating,view_type,gpttag,gender,age,img_url,origin_url,reporter,tags,brands,region,occupation,style,date
0,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83783,23-07-19 09:26:07,1,,"[1365, 167, 329, 36, 141]",M,67,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83783,블루 컬러 옥스포드 셔츠와 아이보리 치노팬츠를 매치한 룩입니다.,"['남성', '셔츠', '치노팬츠', '여름']",[],기타 서울,모델,댄디,2022/08/08
1,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83800,23-07-19 09:26:10,1,,"[112, 14, 209, 41, 141]",M,62,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83800,그린 컬러 반소매 셔츠와 화이트 팬츠를 매치한 룩입니다.,"['남성', '셔츠', '치노팬츠', '그린', '여름']",[],기타 서울,모델,캐주얼,2022/07/27
2,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83791,23-07-19 09:26:12,1,,"[167, 329, 440, 36, 1658, 6, 224]",M,66,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83791,"블루 컬러 린넨 셔츠와 아이보리 치노팬츠, 컨버스 스니커즈를 매치한 룩입니다.","['남성', '셔츠', '치노팬츠', '블루', '여름']",[],기타 서울,모델,캐주얼,2022/07/27
3,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,84029,23-07-19 09:26:16,1,,"[72, 22, 1, 316, 14]",M,25,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/84029,"블랙 컬러의 시스루 티셔츠 ,블랙 컬러 팬츠를 매치한 룩입니다.","['남성', '블랙', '시스루', '팬츠', '여름']",[],홍대/신촌,정보없음,스트릿,2022/08/18
4,7708c8e7-4292-4ff9-99b1-27be20427e42,1.0,83706,23-07-19 09:26:18,1,,"[126, 122, 112, 22, 1, 14, 41]",M,29,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83706,"화이트 컬러의 반소매 티셔츠와 블랙 팬츠, 브라운 부츠를 매칭한 룩입니다.","['남성', '티셔츠', '블랙팬츠', '부츠', '여름']",[],홍대/신촌,자영업,아메리칸 캐주얼,2022/08/04


In [186]:
session2idx = {session:idx for idx, session in enumerate(temp.session_id.unique())}
outfit2idx = {outfit:idx for idx, outfit in enumerate(outfit.outfit_id.unique())}
temp['session_id'] = temp['session_id'].apply(lambda x: session2idx[x])
temp['outfit_id'] = temp['outfit_id'].apply(lambda x: outfit2idx[x])
temp

Unnamed: 0,session_id,user_id,outfit_id,timestamp,rating,view_type,gpttag,gender,age,img_url,origin_url,reporter,tags,brands,region,occupation,style,date
0,0,1.0,8120,23-07-19 09:26:07,1,,"[1365, 167, 329, 36, 141]",M,67,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83783,블루 컬러 옥스포드 셔츠와 아이보리 치노팬츠를 매치한 룩입니다.,"['남성', '셔츠', '치노팬츠', '여름']",[],기타 서울,모델,댄디,2022/08/08
1,0,1.0,8132,23-07-19 09:26:10,1,,"[112, 14, 209, 41, 141]",M,62,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83800,그린 컬러 반소매 셔츠와 화이트 팬츠를 매치한 룩입니다.,"['남성', '셔츠', '치노팬츠', '그린', '여름']",[],기타 서울,모델,캐주얼,2022/07/27
2,0,1.0,8139,23-07-19 09:26:12,1,,"[167, 329, 440, 36, 1658, 6, 224]",M,66,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83791,"블루 컬러 린넨 셔츠와 아이보리 치노팬츠, 컨버스 스니커즈를 매치한 룩입니다.","['남성', '셔츠', '치노팬츠', '블루', '여름']",[],기타 서울,모델,캐주얼,2022/07/27
3,0,1.0,8083,23-07-19 09:26:16,1,,"[72, 22, 1, 316, 14]",M,25,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/84029,"블랙 컬러의 시스루 티셔츠 ,블랙 컬러 팬츠를 매치한 룩입니다.","['남성', '블랙', '시스루', '팬츠', '여름']",[],홍대/신촌,정보없음,스트릿,2022/08/18
4,0,1.0,8151,23-07-19 09:26:18,1,,"[126, 122, 112, 22, 1, 14, 41]",M,29,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/83706,"화이트 컬러의 반소매 티셔츠와 블랙 팬츠, 브라운 부츠를 매칭한 룩입니다.","['남성', '티셔츠', '블랙팬츠', '부츠', '여름']",[],홍대/신촌,자영업,아메리칸 캐주얼,2022/08/04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27137,305,0.0,2483,23-07-21 09:59:32,0,journey,"[1817, 23, 9]",F,23,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/82692,블랙 컬러의 크롭 카디건과 투톤 카고 팬츠를 매치한 룩입니다.,"['여성', '크롭', '카디건', '카고팬츠', '여름']",[],홍대/신촌,학생,캐주얼,2022/06/27
27138,305,0.0,9585,23-07-21 09:59:32,0,journey,"[747, 58, 3198, 3199, 3200, 93]",M,25,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/89698,베이지 컬러의 리프로덕션 오브 파운드 밀리터리 스니커즈를 매칭한 룩입니다.,"['봄', '캐주얼', '남성', '스니커즈', '리프로덕션 오브 파운드']",['리프로덕션 오브 파운드'],동대문,모델,캐주얼,2023/03/24
27139,305,0.0,7303,23-07-21 09:59:32,0,journey,"[1705, 119, 941, 129, 165, 1]",F,27,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/73424,블랙 컬러의 원피스와 스트라이프 패턴의 가방으로 연출한 코디 입니다.,"['원피스', '여름', '캐주얼']",[],신사/압구정,모델,캐주얼,2021/08/03
27140,305,0.0,8920,23-07-21 09:59:32,0,journey,"[138, 224, 79, 4401]",M,21,https://codidatabucket.s3.ap-northeast-2.amazo...,https://www.musinsa.com/mz/streetsnap/view/79117,아워레가시x스투시 셔츠 재킷과 스웨트팬츠를 매칭한 룩입니다.,"['셔츠', '재킷', '봄', '캐주얼']",[],홍대/신촌,학생,캐주얼,2022/02/13


In [187]:
train = temp[['session_id', 'outfit_id', 'rating', 'gpttag', 'timestamp']]

In [188]:
train.head()

Unnamed: 0,session_id,outfit_id,rating,gpttag,timestamp
0,0,8120,1,"[1365, 167, 329, 36, 141]",23-07-19 09:26:07
1,0,8132,1,"[112, 14, 209, 41, 141]",23-07-19 09:26:10
2,0,8139,1,"[167, 329, 440, 36, 1658, 6, 224]",23-07-19 09:26:12
3,0,8083,1,"[72, 22, 1, 316, 14]",23-07-19 09:26:16
4,0,8151,1,"[126, 122, 112, 22, 1, 14, 41]",23-07-19 09:26:18


In [217]:
class MAB(object):
    def __init__(self, num_arms, outfits, tags, num_sample):
        self.alpha = np.ones(num_arms)
        self.beta = np.ones(num_arms)
        self.outfits = outfits
        self.tags = tags
        self.num_sample = num_sample
        
    def update(self, arm, reward):
        for tag in self.tags[arm]:
            self.alpha[tag] += reward
            self.beta[tag] += 1 - reward
        
    def sample(self):
        rvs = []
        for i in range(len(self.alpha)):
            rvs.append(beta(self.alpha[i], self.beta[i]).rvs())
        rvs = np.array(rvs)
        
        probs = []
        for tags in self.tags:
            probs.append(np.mean(rvs[tags]))
            
        probs = probs / np.sum(probs)
        samples = np.random.choice(self.outfits, self.num_sample, p=probs)
        
        return samples
    

In [191]:
outfit.outfit_id = outfit.outfit_id.apply(lambda x: outfit2idx[x])

In [218]:
mab = MAB(5038, outfit.outfit_id.values, outfit.gpttag.values, 10)

In [219]:
temp_df = train.loc[train.session_id==1]

In [220]:
arm_list, rating_list = temp_df.outfit_id.values, temp_df.rating.values

In [221]:
for arm, rating in zip(arm_list, rating_list):
    mab.update(arm, rating)

In [222]:
mab.sample()

array([  255,  4894,   111,  6115, 10381,  5775,  6814,   445, 10059,
        3943])

In [223]:
mab.sample()

array([10146, 10097,   685,  9781,  2138,  1766,   736,  1428,  7864,
        5583])

In [None]:
for _ in range(10):
    samples = mab.sample()
    for item in samples:
        