In [2]:
import pandas as pd
import json

df = pd.read_csv('../data/df_problems_tags.csv', index_col=0)

In [3]:
with open('../data/dummy/accountdata.json', 'r', encoding='utf-8') as f:
    json_data = json.load(f)

In [4]:
dum_raw = json_data['solved']

In [5]:
dum_raw = pd.DataFrame(dum_raw)

In [6]:
dum_raw.head(3)

Unnamed: 0,id,status
0,1000,solved
1,2525,solved
2,7287,solved


In [7]:
# 푼 문제 id 리스트
solved_problems = list(dum_raw.loc[dum_raw['status']=='solved']['id'])

In [8]:
dum = pd.merge(dum_raw, df, how='left', left_on='id', right_on='problemId')

In [9]:
# 푼 문제 목록 추출
solved_list = dum.loc[dum['status']=='solved'].sort_values('id')
# 태그별로 문제 적게 푼 순으로 태그 나열
solved_list = solved_list.groupby('bojTagId').count().sort_values('problemId', ascending=True).reset_index()['bojTagId']
# list 형태로 변형
solved_list = list(solved_list)

In [10]:
# 푼 문제 평균 레벨 추출
solved_levels = {}

for solved_tag in solved_list:
    cond1 = (dum['status']=='solved')
    cond2 = (dum['bojTagId']==solved_tag)
    cond_list = list(dum.loc[cond1&cond2]['level'])
    solved_levels[solved_tag] = int(sum(cond_list)/len(cond_list))

In [11]:
# 시도 문제 목록 추출
tried_list = dum.loc[dum['status']=='tried'].sort_values('problemId')
# 태그별로 시도 많은 순으로 태그 나열
tried_list = tried_list.groupby('bojTagId').count().sort_values('problemId', ascending=False).reset_index()['bojTagId']
# list 형태로 변형
tried_list = list(tried_list)

In [12]:
# 시도 문제 중 가장 낮은 레벨 추출
tried_levels = {}

for tried_tag in tried_list:
    cond1 = (dum['status']=='tried')
    cond2 = (dum['bojTagId']==tried_tag)
    cond_list = list(dum.loc[cond1&cond2]['level'])
    tried_levels[tried_tag] = int(sum(cond_list)/len(cond_list))
#     tried_levels[tried_tag] = sorted(list(dum.loc[cond1&cond2]['level']))[-1]

In [13]:
print('푼 문제 태그 목록 : ', solved_list)
print('푼 문제 난이도(내림차순) : ', solved_levels)
print()
print('시도 문제 태그 목록 : ', tried_list)
print('시도 문제 난이도(오름차순) : ', tried_levels)

푼 문제 태그 목록 :  [124, 102, 175, 121, 97, 125, 7, 158, 11, 126, 71, 127, 72, 5, 12, 25, 141, 95, 33, 62, 170, 136, 9, 67, 100, 6, 120, 24, 14, 117, 26, 8, 60, 73]
푼 문제 난이도(내림차순) :  {124: 3, 102: 5, 175: 7, 121: 2, 97: 6, 125: 7, 7: 9, 158: 5, 11: 10, 126: 10, 71: 7, 127: 9, 72: 7, 5: 9, 12: 7, 25: 6, 141: 10, 95: 7, 33: 7, 62: 9, 170: 8, 136: 7, 9: 7, 67: 7, 100: 3, 6: 5, 120: 8, 24: 9, 14: 6, 117: 7, 26: 6, 8: 4, 60: 3, 73: 7}

시도 문제 태그 목록 :  [121, 124, 136, 175]
시도 문제 난이도(오름차순) :  {121: 2, 124: 2, 136: 7, 175: 7}


In [14]:
tag_list = tried_list[:5] + solved_list[:5]
tag_list = list(set(tag_list))

In [15]:
tag_list

[97, 102, 136, 175, 121, 124]

In [16]:
recomm_df = pd.DataFrame(columns=df.columns)

In [17]:
# 푼 문제 제외
df = df.loc[~df['problemId'].isin(solved_problems)]

In [18]:
for tag in tag_list:
    if tag in solved_levels:
        tag_level = solved_levels[tag]
    elif tag in tried_levels:
        tag_level = tried_levels[tag]
    else:
        print('error')
        tag_level = 0        
    print('tag: ', tag, 'tag level: ', tag_level)
    
    recomm_tag_level = [tag_level]
    for i in range(2):
        recomm_tag_level.append(tag_level+(i+1))
        if tag_level-(i+1) >= 0:
            recomm_tag_level.append(tag_level-(i+1))
    print('recomm tag levels: ', sorted(recomm_tag_level))
    
    tmp_df = df.loc[(df['bojTagId']==tag)&(df['level'].isin(recomm_tag_level))]
    tmp_df = tmp_df.sort_values(['acceptedUserCount', 'averageTries'], ascending=[False, True])[:10]
    
#     recomm_df = recomm_df.append(tmp_df)
    recomm_df = pd.concat([recomm_df, tmp_df])

tag:  97 tag level:  6
recomm tag levels:  [4, 5, 6, 7, 8]
tag:  102 tag level:  5
recomm tag levels:  [3, 4, 5, 6, 7]
tag:  136 tag level:  7
recomm tag levels:  [5, 6, 7, 8, 9]
tag:  175 tag level:  7
recomm tag levels:  [5, 6, 7, 8, 9]
tag:  121 tag level:  2
recomm tag levels:  [0, 1, 2, 3, 4]
tag:  124 tag level:  3
recomm tag levels:  [1, 2, 3, 4, 5]


In [19]:
recomm_df = recomm_df.drop_duplicates('problemId').reset_index()

In [20]:
recomm_df = recomm_df.sort_values(['acceptedUserCount', 'averageTries'], ascending=[False, True])

In [25]:
import random

recomm_list = list(recomm_df['problemId'])
recomm_list = random.sample(recomm_list, 10)
recomm_list = json.dumps(recomm_list)

In [26]:
recomm_list

'[9375, 2750, 2742, 10952, 10871, 11279, 11652, 1269, 1026, 1008]'

In [21]:
recomm_json = recomm_df.to_json(orient='records')

In [23]:
print(json.dumps(json.loads(recomm_json), indent="\t", ensure_ascii=False) )

[
	{
		"index": 3,
		"problemId": 1001,
		"titleKo": "A-B",
		"titles": "[{'language': 'ko', 'languageDisplayName': 'ko', 'title': 'A-B', 'isOriginal': True}]",
		"isSolvable": true,
		"isPartial": false,
		"acceptedUserCount": 146249,
		"level": 1,
		"votedUserCount": 8,
		"sprout": false,
		"givesNoRating": false,
		"isLevelLocked": true,
		"averageTries": 1.3962,
		"official": true,
		"tags": "[{'key': 'arithmetic', 'isMeta': False, 'bojTagId': 121, 'problemCount': 585, 'displayNames': [{'language': 'en', 'name': 'arithmetic', 'short': 'arithmetic'}, {'language': 'ko', 'name': '사칙연산', 'short': '사칙연산'}], 'problemId': 1001, 'tag_name': '사칙연산', 'tag_short': '사칙연산'}, {'key': 'implementation', 'isMeta': False, 'bojTagId': 102, 'problemCount': 3456, 'displayNames': [{'language': 'en', 'name': 'implementation', 'short': 'impl'}, {'language': 'ko', 'name': '구현', 'short': '구현'}], 'problemId': 1001, 'tag_name': '구현', 'tag_short': '구현'}, {'key': 'math', 'isMeta': False, 'bojTagId': 124, 'probl

In [30]:
recomm_df.columns

Index(['index', 'problemId', 'titleKo', 'titles', 'isSolvable', 'isPartial',
       'acceptedUserCount', 'level', 'votedUserCount', 'sprout',
       'givesNoRating', 'isLevelLocked', 'averageTries', 'official', 'tags',
       'key', 'isMeta', 'bojTagId', 'problemCount', 'tag_name', 'tag_short'],
      dtype='object')

In [29]:
recomm_df.sample(n=10)

Unnamed: 0,index,problemId,titleKo,titles,isSolvable,isPartial,acceptedUserCount,level,votedUserCount,sprout,...,isLevelLocked,averageTries,official,tags,key,isMeta,bojTagId,problemCount,tag_name,tag_short
41,16,1008,A/B,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,111854,2,6,False,...,True,2.8891,True,"[{'key': 'arithmetic', 'isMeta': False, 'bojTa...",arithmetic,False,121,585,사칙연산,사칙연산
21,19338,11652,카드,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,5652,7,31,False,...,False,3.5189,True,"[{'key': 'data_structures', 'isMeta': False, '...",hash_set,False,136,276,해시를 사용한 집합과 맵,해시를 사용한 집합과 맵
20,15041,9375,패션왕 신해빈,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,7666,8,46,False,...,False,1.8261,True,"[{'key': 'combinatorics', 'isMeta': False, 'bo...",hash_set,False,136,276,해시를 사용한 집합과 맵,해시를 사용한 집합과 맵
29,4316,2910,빈도 정렬,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,1283,8,14,False,...,False,2.2876,True,"[{'key': 'data_structures', 'isMeta': False, '...",hash_set,False,136,276,해시를 사용한 집합과 맵,해시를 사용한 집합과 맵
24,31586,17219,비밀번호 찾기,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,4368,7,54,False,...,False,1.4009,True,"[{'key': 'data_structures', 'isMeta': False, '...",hash_set,False,136,276,해시를 사용한 집합과 맵,해시를 사용한 집합과 맵
28,8918,5568,카드 놓기,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,2070,7,20,False,...,False,1.6575,True,"[{'key': 'bruteforcing', 'isMeta': False, 'boj...",hash_set,False,136,276,해시를 사용한 집합과 맵,해시를 사용한 집합과 맵
39,3,1001,A-B,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,146249,1,8,False,...,True,1.3962,True,"[{'key': 'arithmetic', 'isMeta': False, 'bojTa...",arithmetic,False,121,585,사칙연산,사칙연산
4,17702,10815,숫자 카드,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,13514,7,49,False,...,False,2.0437,True,"[{'key': 'binary_search', 'isMeta': False, 'bo...",sorting,False,97,1046,정렬,정렬
2,3046,2309,일곱 난쟁이,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,24270,4,58,False,...,False,2.3444,True,"[{'key': 'bruteforcing', 'isMeta': False, 'boj...",sorting,False,97,1046,정렬,정렬
18,18013,10952,A+B - 5,"[{'language': 'ko', 'languageDisplayName': 'ko...",True,False,73819,3,4,False,...,True,1.9477,True,"[{'key': 'arithmetic', 'isMeta': False, 'bojTa...",implementation,False,102,3456,구현,구현


In [None]:
recomm_json = recomm_df.to_json(orient='records')