# csm 테이블 생성

csm 테이블은 분석 가능한 모든 캐릭터 쌍에 대해, 두 캐릭터 사이의 유사성을
미리 계산해 놓은 테이블입니다.


In [1]:
import ast
import calendar
import datetime
from functools import partial
from operator import itemgetter
from pprint import pprint

import numpy as np
import pandas as pd

In [2]:
# Load the character attribute sheet ('Table1') into a DataFrame.
chars = pd.read_excel('./datasets/characters-similarity.xlsx', sheet_name='Table1')

def settify(expr: str) -> set:
  """Parse a string holding a Python literal collection and return its items as a set.

  The string is evaluated with ``ast.literal_eval``, so only literals are
  accepted; the resulting iterable is then converted to a ``set``.
  """
  parsed = ast.literal_eval(expr)
  return set(parsed)

# Convert the stringified collections in both multi-valued columns to real sets,
# then show the first rows of each as a sanity check.
for col in ('styles', 'colors'):
  chars[col] = chars[col].apply(settify)
for col in ('styles', 'colors'):
  print(chars[col].head())

0           {쿨}
1     {액티브, 심플}
2    {심플, 엘레강스}
3         {액티브}
4          {심플}
Name: styles, dtype: object
0    {파랑색, 검정색}
1    {파랑색, 검정색}
2    {핑크색, 하양색}
3    {하양색, 빨강색}
4    {빨강색, 파랑색}
Name: colors, dtype: object


In [4]:
# Use the character id as the row index (the 'id' column itself is kept here).
chars = chars.set_index('id', drop=False)

In [6]:
# Remove the columns that are not used in the comparisons below.
chars = chars.drop(
  labels=[
    'id', 'name_en', 'name_ko', 'image_icon',
    'species', 'gender', 'birthday_month', 'birthday_day',
  ],
  axis='columns',
)

In [7]:
# Preview the first five rows of the trimmed table.
chars.head(n=5)

Unnamed: 0_level_0,birthday,personality,hobby,styles,colors
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
admiral,01-27,무뚝뚝,자연,{쿨},"{파랑색, 검정색}"
agents,07-02,아이돌,운동,"{액티브, 심플}","{파랑색, 검정색}"
agnes,04-21,단순 활발,놀이,"{심플, 엘레강스}","{핑크색, 하양색}"
al,10-18,먹보,운동,{액티브},"{하양색, 빨강색}"
alfonso,06-09,먹보,놀이,{심플},"{빨강색, 파랑색}"


## 필요한 함수 정의


In [8]:
# Calendar facts for the current year, evaluated once when the cell runs.
TODAY_YEAR = datetime.date.today().year
TODAY_LEAP = calendar.isleap(TODAY_YEAR)
# Half the number of days in the current year: 183.0 (leap) or 182.5 (common).
DAYMOD = (366 if TODAY_LEAP else 365) / 2

def compare_simple(a, b):
  """Return 0.0 when the two values compare equal, otherwise 1.0."""
  if a == b:
    return 0.0
  return 1.0

def compare_set(a: set, b: set):
  """Return a distance in [0.0, 1.0] between two sets.

  The distance is ``1 - |a & b| / max(|a|, |b|)``: 0.0 when the sets are
  equal and 1.0 when they share no element. Two empty sets are treated as
  identical (0.0) instead of raising ``ZeroDivisionError``.
  """
  larger = max(len(a), len(b))
  if larger == 0:
    # Both sets are empty: nothing differs, and dividing by zero must be avoided.
    return 0.0
  return 1.0 - len(a & b)/larger

def to_yday(bdstr: str) -> int:
  """Return the 1-based day of the current year for an 'MM-DD' string.

  A '02-29' value is mapped to February 28 when the current year is not a
  leap year, so that such a birthday does not raise ``ValueError``. Any
  other invalid month/day combination still raises ``ValueError``.
  """
  m, d = map(int, bdstr.split('-'))
  if (m, d) == (2, 29) and not TODAY_LEAP:
    # February 29 does not exist this year; fall back to the previous day.
    d = 28
  return datetime.date(TODAY_YEAR, m, d).timetuple().tm_yday
  
def compare_yday(a: str, b: str):
  """Return the circular distance between two 'MM-DD' dates, scaled by DAYMOD.

  The dates are converted to days of the year; the gap between them is
  measured the short way around the year and divided by half the year
  length, giving 0.0 for the same day and 1.0 for dates half a year apart.
  """
  gap = abs(to_yday(a) - to_yday(b))
  # Going around the year end may be shorter than the direct gap.
  shortest = min(gap, DAYMOD*2 - gap)
  return shortest / DAYMOD


In [9]:
# Empty square table with character ids on both axes; every cell starts as NaN.
table = pd.DataFrame(
  data=None,
  index=chars.index,
  columns=chars.index,
)
table.head(n=5)

id,admiral,agents,agnes,al,alfonso,alice,alli,amelia,anabelle,anchovy,...,wartjr,weber,wendy,whitney,willow,winnie,wolfgang,yuka,zell,zucker
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
admiral,,,,,,,,,,,...,,,,,,,,,,
agents,,,,,,,,,,,...,,,,,,,,,,
agnes,,,,,,,,,,,...,,,,,,,,,,
al,,,,,,,,,,,...,,,,,,,,,,
alfonso,,,,,,,,,,,...,,,,,,,,,,


In [14]:
# Inspect the top-left cell by position.
table.iloc[0, 0]

nan