# Info
본 파일은 로스트아크의 캐릭터 스펙(Feature: 각인, 보석, 카드 등)에 따른 아이템 레벨을 예측하고 

현재 아이템 레벨에 비해 스펙이 낮을 경우 스펙을 높일 것을 추천하고 

아이템 레벨에 비해 스펙이 높을 경우 아이템 레벨을 높일 것을 추천하는 것을 목표로 함.

---------------------------------------
더 나아가 어떤 스펙을 어느 정도로 올리면 좋은지도 추천해주고자 함.

----------------------------
현재 MAE 9~10

목표 MAE 5

1. profile_table: expeditionLevel, totalSkillPoint, characterLevel, itemMaxLevel(Target)
2. accessory_table: accessory_quality, accessory_grade, accessory_tier
3. avatar_table: avatar_grade
4. card_table: setName, setPoint
5. engraving_table: grantName, grantPoint, engraving_name, engraving_level
6. equipment_table: equipment_quality, equipment_grade, equipment_setLevel
7. gem_table: gem_grade, gem_level, gem_tier
8. skill_table: skill_tripod_point
9. stats_table: stat_값

추가 고려 변수
ACC: 팔찌_effect, AS_setLevel
EQP: 장비_alchemyName, 장비_alchemyPoint

PCA
tripod, stat

----------------
딜러, 서폿간의 구분으로 2개의 모델 개발?

In [10]:
import sys
sys.path.append('./package')
import lostark as lok

In [11]:
import pandas as pd
import numpy as np
import joblib
from pprint import pprint
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [25]:
tables = ['profile_table','accessory_table', 'avatar_table', 'card_table', 'engraving_table', 
         'equipment_table', 'gem_table', 'skill_table', 'stats_table']
accessories = ['목걸이', '귀걸이1', '귀걸이2', '반지1', '반지2']
accessory_column = ['_quality', '_grade', '_tier']
avatars = ['무기1', '무기2', '상의1', '상의2', '하의1', '하의2']
avatar_column = ['_grade']
equipments = ['무기', '투구', '어깨', '상의', '하의', '장갑']
equipment_column = ['_quality', '_grade', '_setLevel']
gem_column = ['_grade', '_level', '_tier']
stat_column = ['치명', '특화', '신속', '제압', '인내', '숙련']

sql = f"""SELECT profile_table.characterCode, profile_table.expeditionLevel, profile_table.totalSkillPoint,
profile_table.characterLevel, profile_table.itemMaxLevel,

{', '.join(f"accessory_table.{accessory}{column}" for accessory in accessories for column in accessory_column)},

{', '.join(f"avatar_table.{avatar}{column}" for avatar in avatars for column in avatar_column)},

{', '.join(f"card_table.setName{i}, card_table.setPoint{i}" for i in range(1, 5))},

{', '.join(f"engraving_table.grantName{i}, engraving_table.grantPoint{i}" for i in range(1, 3))},
{', '.join(f"engraving_table.engraving{i}_name, engraving_table.engraving{i}_level" for i in range(1, 12))},

{', '.join(f"equipment_table.{equipment}{column}" for equipment in equipments for column in equipment_column)},

{', '.join(f"gem_table.gem{i}{column}" for i in range(1, 12) for column in gem_column)},

{', '.join(f"skill_table.skill{i}_tripod{j}_point" for i in range(1, 17) for j in range(1, 4))},

{', '.join(f"stats_table.{column}_값" for column in stat_column)}

FROM profile_table
{' '.join(f"LEFT JOIN {tables[i+1]} ON {tables[i]}.characterCode = {tables[i+1]}.characterCode"
for i in range(len(tables)-1))};
"""
db, cursor = lok.get_db_cursor()
cursor.execute(sql)
df = pd.DataFrame(cursor.fetchall())
db.close()

In [26]:
columns = ["characterCode", "expeditionLevel", "totalSkillPoint", "characterLevel", "itemMaxLevel"]
for accessory in accessories:
    for column in accessory_column:
        columns.append(f"{accessory}{column}")
for avatar in avatars :
    for column in avatar_column:
        columns.append(f"avatar_{avatar}{column}") 
for i in range(1, 5):
    columns.append(f"card_setName{i}")
    columns.append(f"card_setPoint{i}")
for i in range(1, 3):
    columns.append(f"engraving_grantName{i}")
    columns.append(f"engraving_grantPoint{i}")
for i in range(1, 12):
    columns.append(f"engraving{i}_name")
    columns.append(f"engraving{i}_level")
for equipment in equipments:
    for column in equipment_column:
        columns.append(f"equipment_{equipment}{column}")
for i in range(1, 12):
    for column in gem_column:
        columns.append(f"gem{i}{column}")
for i in range(1, 17):
    for j in range(1, 4):
        columns.append(f"skill{i}_tripod{j}_point")
for column in stat_column:
    columns.append(f"{column}_값")

df.columns = columns

In [27]:
df.to_csv('orgin_df.csv', index=False)

In [14]:
# 라벨 인코딩
from sklearn.preprocessing import LabelEncoder
grade_le = LabelEncoder()
uniqueGrade = ['일반', '고급', '전설', '영웅', '희귀', '유물', '고대', '에스더', 'None']
grade_le_fit = grade_le.fit(uniqueGrade)

uniqueCardSet = []
for i in range(1, 5):
    uniqueCardSet = list(set(uniqueCardSet + list(df[f"card_setName{i}"].fillna('None').unique())))
cardSet_le = LabelEncoder()
cardSet_le_fit = cardSet_le.fit(uniqueCardSet)

uniqueEngraving = []
for i in range(1, 3):
    uniqueEngraving = list(set(uniqueEngraving + list(df[f"engraving_grantName{i}"].fillna('None').unique())))
for i in range(1, 12):
    uniqueEngraving = list(set(uniqueEngraving + list(df[f"engraving{i}_name"].fillna('None').unique())))
engraving_le = LabelEncoder()
engraving_le_fit = engraving_le.fit(uniqueEngraving)

joblib.dump(grade_le_fit, './label/grade_label.pkl')
joblib.dump(cardSet_le_fit, './label/card_label.pkl')
joblib.dump(engraving_le_fit, './label/engraving_label.pkl')

['./label/engraving_label.pkl']

In [15]:
# 결측치 처리, 라벨 인코딩, 데이터타입 수정
df['itemMaxLevel'] = df['itemMaxLevel'].astype('float64')
for accessory in accessories:
    df[f'{accessory}_quality'] = df[f'{accessory}_quality'].fillna(-1).astype('int64')
    df[f'{accessory}_grade'] = grade_le_fit.transform(df[f'{accessory}_grade'].fillna('None'))
    df[f'{accessory}_tier'] = df[f'{accessory}_tier'].fillna(-1).astype('int64')
for avatar in avatars :
    df[f"avatar_{avatar}_grade"] =  grade_le_fit.transform(df[f"avatar_{avatar}_grade"].fillna('None'))
for i in range(1, 5):
    df[f"card_setName{i}"] = cardSet_le_fit.transform(df[f"card_setName{i}"].fillna('None'))
    df[f"card_setPoint{i}"] = df[f"card_setPoint{i}"].fillna(-1).astype('int64')
for i in range(1, 3):
    df[f"engraving_grantName{i}"] = engraving_le_fit.transform(df[f"engraving_grantName{i}"].fillna('None'))
    df[f"engraving_grantPoint{i}"] = df[f"engraving_grantPoint{i}"].fillna('-1').astype('int64')
for i in range(1, 12):
    df[f"engraving{i}_name"] = engraving_le_fit.transform(df[f"engraving{i}_name"].fillna('None'))
    df[f"engraving{i}_level"] = df[f"engraving{i}_level"].fillna(-1).astype('int64')
for equipment in equipments:
    df[f"equipment_{equipment}_grade"] = grade_le_fit.transform(df[f"equipment_{equipment}_grade"].fillna('None'))  
    df[f"equipment_{equipment}_setLevel"] = df[f"equipment_{equipment}_setLevel"].fillna(-1).astype('int64')
    df[f"equipment_{equipment}_quality"] = df[f"equipment_{equipment}_quality"].fillna(-1).astype('int64')
for i in range(1, 12):
    df[f"gem{i}_grade"] = grade_le_fit.transform(df[f"gem{i}_grade"].fillna('None'))
    df[f"gem{i}_level"] = df[f"gem{i}_level"].fillna(-1).astype('int64')
    df[f"gem{i}_tier"] = df[f"gem{i}_tier"].fillna(-1).astype('int64')
for i in range(1, 17):
    for j in range(1, 4):
        df[f"skill{i}_tripod{j}_point"] = df[f"skill{i}_tripod{j}_point"].fillna(-1).astype('int64')


In [23]:
df.to_csv('df.csv', index=False)

아래는 다중공선성을 고려하여 PCA를 적용한 데이터

In [17]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
df_scale = scaler.fit_transform(df)
df_scale = pd.DataFrame(df_scale, columns=df.columns)
joblib.dump(scaler, './Scaler/Scaler.pkl')

['./Scaler/Scaler.pkl']

In [18]:
dfc = df.copy()
from sklearn.decomposition import PCA
# gem grade
n_components = 2
pca = PCA(n_components=n_components)
columns = [f'gem{i}_grade' for i in range(1, 12)]
df_pca = pca.fit_transform(df_scale[columns])
joblib.dump(pca, './PCA/gem_grade_PCA.pkl')
dfc.drop(columns, axis=1, inplace=True)
df_pca = pd.DataFrame(df_pca, columns=['gem_grade_PCA1', 'gem_grade_PCA2'])
dfc = pd.concat([dfc, df_pca], axis=1)

# gem tier
n_components = 3
pca = PCA(n_components=n_components)
columns = [f'gem{i}_tier' for i in range(1, 12)]
df_pca = pca.fit_transform(df_scale[columns])
joblib.dump(pca, './PCA/gem_tier_PCA.pkl')
dfc.drop(columns, axis=1, inplace=True)
df_pca = pd.DataFrame(df_pca, columns=['gem_tier_PCA1', 'gem_tier_PCA2', 'gem_tier_PCA3'])
dfc = pd.concat([dfc, df_pca], axis=1)

# gem level
n_components = 2
pca = PCA(n_components=n_components)
columns = [f'gem{i}_level' for i in range(1, 12)]
df_pca = pca.fit_transform(df_scale[columns])
joblib.dump(pca, './PCA/gem_level_PCA.pkl')
dfc.drop(columns, axis=1, inplace=True)
df_pca = pd.DataFrame(df_pca, columns=['gem_level_PCA1', 'gem_level_PCA2'])
dfc = pd.concat([dfc, df_pca], axis=1)

# skill tripod point
n_components = 4
pca = PCA(n_components=n_components)
columns = [f'skill{i}_tripod{j}_point' for i in range(9, 17) for j in range(1, 4)]
df_pca = pca.fit_transform(df_scale[columns])
joblib.dump(pca, './PCA/skill_PCA.pkl')
dfc.drop(columns, axis=1, inplace=True)
df_pca = pd.DataFrame(df_pca, columns=['skill_tripod_point_PCA1', 'skill_tripod_point_PCA2',
    'skill_tripod_point_PCA3', 'skill_tripod_point_PCA4'])
dfc = pd.concat([dfc, df_pca], axis=1)

# equipment grade
n_components = 1
pca = PCA(n_components=n_components)
columns = [f'equipment_{e}_grade' for e in equipments]
df_pca = pca.fit_transform(df_scale[columns])
joblib.dump(pca, './PCA/equipment_grade_PCA.pkl')
df.drop(columns, axis=1, inplace=True)
df_pca = pd.DataFrame(df_pca, columns=['equipment_grade_PCA1'])
df = pd.concat([df, df_pca], axis=1)

# euqipment setLevel
n_components = 2
pca = PCA(n_components=n_components)
columns = [f'equipment_{e}_setLevel' for e in equipments]
df_pca = pca.fit_transform(df_scale[columns])
joblib.dump(pca, './PCA/equipment_setLevel_PCA.pkl')
dfc.drop(columns, axis=1, inplace=True)
df_pca = pd.DataFrame(df_pca, columns=['equipment_setLevel_PCA1', 'equipment_setLevel_PCA2'])
dfc = pd.concat([dfc, df_pca], axis=1)

# euqipment quality
n_components = 2
pca = PCA(n_components=n_components)
columns = [f'equipment_{e}_quality' for e in equipments]
df_pca = pca.fit_transform(df_scale[columns])
joblib.dump(pca, './PCA/equipment_quality_PCA.pkl')
dfc.drop(columns, axis=1, inplace=True)
df_pca = pd.DataFrame(df_pca, columns=['equipment_quality_PCA1', 'equipment_quality_PCA2'])
dfc = pd.concat([dfc, df_pca], axis=1)


# explained_variance_ratio = pca.explained_variance_ratio_
# print("Explained Variance Ratio:", explained_variance_ratio)
# print("Total Explained Variance:", np.sum(explained_variance_ratio))

# cumulative_explained_variance_ratio = np.cumsum(explained_variance_ratio)
# print("Cumulative Explained Variance Ratio:", cumulative_explained_variance_ratio)

# import matplotlib.pyplot as plt
# plt.plot(pca.explained_variance_ratio_)

In [21]:
dfc.head(2)

Unnamed: 0,characterCode,expeditionLevel,totalSkillPoint,characterLevel,itemMaxLevel,목걸이_quality,목걸이_grade,목걸이_tier,귀걸이1_quality,귀걸이1_grade,귀걸이1_tier,귀걸이2_quality,귀걸이2_grade,귀걸이2_tier,반지1_quality,반지1_grade,반지1_tier,반지2_quality,반지2_grade,반지2_tier,avatar_무기1_grade,avatar_무기2_grade,avatar_상의1_grade,avatar_상의2_grade,avatar_하의1_grade,avatar_하의2_grade,card_setName1,card_setPoint1,card_setName2,card_setPoint2,card_setName3,card_setPoint3,card_setName4,card_setPoint4,engraving_grantName1,engraving_grantPoint1,engraving_grantName2,engraving_grantPoint2,engraving1_name,engraving1_level,engraving2_name,engraving2_level,engraving3_name,engraving3_level,engraving4_name,engraving4_level,engraving5_name,engraving5_level,engraving6_name,engraving6_level,engraving7_name,engraving7_level,engraving8_name,engraving8_level,engraving9_name,engraving9_level,engraving10_name,engraving10_level,engraving11_name,engraving11_level,equipment_무기_grade,equipment_투구_grade,equipment_어깨_grade,equipment_상의_grade,equipment_하의_grade,equipment_장갑_grade,skill1_tripod1_point,skill1_tripod2_point,skill1_tripod3_point,skill2_tripod1_point,skill2_tripod2_point,skill2_tripod3_point,skill3_tripod1_point,skill3_tripod2_point,skill3_tripod3_point,skill4_tripod1_point,skill4_tripod2_point,skill4_tripod3_point,skill5_tripod1_point,skill5_tripod2_point,skill5_tripod3_point,skill6_tripod1_point,skill6_tripod2_point,skill6_tripod3_point,skill7_tripod1_point,skill7_tripod2_point,skill7_tripod3_point,skill8_tripod1_point,skill8_tripod2_point,skill8_tripod3_point,치명_값,특화_값,신속_값,제압_값,인내_값,숙련_값,gem_grade_PCA1,gem_grade_PCA2,gem_tier_PCA1,gem_tier_PCA2,gem_tier_PCA3,gem_level_PCA1,gem_level_PCA2,skill_tripod_point_PCA1,skill_tripod_point_PCA2,skill_tripod_point_PCA3,skill_tripod_point_PCA4,equipment_setLevel_PCA1,equipment_setLevel_PCA2,equipment_quality_PCA1,equipment_quality_PCA2
0,1,300,420,60,1385.0,76,4,3,100,7,3,89,4,3,30,7,3,74,7,3,4,0,4,0,4,0,48,30,3,-1,3,-1,3,-1,58,12,64,12,58,3,64,3,9,1,8,1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,7,7,7,7,7,7,1,1,-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,977.0,60.0,745.0,70.0,66.0,66.0,-0.687281,0.100378,2.614,2.50586,-1.266337,-0.381986,0.635057,-0.61873,0.180154,-0.090794,0.071865,0.786934,0.068493,3.41926,0.111737
1,2,0,0,1,0.0,-1,0,-1,-1,0,-1,-1,0,-1,-1,0,-1,-1,0,-1,0,0,0,0,0,0,3,-1,3,-1,3,-1,3,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,-1,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,,,,,,,-2.307297,-0.119824,-2.450812,-0.135185,-0.066015,-2.35967,-0.123886,-0.61873,0.180154,-0.090794,0.071865,-2.49183,0.237591,-3.182018,0.041192


In [24]:
dfc.to_csv('dfPCA.csv', index=False)

In [8]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
x = df.drop(columns=['itemMaxLevel'])
y = df['itemMaxLevel'].astype('float64')

xtr, xt, ytr, yt = train_test_split(x, y, train_size=0.7, random_state=42)

In [9]:
x = dfc.drop(columns=['itemMaxLevel'])
y = dfc['itemMaxLevel'].astype('float64')

xtrP, xtP, ytrP, ytP = train_test_split(x, y, train_size=0.7, random_state=42)

NameError: name 'dfc' is not defined

모델 학습

In [14]:
# 학습에 시간이 오래 걸림
rf = RandomForestRegressor(random_state=23)
rf.fit(xtr, ytr)

joblib.dump(rf, './model/rf.pkl')

['./model/rf.pkl']

In [19]:
rfPCA = RandomForestRegressor(random_state=23)
rfPCA.fit(xtrP, ytrP)

joblib.dump(rfPCA, './model/rfPCA.pkl')

['./model/rfPCA.pkl']

모델 로드

In [3]:
import joblib
model = joblib.load('./model/Auto.pkl')
modelPCA = joblib.load('./model/AutoPCA.pkl')

성능 평가

In [20]:
predict_train = model.predict(xtr)
print(mean_squared_error(ytr, predict_train))
print(mean_absolute_error(ytr, predict_train))
print(r2_score(ytr, predict_train))

183.48156038850962
10.016712048876864
0.9634976697266031


In [28]:
predict_train = modelPCA.predict(xtrP)
print(mean_squared_error(ytrP, predict_train))
print(mean_absolute_error(ytrP, predict_train))
print(r2_score(ytrP, predict_train))

175.8329985817211
9.81693471950053
0.9650192958158773


---

In [41]:
predict_test = model.predict(xt)
print(mean_squared_error(yt, predict_test))
print(mean_absolute_error(yt, predict_test))
print(r2_score(yt, predict_test))

184.49732150095295
10.028431709412569
0.9630768522567383


In [44]:
predict_test = modelPCA.predict(xtP)
print(mean_squared_error(ytP, predict_test))
print(mean_absolute_error(ytP, predict_test))
print(r2_score(ytP, predict_test))

175.90177204801654
9.824259420690016
0.9647970655357352


In [1]:
import lostark as lok
profile_responses = lok.get_total_profile_responses('roxy0')
if profile_responses != None:
    lok.insert_character_data('roxy0')

In [2]:
lok.get_df_all_raw_table('roxy0')

Unnamed: 0,characterCode,characterName,ArmoryProfile,ArmoryEquipment,ArmoryAvatars,ArmorySkills,ArmoryEngraving,ArmoryCard,ArmoryGem,ColosseumInfo,Collectibles
0,570279,Roxy0,{'CharacterImage': 'https://img.lostark.co.kr/...,"[{'Type': '무기', 'Name': '+17 굳건한 지배의 균형 롱 스태프'...","[{'Type': '무기 아바타', 'Name': '고결한 귀공녀 롱 스태프', '...","[{'Name': '블레이즈', 'Icon': 'https://cdn-lostark...","{'Engravings': [{'Slot': 0, 'Name': '점화', 'Ico...","{'Cards': [{'Slot': 0, 'Name': '원포', 'Icon': '...","{'Gems': [{'Slot': 0, 'Name': '7레벨 멸화의 보석', 'I...","{'Rank': 0, 'PreRank': 0, 'Exp': 0, 'Colosseum...","[{'Type': '모코코 씨앗', 'Icon': 'https://cdn-losta..."


In [45]:
lok.get_table_df('profile_table', 'Eris0')

Unnamed: 0,characterCode,expeditionLevel,pvpGradeName,townLevel,title,guildMemberGrade,guildName,usingSkillPoint,totalSkillPoint,지성,담력,매력,친절,serverName,characterName,characterLevel,characterClassName,itemAvgLevel,itemMaxLevel
0,570283,240,19급,70,거친,임원,Rudius,414,414,606,619,547,524,아브렐슈드,Eris0,60,블레이드,1551.67,1551.67


In [1]:
import sys
sys.path.append('./package')
import lostark as lok

In [2]:
import joblib
model = joblib.load('./model/Auto.pkl')
modelPCA = joblib.load('./model/AutoPCA.pkl')

In [3]:
cdf = lok.get_predict_df('연료전지')
x = cdf.drop(columns=['itemMaxLevel'])
y = cdf['itemMaxLevel'].astype('float64')
predict_data = model.predict(x)
predict_data

CatBoostError: C:/Go_Agent/pipelines/BuildMaster/catboost.git/catboost/libs/data/model_dataset_compatibility.cpp:81: At position 60 should be feature with name equipment_무기_setLevel (found equipment_무기_grade).

In [31]:
!pip install pycaret[notebook]

Collecting pycaret[notebook]
  Using cached pycaret-3.2.0-py3-none-any.whl (484 kB)
Collecting joblib>=1.2.0
  Using cached joblib-1.3.2-py3-none-any.whl (302 kB)
Collecting matplotlib<=3.6,>=3.3.0
  Using cached matplotlib-3.6.0-cp310-cp310-win_amd64.whl (7.2 MB)
Collecting importlib-metadata>=4.12.0
  Using cached importlib_metadata-7.0.1-py3-none-any.whl (23 kB)
Collecting tbats>=1.1.3
  Using cached tbats-1.1.3-py3-none-any.whl (44 kB)
Collecting schemdraw==0.15
  Using cached schemdraw-0.15-py3-none-any.whl (106 kB)
Collecting sktime!=0.17.1,!=0.17.2,!=0.18.0,<0.22.0,>=0.16.1
  Using cached sktime-0.21.1-py3-none-any.whl (17.1 MB)
Collecting pmdarima!=1.8.1,<3.0.0,>=1.8.0
  Using cached pmdarima-2.0.4-cp310-cp310-win_amd64.whl (613 kB)
Collecting pyod>=1.0.8
  Using cached pyod-1.1.3-py3-none-any.whl
Collecting deprecation>=2.1.0
  Using cached deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Collecting scikit-plot>=0.3.7
  Using cached scikit_plot-0.3.7-py3-none-any.whl (33 kB)
Col



In [32]:
from pycaret.regression import *