### 1. API에서 데이터 불러오기

In [None]:
import requests
import time
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

# API key and request header. Every request made through `session` carries the
# key in the x-nxopen-api-key header.
API_KEY = "api 키를 입력하세요"
headers = {"x-nxopen-api-key": API_KEY}
session = requests.Session()
session.headers.update(headers)

# The thread pools in this notebook run 50 workers against a single host, but
# a default requests adapter keeps only 10 pooled connections per host, so the
# surplus connections are opened and discarded instead of being reused.
# Size the pool to match the worker count.
_pooled_adapter = requests.adapters.HTTPAdapter(pool_maxsize=50)
session.mount("https://", _pooled_adapter)

# Value sent as the `date` query parameter of the ranking and detail requests.
target_date = "날짜를 입력하세요"

# STEP 1: fetch the overall ranking in parallel
print("\u25b6 STEP 1: 종합 랭킹 조회 (병렬)")

def fetch_ranking_page(page):
    """Fetch one page of the overall ranking for ``target_date``.

    Args:
        page: Page number sent as the ``page`` query parameter.

    Returns:
        A list of dicts with the keys ``character_name``, ``world_name`` and
        ``ranking``. An empty list is returned when the response status is
        not 200 or when any exception occurs (the problem is printed, never
        raised).
    """
    endpoint = f"https://open.api.nexon.com/maplestory/v1/ranking/overall?date={target_date}&page={page}"
    try:
        response = session.get(endpoint, timeout=10)
        if response.status_code != 200:
            print(f"❌ Page {page} 단일: {response.status_code}")
            return []

        entries = []
        for row in response.json().get("ranking", []):
            entries.append(
                {
                    "character_name": row["character_name"],
                    "world_name": row["world_name"],
                    "ranking": row.get("ranking"),
                }
            )
        return entries
    except Exception as e:
        # Best effort: a failed page is reported and contributes no rows.
        print(f"❌ Page {page} 예외: {e}")
    return []

user_list = []
with ThreadPoolExecutor(max_workers=50) as executor:
    # Pages 1..3000 (the original note: query only up to 600,000 characters).
    futures = [executor.submit(fetch_ranking_page, page) for page in range(1, 3001)]
    for i, future in enumerate(as_completed(futures), 1):
        user_list.extend(future.result())
        if i % 100 == 0:
            print(f"🔹 STEP 1 진행률: {i} 페이지 완료")

# as_completed() yields pages in completion order, not page order, so the list
# is shuffled at this point. Restore ranking order so that slicing the head of
# the list (see the optional line below) really selects the top characters.
# Entries without a ranking value are placed last.
user_list.sort(key=lambda u: (u["ranking"] is None, u["ranking"] or 0))

print(f"총 {len(user_list)}명의 사용자 정보 획득.")

# For a quick test run, keep only the top 100 characters.
#user_list = user_list[:100]

# STEP 2: ocid lookup
print("\u25b6 STEP 2: ocid 조회 (동시 처리)")

def fetch_ocid(user):
    """Look up a character's ocid and store it on ``user`` in place.

    Args:
        user: Dict holding at least ``character_name`` and ``world_name``.

    Returns:
        The same ``user`` dict, with ``user["ocid"]`` set to the value from
        the response, or to ``None`` when the status is not 200 or the
        request raises (the problem is printed, never raised).
    """
    name = user["character_name"]
    world = user["world_name"]
    url = "https://open.api.nexon.com/maplestory/v1/id"
    # Let requests build the query string so that the name and world are
    # always percent-encoded, instead of splicing raw text into the URL.
    params = {"character_name": name, "world_name": world}
    try:
        res = session.get(url, params=params, timeout=10)
        if res.status_code == 200:
            user["ocid"] = res.json().get("ocid")
        else:
            user["ocid"] = None
            print(f"❌ ocid 오류: {name} ({world}) - 상태 코드 {res.status_code}")
    except Exception as e:
        # Best effort: one failed lookup must not stop the whole batch.
        print(f"❌ ocid 예외: {name} ({world}) - {e}")
        user["ocid"] = None
    return user

# Resolve the ocid of every collected character with 50 worker threads.
with ThreadPoolExecutor(max_workers=50) as pool:
    pending = []
    for entry in user_list:
        pending.append(pool.submit(fetch_ocid, entry))
    # fetch_ocid writes the ocid onto each dict in place; result() is called
    # only so that an exception raised inside a worker surfaces here.
    for finished in as_completed(pending):
        finished.result()

print("ocid 조회 완료.")

# STEP 3: fetch detailed information
print("\u25b6 STEP 3: 세부 정보 조회 (동시 처리)")

# Fields copied verbatim from the character/basic response.
_BASIC_FIELDS = (
    "character_gender",
    "character_class",
    "character_class_level",
    "character_level",
    "character_exp",
    "character_exp_rate",
    "character_guild_name",
    "character_date_create",
)

# Fields copied verbatim from the user/union response.
_UNION_FIELDS = (
    "union_level",
    "union_artifact_level",
    "union_artifact_exp",
    "union_artifact_point",
)

# (substring looked for in the set name, result key that accumulates the count)
_SET_COUNT_KEYS = (
    ("앱솔랩스", "set_absorlab_count"),
    ("루타비스", "set_lubatiss_count"),
    ("아케인셰이드", "set_arcane_count"),
    ("에테르넬", "set_eternal_count"),
)


def _apply_basic(result, payload):
    """Copy the basic character fields and the two flags into ``result``."""
    for key in _BASIC_FIELDS:
        result[key] = payload.get(key)
    result["access_flag"] = payload.get("access_flag", "false")
    result["liberation_quest_clear_flag"] = payload.get("liberation_quest_clear_flag", "false")


def _apply_popularity(result, payload):
    """Copy the popularity value into ``result``."""
    result["popularity"] = payload.get("popularity")


def _sum_symbol_levels(symbols, keyword):
    """Sum ``symbol_level`` over the symbols whose name contains ``keyword``."""
    return sum(
        (s.get("symbol_level") or 0)
        for s in symbols
        if keyword in (s.get("symbol_name") or "")
    )


def _apply_symbols(result, payload):
    """Store the summed levels of the arcane and authentic symbols."""
    # `or []` also covers an explicit null in the payload.
    symbols = payload.get("symbol") or []
    result["arcane_sum"] = _sum_symbol_levels(symbols, "아케인")
    result["authentic_sum"] = _sum_symbol_levels(symbols, "어센틱")


def _apply_set_effects(result, payload):
    """Accumulate ``total_set_count`` per tracked equipment set."""
    for effect in payload.get("set_effect") or []:
        set_name = effect.get("set_name") or ""
        count = effect.get("total_set_count") or 0
        for keyword, key in _SET_COUNT_KEYS:
            if keyword in set_name:
                result[key] += count
                break  # first matching keyword wins, like an if/elif chain


def _apply_dojang(result, payload):
    """Copy the best dojang floor into ``result``."""
    result["dojang_best_floor"] = payload.get("dojang_best_floor")


def _apply_union(result, payload):
    """Copy the union fields into ``result``."""
    for key in _UNION_FIELDS:
        result[key] = payload.get(key)


# (endpoint path, label used in log messages, function that fills `result`)
_DETAIL_ENDPOINTS = (
    ("character/basic", "기본 정보", _apply_basic),
    ("character/popularity", "인기도", _apply_popularity),
    ("character/symbol-equipment", "심볼", _apply_symbols),
    ("character/set-effect", "세트 효과", _apply_set_effects),
    ("character/dojang", "도장", _apply_dojang),
    ("user/union", "유니온", _apply_union),
)


def fetch_details(user):
    """Collect the per-character details for one ranked user.

    Six endpoints are queried in sequence (basic, popularity, symbols, set
    effects, dojang, union), each with ``ocid`` and ``target_date``. Every
    endpoint is best effort: a non-200 status or an exception is printed and
    the fields owned by that endpoint keep their defaults.

    Args:
        user: Dict with ``character_name``, ``world_name`` and, optionally,
            ``ocid`` and ``ranking``.

    Returns:
        ``None`` when ``user`` has no (truthy) ``ocid``; otherwise a flat
        dict with one key per output column.
    """
    ocid = user.get("ocid")
    if not ocid:
        return None

    result = {
        "ocid": ocid,
        "ranking": user.get("ranking"),
        "character_name": user["character_name"],
        "world_name": user["world_name"],
        "character_gender": None,
        "character_class": None,
        "character_class_level": None,
        "character_level": None,
        "character_exp": None,
        "character_exp_rate": None,
        "character_guild_name": None,
        "character_date_create": None,
        "access_flag": "false",
        "liberation_quest_clear_flag": "false",
        "popularity": None,
        "arcane_sum": None,
        "authentic_sum": None,
        "set_absorlab_count": 0,
        "set_lubatiss_count": 0,
        "set_arcane_count": 0,
        "set_eternal_count": 0,
        "dojang_best_floor": None,
        "union_level": None,
        "union_artifact_level": None,
        "union_artifact_exp": None,
        "union_artifact_point": None
    }

    for path, label, apply in _DETAIL_ENDPOINTS:
        url = f"https://open.api.nexon.com/maplestory/v1/{path}?ocid={ocid}&date={target_date}"
        try:
            res = session.get(url, timeout=10)
            if res.status_code == 200:
                apply(result, res.json())
            else:
                # Report rejected requests the same way fetch_ocid does,
                # instead of dropping them silently.
                print(f"❌ {label} 오류 ({ocid}): 상태 코드 {res.status_code}")
        except Exception as e:
            # Best effort: one failing endpoint must not lose the others.
            print(f"❌ {label} 실패 ({ocid}): {e}")

    return result

# Run STEP 3: fetch details for every character that has an ocid.
detailed_info = []
with ThreadPoolExecutor(max_workers=50) as pool:
    pending = []
    for entry in user_list:
        if entry.get("ocid"):
            pending.append(pool.submit(fetch_details, entry))
    # Rows arrive in completion order; None results are skipped.
    finished_rows = (done.result() for done in as_completed(pending))
    detailed_info.extend(row for row in finished_rows if row is not None)

# STEP 4: save the collected rows as CSV
print("\u25b6 STEP 4: CSV 저장")
output_filename = f"achievement_fullinfo_{target_date}.csv"
df_out = pd.DataFrame(detailed_info)
df_out.to_csv(output_filename, index=False, encoding="utf-8-sig")
print(f"✅ 완료! 저장된 파일: {output_filename}")


### 2. 데이터 필터링

In [None]:
import pandas as pd

# Input files. Change these to the real paths before running.
file_oct17 = '/content/drive/MyDrive/새폴더/캐릭터 레벨 기준/날짜별 모음/character_level_2024-10-17.csv'  # Oct 17 file
file_oct10 = '/content/drive/MyDrive/achievement_fullinfo_2024-10-10.csv'  # Oct 10 file

# Load both files (Oct 17 first, then Oct 10).
df_oct17, df_oct10 = (pd.read_csv(path) for path in (file_oct17, file_oct10))

# ocids whose character_level is 270 or higher in the Oct 17 file.
level_mask = df_oct17['character_level'] >= 270
high_level_ocids = df_oct17.loc[level_mask, 'ocid'].unique()

print(f"10월 17일 파일에서 character_level이 270 이상인 ocid 수: {len(high_level_ocids)}")

# Keep only the Oct 10 rows that belong to those ocids.
row_is_kept = df_oct10['ocid'].isin(high_level_ocids)
filtered_oct10 = df_oct10[row_is_kept]
matched_ocids = filtered_oct10['ocid'].unique()

print(f"10월 10일 파일에서 매칭된 ocid 수: {len(matched_ocids)}")

# Save the filtered rows.
filtered_oct10.to_csv('filtered_oct10_result.csv', index=False, encoding='utf-8-sig')

print("필터링 완료! 결과가 'filtered_oct10_result.csv'에 저장되었습니다.")

In [None]:
import pandas as pd

# Paths: adjust the input path as needed; the output can overwrite the input
# or go to a separate file.
file_path = "/content/drive/MyDrive/새폴더/캐릭터 레벨 기준/병합하기 위한 과정/final_data_changed_only.csv"
save_path = "/content/final_clean_1010~1017.csv"

# 1) Load the source CSV.
df = pd.read_csv(file_path)

# 2) Keep only the rows where exp_1010 is present, then renumber the index.
has_exp_1010 = df["exp_1010"].notna()
df_clean = df.loc[has_exp_1010].reset_index(drop=True)

# 3) Write the cleaned table.
df_clean.to_csv(save_path, index=False, encoding='utf-8-sig')

print(f"✅ 결측 행 제거 및 저장 완료 → {save_path}  (남은 행 수: {len(df_clean)})")


### 3. LSTM

In [None]:
!pip install lifelines
from lifelines.utils import concordance_index

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, roc_auc_score
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the time-series table; snapshot_date is parsed as a date while reading.
df = pd.read_csv(
    "/content/time_series_final1.csv",
    parse_dates=["snapshot_date"],
)

In [None]:
# Date handling: unparseable values become NaT.
for date_col in ("snapshot_date", "character_date_create"):
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")

# Keep only rows with character_class_level == 6, ordered per character by date.
class_level_is_6 = df["character_class_level"] == 6
df = df.loc[class_level_is_6].copy()
df = df.sort_values(["ocid", "snapshot_date"])

# Derived features.
df["has_guild"] = df["character_guild_name"].notna().astype(int)
# Drop timezone information from both date columns before subtracting them.
for date_col in ("snapshot_date", "character_date_create"):
    df[date_col] = df[date_col].dt.tz_localize(None)
elapsed = df["snapshot_date"] - df["character_date_create"]
df["account_age_days"] = elapsed.dt.days.clip(lower=0)  # negative ages become 0

# Features fed to the model.
selected_features = [
    "world_name",
    "character_class",
    "liberation_quest_clear_flag",
    "popularity",
    "arcane_sum",
    "authentic_sum",
    "union_level",
    "authentic_growth_sum",
    "has_guild",
    "account_age_days",
]


def _rows_for(frame, ocids):
    """Return a copy of the rows of ``frame`` whose ocid is in ``ocids``."""
    return frame[frame["ocid"].isin(ocids)].copy()


# Split by ocid so one character never appears in two splits:
# 80/20 into train/test, then the train part 80/20 again into train/validation.
unique_ocids = df["ocid"].unique()
train_ocids, test_ocids = train_test_split(unique_ocids, test_size=0.2, random_state=42)
train_ocids_sub, val_ocids = train_test_split(train_ocids, test_size=0.2, random_state=42)

train_df_all = _rows_for(df, train_ocids)
test_df = _rows_for(df, test_ocids)
train_df = _rows_for(train_df_all, train_ocids_sub)
val_df = _rows_for(train_df_all, val_ocids)

# Categorical encoding.
# Codes are learned from the training split only. Code 0 is reserved for
# categories that do not occur in the training split, and the known
# categories get 1..K in sorted order. This keeps unseen categories from being
# merged into the first real class, and needs no encoder import.
cat_cols = ["world_name", "character_class"]
encoded_features = []
for col in cat_cols:
    train_classes = sorted(train_df[col].astype(str).unique())
    code_of = {cls: code for code, cls in enumerate(train_classes, start=1)}

    # Name of the new encoded column.
    new_col_name = f"{col}_encoded"
    encoded_features.append(new_col_name)

    for sub_df in [train_df, val_df, test_df]:
        # map() yields NaN for values missing from the mapping -> code 0.
        sub_df[new_col_name] = (
            sub_df[col].astype(str).map(code_of).fillna(0).astype(int)
        )

# Boolean flag -> 0/1.
# A bare astype(int) works only when the column is already bool or numeric;
# it raises on "true"/"false" strings and on missing values. fetch_details
# stores the default as the string "false", so the text form is compared
# instead. Anything that is not a recognised true token (including NaN)
# becomes 0.
_TRUE_TOKENS = ["true", "1", "1.0"]
for sub_df in [train_df, val_df, test_df]:
    flag_text = sub_df["liberation_quest_clear_flag"].astype(str).str.strip().str.lower()
    sub_df["liberation_quest_clear_flag"] = flag_text.isin(_TRUE_TOKENS).astype(int)

# Feature list with each categorical column replaced by its encoded column.
modified_features = [feat if feat not in cat_cols else f"{feat}_encoded" for feat in selected_features]

# Split the features into numeric ones (scaled) and encoded categories (not scaled).
numeric_features = [feat for feat in modified_features if feat not in ["world_name_encoded", "character_class_encoded"]]
category_features = ["world_name_encoded", "character_class_encoded"]

# Standardise the numeric features; the scaler is fitted on the training split only.
scaler = StandardScaler()
scaler.fit(train_df[numeric_features])
for sub_df in [train_df, val_df, test_df]:
    sub_df[numeric_features] = scaler.transform(sub_df[numeric_features])

In [None]:
# Sequence builder
def create_sequences(df, seq_len=10):
    """Build sliding windows of ``seq_len`` consecutive snapshots per ocid.

    Rows are grouped by ``ocid`` and ordered by ``snapshot_date``. Every
    window of ``seq_len`` consecutive rows becomes one sample made of the
    ``modified_features`` columns, labelled with the ``event`` value of the
    window's last row. Groups with fewer than ``seq_len`` rows (and groups
    with fewer than 2 rows) contribute nothing.

    Args:
        df: DataFrame with ``ocid``, ``snapshot_date``, ``event`` and every
            column listed in the module-level ``modified_features``.
        seq_len: Number of rows per window; must be at least 1.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, seq_len, n_features)``
        and ``y`` has shape ``(n_windows,)``. When no window can be built the
        arrays are empty but keep these ranks.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be >= 1")

    # Groups with fewer than 2 rows are skipped, even when seq_len is 1.
    min_rows = max(seq_len, 2)

    X_list, y_list = [], []
    for _, group in df.groupby("ocid"):
        if len(group) < min_rows:
            continue
        group = group.sort_values("snapshot_date")
        features = group[modified_features].values
        # Pull the labels out once instead of one .iloc lookup per window.
        labels = group["event"].values
        for start in range(len(group) - seq_len + 1):
            end = start + seq_len
            X_list.append(features[start:end])
            y_list.append(labels[end - 1])

    if not X_list:
        # np.array([]) would have shape (0,), which downstream code expecting
        # a 3-D feature array cannot consume.
        return np.empty((0, seq_len, len(modified_features))), np.empty((0,))
    return np.array(X_list), np.array(y_list)

# The class_weight module is used below but is not imported by the notebook's
# import cell, so import it here.
from sklearn.utils import class_weight

# Build the sequences for each split.
SEQ_LEN = 10
X_train, event_train = create_sequences(train_df, SEQ_LEN)
X_val, event_val = create_sequences(val_df, SEQ_LEN)
X_test, event_test = create_sequences(test_df, SEQ_LEN)

# Replace NaN with 0 in features and labels; labels are then cast to int.
X_train = np.nan_to_num(X_train)
X_val = np.nan_to_num(X_val)
X_test = np.nan_to_num(X_test)
event_train = np.nan_to_num(event_train).astype(int)
event_val = np.nan_to_num(event_val).astype(int)
event_test = np.nan_to_num(event_test).astype(int)

# Class-imbalance correction.
train_classes = np.unique(event_train)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=event_train
)
# Key each weight by its actual class label. Keying by position is only
# correct when the labels happen to be exactly 0..K-1.
class_weights = {int(label): float(w) for label, w in zip(train_classes, balanced_weights)}

# Model definition
def build_model(input_shape):
    """Build and compile the two-layer LSTM binary classifier.

    Args:
        input_shape: Shape of one sample, passed to the ``Masking`` layer as
            ``input_shape``. The notebook calls this with
            ``(SEQ_LEN, len(modified_features))``.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output, using
        Adam (learning rate 0.001), binary cross-entropy loss and the
        accuracy metric.
    """
    # Dropout is used below but is missing from the notebook's Keras import
    # line, so it is imported here to avoid a NameError.
    from tensorflow.keras.layers import Dropout

    model = Sequential([
        Masking(mask_value=0.0, input_shape=input_shape),
        LSTM(64, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
        LSTM(32, return_sequences=False, dropout=0.2, recurrent_dropout=0.2),
        Dropout(0.4),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

In [None]:
# ReduceLROnPlateau is used below but is missing from the notebook's callback
# import line, so it is imported here to avoid a NameError.
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Train the model.
model = build_model((SEQ_LEN, len(modified_features)))
# NOTE(review): with epochs=5 below, EarlyStopping(patience=5) can never stop
# the run early -- confirm whether a larger epoch count was intended.
callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True, monitor='val_loss'),
    ReduceLROnPlateau(patience=3, factor=0.5, min_lr=1e-5, monitor='val_loss')
]
history = model.fit(
    X_train, event_train,
    validation_data=(X_val, event_val),
    epochs=5,
    batch_size=32,
    class_weight=class_weights,
    callbacks=callbacks,
    verbose=1
)

# Predict on the test split and report ROC-AUC.
pred_risk = model.predict(X_test).flatten()
auc = roc_auc_score(event_test, pred_risk)
print(f"ROC-AUC: {auc:.4f}")

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt
import seaborn as sns

# `best_threshold` is not defined anywhere in the notebook, so it is derived
# here: the probability cut-off that maximises F1 on the validation split.
# It is tuned on validation data so the test metrics below stay unbiased.
# NOTE(review): the originally intended criterion is not visible in the
# notebook -- replace this if a different rule was meant.
val_risk = model.predict(X_val).flatten()
val_precision, val_recall, val_thresholds = precision_recall_curve(event_val, val_risk)
# precision/recall carry one more entry than thresholds; drop the final point.
f1_denominator = np.maximum(val_precision[:-1] + val_recall[:-1], 1e-12)
val_f1 = 2 * val_precision[:-1] * val_recall[:-1] / f1_denominator
best_threshold = float(val_thresholds[np.argmax(val_f1)])
print(f"Best threshold (validation F1): {best_threshold:.4f}")

# Apply the optimal threshold.
final_preds = (pred_risk >= best_threshold).astype(int)

# Performance metrics.
print("Classification Report (Optimal Threshold):\n")
print(classification_report(event_test, final_preds, digits=4))

# Confusion matrix.
cm = confusion_matrix(event_test, final_preds)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Pred: 0", "Pred: 1"], yticklabels=["True: 0", "True: 1"])
plt.title(f"Confusion Matrix (Threshold = {best_threshold:.2f})")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_score, recall_score
# plt and sns are used below but are not imported anywhere in the notebook.
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the decision threshold (default 0.5).
threshold = 0.5
pred_label = (pred_risk >= threshold).astype(int)

# Print the classification report.
report = classification_report(event_test, pred_label, digits=4)
print("Classification Report:\n", report)

# Plot the confusion matrix.
conf_matrix = confusion_matrix(event_test, pred_label)
plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.tight_layout()
plt.show()
