In [None]:
import pandas as pd

In [None]:
# Load the per-kill records (player averages plus victim/killer positions)
# and show the frame as this cell's output.
RAW_CSV_PATH = "/content/before clust.csv"
df = pd.read_csv(RAW_CSV_PATH)
df

Unnamed: 0,player_name,avg_player_assists,avg_player_dbno,avg_player_dist_ride,avg_player_dist_walk,avg_player_dmg,avg_player_kills,avg_player_survive_time,avg_team_placement,victim_time,victim_position_x,victim_position_y,killer_position_x,killer_position_y
0,0--kxxk--0,0.203125,0.328125,1233.789907,1113.421918,67.906250,0.359375,823.015641,18.562500,172,198047.9,291476.7,199213.8,292095.2
1,0--kxxk--0,0.203125,0.328125,1233.789907,1113.421918,67.906250,0.359375,823.015641,18.562500,231,346836.5,312080.0,358682.8,298949.7
2,0--kxxk--0,0.203125,0.328125,1233.789907,1113.421918,67.906250,0.359375,823.015641,18.562500,98,432471.4,635862.7,0.0,0.0
3,0-0-Bram,0.267241,0.594828,1357.781589,1808.272817,120.577586,0.646552,964.484716,18.370690,146,393143.1,348960.9,391272.6,346693.7
4,0-0PenGzF,0.205882,0.764706,1049.706310,1268.093113,196.676471,1.617647,707.203706,30.882353,138,421411.3,325029.4,421163.2,324833.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
614084,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,347,628295.3,607591.3,618280.8,602102.2
614085,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,97,176102.4,304277.8,176179.7,304266.7
614086,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,358,423396.1,324839.4,420969.6,324558.2
614087,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,225,418154.3,324043.7,419437.5,324776.7


In [None]:
# Show the column labels of the loaded frame to confirm which coordinate
# columns are available before clustering.
df.columns

Index(['player_name', 'avg_player_assists', 'avg_player_dbno',
       'avg_player_dist_ride', 'avg_player_dist_walk', 'avg_player_dmg',
       'avg_player_kills', 'avg_player_survive_time', 'avg_team_placement',
       'victim_time', 'victim_position_x', 'victim_position_y',
       'killer_position_x', 'killer_position_y'],
      dtype='object')

In [None]:
import pandas as pd
import joblib
import numpy as np

# Scikit-learn 모델 및 스케일러 사용을 위해 필요
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

# --- 1. File paths, column names and fallback hyper-parameters ---
SCALER_PATH = "/content/scaler_erangel.joblib"
MODEL_PATH = "/content/final_gmm_model_k40.joblib"
COORD_COLUMNS = ["killer_position_x", "killer_position_y"]
# Used only when a placeholder GMM has to be fitted because no saved model
# could be loaded.
FALLBACK_N_COMPONENTS = 40
FALLBACK_RANDOM_STATE = 42
# Label given to rows whose killer position cannot be clustered.
UNKNOWN_CLUSTER = -1

# Rows usable for clustering: both coordinates present and non-zero.
# The same selection is used for the fallback fit and for prediction so the
# two can never disagree about which rows are valid.
# NOTE(review): a zero coordinate is treated as "position not recorded";
# confirm this matches how the saved scaler/model were trained.
valid_coords_mask = (
    df[COORD_COLUMNS].notna().all(axis=1)
    & (df["killer_position_x"] != 0)
    & (df["killer_position_y"] != 0)
)
coords_to_predict = df.loc[valid_coords_mask, COORD_COLUMNS]

# --- 2. Load the scaler and the GMM model ---
print("--- 2. 스케일러 및 GMM 모델 로딩 ---")
# Reset first so a value left over in the kernel cannot hide a missing file.
scaler = None
try:
    # NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
    scaler = joblib.load(SCALER_PATH)
    gmm_model = joblib.load(MODEL_PATH)
    print("스케일러와 GMM 모델 로딩 성공!")
except FileNotFoundError as e:
    # Demo fallback: build placeholder objects so the notebook still runs.
    # (Not needed when the real artifacts are present.)
    print(
        f"경고: '{e.filename}' 파일을 찾을 수 없습니다. 예제 실행을 위해 가상 객체를 생성하고 저장합니다."
    )
    if scaler is None:
        # The scaler itself was the missing file.
        scaler = StandardScaler().fit(coords_to_predict)
        joblib.dump(scaler, SCALER_PATH)
    # Reaching this branch means no model was loaded: either its file is
    # missing, or the scaler was missing and the model load was never
    # attempted. A model is only meaningful in the scaled space of its own
    # scaler, so it is always (re)fitted here against the scaler in use and
    # written to MODEL_PATH (replacing any file already there).
    gmm_model = GaussianMixture(
        n_components=FALLBACK_N_COMPONENTS, random_state=FALLBACK_RANDOM_STATE
    ).fit(scaler.transform(coords_to_predict))
    joblib.dump(gmm_model, MODEL_PATH)
    print("가상 객체 생성 완료.")


# --- 3. Predict clusters and update the DataFrame ---
print("\n--- 3. 클러스터 예측 및 데이터프레임 업데이트 ---")

# Start with every row marked as unknown so the column always exists, even
# when no row has usable coordinates.
cluster_labels = np.full(len(df), UNKNOWN_CLUSTER, dtype=int)

if not coords_to_predict.empty:
    # 1. Apply the scaler
    coords_scaled = scaler.transform(coords_to_predict)

    # 2. Predict the cluster of each valid row with the GMM
    cluster_predictions = gmm_model.predict(coords_scaled)

    # 3. Write predictions into the positions of the valid rows
    cluster_labels[valid_coords_mask.to_numpy()] = cluster_predictions

# Rows without valid coordinates keep UNKNOWN_CLUSTER (-1).
df["killer_cluster"] = cluster_labels

# 4. Drop the raw killer coordinates now that the cluster label replaces them
final_df = df.drop(columns=COORD_COLUMNS)


# --- 4. Inspect the final result ---
print("\n--- 최종 결과 데이터프레임 ---")
print(final_df)

print("\n--- 생성된 클러스터별 데이터 개수 ---")
print(final_df["killer_cluster"].value_counts())

--- 2. 스케일러 및 GMM 모델 로딩 ---
스케일러와 GMM 모델 로딩 성공!

--- 3. 클러스터 예측 및 데이터프레임 업데이트 ---





--- 최종 결과 데이터프레임 ---
           player_name  avg_player_assists  avg_player_dbno  \
0           0--kxxk--0            0.203125         0.328125   
1           0--kxxk--0            0.203125         0.328125   
2           0--kxxk--0            0.203125         0.328125   
3             0-0-Bram            0.267241         0.594828   
4            0-0PenGzF            0.205882         0.764706   
...                ...                 ...              ...   
614084  zzzzzzzzzzzzz1            0.325000         0.625000   
614085  zzzzzzzzzzzzz1            0.325000         0.625000   
614086  zzzzzzzzzzzzz1            0.325000         0.625000   
614087  zzzzzzzzzzzzz1            0.325000         0.625000   
614088  zzzzzzzzzzzzz1            0.325000         0.625000   

        avg_player_dist_ride  avg_player_dist_walk  avg_player_dmg  \
0                1233.789907           1113.421918       67.906250   
1                1233.789907           1113.421918       67.906250   
2          

In [None]:
# Display the result frame: the killer coordinate columns have been dropped
# and `killer_cluster` holds the predicted cluster (-1 for invalid positions).
final_df

Unnamed: 0,player_name,avg_player_assists,avg_player_dbno,avg_player_dist_ride,avg_player_dist_walk,avg_player_dmg,avg_player_kills,avg_player_survive_time,avg_team_placement,victim_time,victim_position_x,victim_position_y,killer_cluster
0,0--kxxk--0,0.203125,0.328125,1233.789907,1113.421918,67.906250,0.359375,823.015641,18.562500,172,198047.9,291476.7,5
1,0--kxxk--0,0.203125,0.328125,1233.789907,1113.421918,67.906250,0.359375,823.015641,18.562500,231,346836.5,312080.0,27
2,0--kxxk--0,0.203125,0.328125,1233.789907,1113.421918,67.906250,0.359375,823.015641,18.562500,98,432471.4,635862.7,-1
3,0-0-Bram,0.267241,0.594828,1357.781589,1808.272817,120.577586,0.646552,964.484716,18.370690,146,393143.1,348960.9,34
4,0-0PenGzF,0.205882,0.764706,1049.706310,1268.093113,196.676471,1.617647,707.203706,30.882353,138,421411.3,325029.4,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
614084,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,347,628295.3,607591.3,25
614085,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,97,176102.4,304277.8,31
614086,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,358,423396.1,324839.4,7
614087,zzzzzzzzzzzzz1,0.325000,0.625000,1086.818038,1016.234660,135.250000,0.941667,710.247467,27.216667,225,418154.3,324043.7,7


In [None]:
# Write the clustered frame to CSV without the row index.
OUTPUT_CSV_PATH = "formodel1.csv"
final_df.to_csv(OUTPUT_CSV_PATH, index=False)