## 사전작업

In [None]:
#파이토치 패키지 설치
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.0.1+cu118.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-2.0.1+cu118.html
!pip install torch-geometric

Looking in links: https://data.pyg.org/whl/torch-2.0.1+cu118.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/torch_scatter-2.1.2%2Bpt20cu118-cp311-cp311-linux_x86_64.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt20cu118
Looking in links: https://data.pyg.org/whl/torch-2.0.1+cu118.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/torch_sparse-0.6.18%2Bpt20cu118-cp311-cp311-linux_x86_64.whl (4.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt20cu118
Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━

In [None]:
!pip install tqdm



In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from tqdm import tqdm

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
# google.colab is supplied by the Colab runtime, so this cell is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the per-player match statistics from the mounted Drive folder.
# NOTE(review): the path is hard-coded to one Drive layout — consider a configurable DATA_DIR.
df_name5000 = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/df_name5000.csv')

## GraphSAGE 기반 클러스터링

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from tqdm import tqdm

from itertools import combinations

# 1. Preprocessing: derive four role-oriented features (combat / support /
#    movement / survival) for every player row.
features = ['aggression', 'support', 'mobility', 'survivability']
df_name5000['aggression'] = df_name5000['player_kills'] + df_name5000['player_dbno'] + (df_name5000['player_dmg'] / 100)
df_name5000['support'] = df_name5000['player_assists']
df_name5000['mobility'] = df_name5000['player_dist_walk'] + df_name5000['player_dist_ride']
df_name5000['survivability'] = df_name5000['player_survive_time']

X = df_name5000[features].values.astype(np.float32)
# StandardScaler lets NaN pass through, which would silently turn the training
# loss into NaN later; fail here with a clear message instead.
if not np.isfinite(X).all():
    raise ValueError("feature matrix contains NaN/inf values; clean df_name5000 before scaling")
# NOTE(review): the cluster summaries recorded in this notebook show extreme
# survivability/mobility means (including a one-row cluster), which suggests
# outliers dominate the standardised scale — consider clipping or filtering
# before scaling.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 2. Edges: connect every pair of players that share (match_id, team_id).
#    `.indices` yields positional row numbers, which line up with the rows of X.
grouped = df_name5000.groupby(['match_id', 'team_id']).indices
edge_pairs = []
for members in grouped.values():
    # Emit both directions of each pair so the graph is undirected.
    for src, dst in combinations(np.asarray(members).tolist(), 2):
        edge_pairs.append((src, dst))
        edge_pairs.append((dst, src))
# The explicit dtype and shape keep the result a (2, num_edges) long tensor even
# when no team has two or more players (the edge list is then empty).
edge_index = torch.tensor(edge_pairs, dtype=torch.long).reshape(len(edge_pairs), 2).t().contiguous()

# 3. Wrap node features and connectivity in a PyG Data object.
data = Data(x=torch.tensor(X_scaled, dtype=torch.float), edge_index=edge_index)

# 4. GraphSAGE 모델 정의
class GraphSAGE(nn.Module):
    """Two-layer GraphSAGE network.

    Args:
        in_channels: width of the input node features.
        hidden_channels: width of the intermediate representation.
        out_channels: width of the per-node output.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)

    def forward(self, data):
        """Run both convolutions over ``data.x`` / ``data.edge_index``.

        A ReLU is applied between the two layers; the second layer's output
        is returned without an activation.
        """
        node_features = data.x
        edges = data.edge_index
        hidden = F.relu(self.conv1(node_features, edges))
        return self.conv2(hidden, edges)

# 5. Training: the network is fitted to reconstruct its own scaled input
#    features (MSE between the model output and data.x).
# Seed PyTorch first so weight initialisation — and therefore the clusters
# derived from this model — is repeatable across re-runs.
torch.manual_seed(42)
# Input and output width follow the feature matrix (4 engineered features here)
# instead of being hard-coded.
num_features = data.x.size(1)
model = GraphSAGE(in_channels=num_features, hidden_channels=16, out_channels=num_features)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
model.train()
for epoch in tqdm(range(100), desc="Training GraphSAGE"):
    optimizer.zero_grad()
    out = model(data)
    loss = F.mse_loss(out, data.x)
    loss.backward()
    optimizer.step()
    # Report the loss every tenth epoch without breaking the progress bar.
    if epoch % 10 == 0:
        tqdm.write(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# 6. Extract the model output for every node and cluster it with KMeans.
# NOTE(review): the model is trained to reproduce data.x, so these "embeddings"
# are reconstructed 4-d features rather than a hidden representation.
model.eval()
with torch.no_grad():
    embeddings = model(data).cpu().numpy()
kmeans = KMeans(n_clusters=5, random_state=42)
clusters = kmeans.fit_predict(embeddings)
# 'cluster' is what the following summary cells read.
df_name5000['cluster'] = clusters
# The GAT section later overwrites 'cluster'; keep a model-specific copy so the
# GraphSAGE assignment stays available afterwards.
df_name5000['cluster_graphsage'] = clusters


Training GraphSAGE:   1%|          | 1/100 [00:00<00:51,  1.90it/s]

Epoch 0, Loss: 1.1471


Training GraphSAGE:  11%|█         | 11/100 [00:05<00:44,  1.98it/s]

Epoch 10, Loss: 0.4929


Training GraphSAGE:  21%|██        | 21/100 [00:09<00:38,  2.05it/s]

Epoch 20, Loss: 0.1962


Training GraphSAGE:  31%|███       | 31/100 [00:14<00:30,  2.24it/s]

Epoch 30, Loss: 0.1027


Training GraphSAGE:  41%|████      | 41/100 [00:19<00:29,  2.01it/s]

Epoch 40, Loss: 0.0687


Training GraphSAGE:  51%|█████     | 51/100 [00:24<00:22,  2.14it/s]

Epoch 50, Loss: 0.0353


Training GraphSAGE:  61%|██████    | 61/100 [00:28<00:18,  2.12it/s]

Epoch 60, Loss: 0.0175


Training GraphSAGE:  71%|███████   | 71/100 [00:33<00:13,  2.09it/s]

Epoch 70, Loss: 0.0088


Training GraphSAGE:  81%|████████  | 81/100 [00:38<00:09,  2.04it/s]

Epoch 80, Loss: 0.0060


Training GraphSAGE:  91%|█████████ | 91/100 [00:43<00:04,  1.96it/s]

Epoch 90, Loss: 0.0045


Training GraphSAGE: 100%|██████████| 100/100 [00:48<00:00,  2.08it/s]


In [None]:
# Preview the first rows, now including the engineered features and the cluster label.
# NOTE(review): the rendered output contains player_name values — consider clearing it before sharing.
df_name5000.head()

Unnamed: 0,date,game_size,match_id,match_mode,party_size,player_assists,player_dbno,player_dist_ride,player_dist_walk,player_dmg,player_kills,player_name,player_survive_time,team_id,team_placement,count,cluster,aggression,support,mobility,survivability
0,2017-12-28T09:30:48+0000,48,2U4GBNA0YmnZDcOwRYKzxWhbr5Jaxxjhp0cRvsI40iOQJd...,tpp,2,0,0,872.5879,2512.29663,111,0,shuaizeze,1517.55,8,8,1,1,1.11,0,3384.88453,1517.55
1,2017-12-28T09:30:48+0000,48,2U4GBNA0YmnZDcOwRYKzxWhbr5Jaxxjhp0cRvsI40iOQJd...,tpp,2,0,1,1156.54919,2890.864,198,0,yunqiu,1517.552,8,8,1,1,2.98,0,4047.41319,1517.552
2,2017-12-11T22:02:39+0000,24,2U4GBNA0YmnmvI29GR080nJ5U4oZyrJ2kJqz7BeQrwDdx6...,tpp,4,2,0,6234.761,1619.87219,168,1,Jong1cm,1688.669,13,4,1,2,2.68,2,7854.63319,1688.669
3,2018-01-07T03:45:01+0000,47,2U4GBNA0YmnLSqvEycnTjo-KT000vfUnhSA2vfVhVPe1QB...,tpp,2,1,1,0.0,1623.34924,364,2,Piece_of_Sht,985.847,17,14,1,2,6.64,1,1623.34924,985.847
4,2018-01-07T03:45:01+0000,47,2U4GBNA0YmnLSqvEycnTjo-KT000vfUnhSA2vfVhVPe1QB...,tpp,2,0,0,0.0,15.25666,0,0,NorthernGriz,96.115,32,46,1,1,0.0,0,15.25666,96.115


In [None]:
# Per-cluster mean of the four engineered features, rounded to two decimals
cluster_summary = df_name5000.groupby('cluster')[['aggression', 'support', 'mobility', 'survivability']].mean().round(2)

# Display the table without truncating columns (this changes the global pandas display option)
import pandas as pd
pd.set_option('display.max_columns', None)
display(cluster_summary)

Unnamed: 0_level_0,aggression,support,mobility,survivability
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,11.65,0.4,4348.87,1275.38
1,1.63,0.0,1803.02,616.16
2,3.82,1.39,3477.04,1049.35
3,4.34,0.0,0.0,63646420000.0
4,2.95,0.24,643851.65,1018.14


In [None]:
# Sum the `count` column per cluster.
# Presumably `count` is 1 per player row, making this the cluster size — TODO confirm.
df_name5000.pivot_table(index = "cluster", values = 'count', aggfunc='sum')

Unnamed: 0_level_0,count
cluster,Unnamed: 1_level_1
0,156338
1,950415
2,243882
3,1
4,59


In [None]:
# Summarise each team (match_id, team_id): its cluster combination and its placement
team_summary = df_name5000.groupby(['match_id', 'team_id']).agg({
    'cluster': lambda x: tuple(sorted(x)),  # sorted tuple of the team members' cluster labels
    'team_placement': 'min'  # minimum placement value across the team's rows
}).reset_index()

In [None]:
# Mean team placement per cluster combination, sorted ascending
combo_result = (
    team_summary
    .groupby('cluster')['team_placement']
    .mean()
    .sort_values()
    .rename('avg_team_placement')
    .reset_index()
)

In [None]:
# Print the ten combinations with the lowest mean placement value (plain-text output).
# NOTE(review): the mean is shown without the number of teams behind it — rare combinations may rank high by chance.
import pprint
pprint.pprint(combo_result.head(10))

        cluster  avg_team_placement
0  (0, 0, 0, 0)            1.000000
1        (1, 3)            2.000000
2  (0, 0, 0, 2)            2.800000
3     (0, 2, 4)            3.000000
4     (0, 0, 0)            3.909091
5  (0, 0, 2, 2)            4.159091
6     (0, 0, 2)            5.335361
7  (0, 2, 2, 2)            5.773852
8  (0, 0, 1, 2)            5.855319
9  (0, 0, 0, 1)            6.058824


## GAT 기반 클러스터링

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from tqdm import tqdm

from itertools import combinations

# 1. Preprocessing: derive the combat / support / movement / survival features.
features = ['aggression', 'support', 'mobility', 'survivability']
df_name5000['aggression'] = df_name5000['player_kills'] + df_name5000['player_dbno'] + (df_name5000['player_dmg'] / 100)
df_name5000['support'] = df_name5000['player_assists']
df_name5000['mobility'] = df_name5000['player_dist_walk'] + df_name5000['player_dist_ride']
df_name5000['survivability'] = df_name5000['player_survive_time']

X = df_name5000[features].values.astype(np.float32)
# NaN values pass through StandardScaler unnoticed and would make the loss NaN;
# stop early with an explicit error.
if not np.isfinite(X).all():
    raise ValueError("feature matrix contains NaN/inf values; clean df_name5000 before scaling")
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 2. Edges: players in the same (match_id, team_id) group are linked pairwise.
#    `.indices` gives positional row numbers that match the rows of X.
grouped = df_name5000.groupby(['match_id', 'team_id']).indices
edge_pairs = []
for members in grouped.values():
    # Add each pair in both directions (undirected graph).
    for src, dst in combinations(np.asarray(members).tolist(), 2):
        edge_pairs.append((src, dst))
        edge_pairs.append((dst, src))
# Explicit dtype/shape: stays a (2, num_edges) long tensor even with zero edges.
edge_index = torch.tensor(edge_pairs, dtype=torch.long).reshape(len(edge_pairs), 2).t().contiguous()

# 3. Build the PyG Data object from scaled features and edges.
data = Data(x=torch.tensor(X_scaled, dtype=torch.float), edge_index=edge_index)

# 4. GAT 모델 정의
class GAT(torch.nn.Module):
    """Two-layer graph attention network.

    Args:
        in_channels: width of the input node features.
        hidden_channels: per-head width of the first attention layer.
        out_channels: width of the per-node output.
        heads: number of attention heads in the first layer; the second
            layer always uses a single head and takes
            ``hidden_channels * heads`` input channels.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=1):
        super().__init__()
        self.gat1 = GATConv(in_channels, hidden_channels, heads=heads)
        self.gat2 = GATConv(hidden_channels * heads, out_channels, heads=1)

    def forward(self, data):
        """Apply both attention layers with an ELU between them."""
        node_features = data.x
        edges = data.edge_index
        activated = F.elu(self.gat1(node_features, edges))
        return self.gat2(activated, edges)

# 5. Training: fit the GAT to reconstruct its scaled input features (MSE vs data.x).
# Fix the PyTorch seed so the initial weights, and the clusters built on the
# trained model, do not change from run to run.
torch.manual_seed(42)
# Derive the input/output width from the feature matrix (4 engineered features here).
num_features = data.x.size(1)
model = GAT(in_channels=num_features, hidden_channels=8, out_channels=num_features)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
model.train()
for epoch in tqdm(range(100), desc="Training GAT"):
    optimizer.zero_grad()
    out = model(data)
    loss = F.mse_loss(out, data.x)
    loss.backward()
    optimizer.step()
    # Log every tenth epoch via tqdm.write so the progress bar is not corrupted.
    if epoch % 10 == 0:
        tqdm.write(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# 6. Take the trained model's per-node output and cluster it with KMeans.
# NOTE(review): the output is trained to match data.x, so it is a reconstruction
# of the input features rather than a hidden-layer embedding.
model.eval()
with torch.no_grad():
    embeddings = model(data).cpu().numpy()
kmeans = KMeans(n_clusters=5, random_state=42)
clusters = kmeans.fit_predict(embeddings)
# 'cluster' feeds the summary cells below; this replaces any labels an earlier
# section stored under the same name.
df_name5000['cluster'] = clusters
# Model-specific copy so the GAT labels can be compared with other models' labels.
df_name5000['cluster_gat'] = clusters

Training GAT:   1%|          | 1/100 [00:00<01:27,  1.13it/s]

Epoch 0, Loss: 1.2185


Training GAT:  11%|█         | 11/100 [00:08<01:02,  1.43it/s]

Epoch 10, Loss: 0.9579


Training GAT:  21%|██        | 21/100 [00:15<01:00,  1.30it/s]

Epoch 20, Loss: 0.8085


Training GAT:  31%|███       | 31/100 [00:22<00:46,  1.50it/s]

Epoch 30, Loss: 0.7153


Training GAT:  41%|████      | 41/100 [00:30<00:44,  1.34it/s]

Epoch 40, Loss: 0.6574


Training GAT:  51%|█████     | 51/100 [00:37<00:33,  1.46it/s]

Epoch 50, Loss: 0.6185


Training GAT:  61%|██████    | 61/100 [00:44<00:27,  1.39it/s]

Epoch 60, Loss: 0.5900


Training GAT:  71%|███████   | 71/100 [00:51<00:21,  1.33it/s]

Epoch 70, Loss: 0.5685


Training GAT:  81%|████████  | 81/100 [00:58<00:13,  1.41it/s]

Epoch 80, Loss: 0.5490


Training GAT:  91%|█████████ | 91/100 [01:06<00:06,  1.34it/s]

Epoch 90, Loss: 0.5139


Training GAT: 100%|██████████| 100/100 [01:12<00:00,  1.38it/s]


In [None]:
# The four engineered features used as model input
features = ['aggression', 'support', 'mobility', 'survivability']

# Mean of each feature per cluster, rounded to two decimals
cluster_summary = df_name5000.groupby('cluster')[features].mean().round(2)

# Render the table with all columns visible (sets a global pandas display option)
import pandas as pd
pd.set_option('display.max_columns', None)
display(cluster_summary)

Unnamed: 0_level_0,aggression,support,mobility,survivability
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,10.53,1.74,5112.63,1502.68
1,1.18,0.04,616.38,388.79
2,2.06,0.09,5791.17,1289.05
3,3.34,1.07,3413.39,410400.28
4,7.47,0.13,2734.99,940.81


In [None]:
# Sum the `count` column per GAT cluster.
# Presumably `count` is 1 per player row, so this is the cluster size — TODO confirm.
df_name5000.pivot_table(index = "cluster", values = 'count', aggfunc='sum')

Unnamed: 0_level_0,count
cluster,Unnamed: 1_level_1
0,92702
1,671216
2,222996
3,155467
4,208314


In [None]:
# One row per team (match_id, team_id): cluster combination plus placement
team_summary = df_name5000.groupby(['match_id', 'team_id']).agg({
    'cluster': lambda x: tuple(sorted(x)),  # sorted tuple of the team members' cluster labels
    'team_placement': 'min'  # team's final placement (lower is better)
}).reset_index()

In [None]:
# Average placement for every cluster combination, best (lowest) first
mean_placement = team_summary.groupby('cluster')['team_placement'].mean()
combo_result = mean_placement.sort_values().reset_index(name='avg_team_placement')

In [None]:
# Print the full combination ranking as plain text.
# NOTE(review): the mean is shown without the number of teams behind it — rare combinations may rank high by chance.
import pprint
pprint.pprint(combo_result)

         cluster  avg_team_placement
0         (0, 3)            2.000000
1         (2, 4)            2.000000
2   (0, 0, 0, 0)            3.674952
3      (0, 0, 0)            4.345101
4         (0, 0)            5.842092
5           (0,)            7.374402
6      (2, 3, 3)            7.500000
7   (2, 2, 2, 2)            8.180430
8      (2, 2, 2)            8.321492
9   (3, 3, 3, 3)            8.720183
10     (2, 2, 3)            9.000000
11          (2,)            9.754076
12     (3, 3, 3)            9.841162
13  (4, 4, 4, 4)           10.233888
14        (2, 2)           10.333204
15     (4, 4, 4)           11.015750
16     (1, 2, 2)           11.500000
17        (3, 3)           12.681302
18  (2, 2, 4, 4)           13.000000
19        (4, 4)           14.172249
20        (1, 2)           15.000000
21          (3,)           15.571277
22          (4,)           15.935037
23  (1, 1, 1, 1)           20.031380
24     (1, 1, 1)           20.132260
25          (1,)           25.231420
2