In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# End-to-end cell: load the workbook, clean it, build the model pipeline,
# split the data, train, and report hold-out metrics.

# --- Load -------------------------------------------------------------------
DATA_PATH = 'C:/Users/NT551/Desktop/데이터 추출(연습)/last_row_of_collected_datas.xlsx'
TARGET_COLUMNS = ['blue_totalGold', 'red_totalGold']
df = pd.read_excel(DATA_PATH)

# Print the column index to confirm what was loaded.
print("Initial Columns in DataFrame:", df.columns)

# --- Drop columns that are not used as model inputs ---------------------------
DROP_MARKERS = ('esportsPlayerId_', 'teamCode_', 'summonerName_', 'championName_')
columns_to_drop = [col for col in df.columns if any(marker in col for marker in DROP_MARKERS)]
df = df.drop(columns=columns_to_drop)

# Print the column index again after the drop.
print("Columns after dropping unnecessary columns:", df.columns)

# --- Missing values -----------------------------------------------------------
# The label is computed from the two gold totals, so a row missing either one
# cannot be labelled; drop it instead of inventing a total by mean imputation.
df = df.dropna(subset=TARGET_COLUMNS)
# numeric_only=True keeps this working if a non-numeric column is still
# present (a plain df.mean() raises TypeError for such columns on pandas >= 2.0).
df = df.fillna(df.mean(numeric_only=True))
print("Columns after filling missing values:", df.columns)

# --- Feature lists ------------------------------------------------------------
# NOTE(review): every 'championName_' column was removed by the drop above, so
# this list is always empty and the one-hot branch below is inactive. Remove
# 'championName_' from DROP_MARKERS if champion picks should be model inputs.
categorical_features = [col for col in df.columns if 'championName_' in col]
# Select numeric inputs by dtype so that a stray text column can never reach
# StandardScaler; the two target columns are excluded.
# NOTE(review): identifier-like columns (e.g. gameId) and the per-player
# totalGoldEarned_* columns are still included; if per-player gold sums to the
# team totals, the label can be reconstructed from the features - confirm intent.
_excluded = set(categorical_features) | set(TARGET_COLUMNS)
numeric_features = [
    col for col in df.select_dtypes(include=['number', 'bool']).columns
    if col not in _excluded
]

print("Categorical Features:", categorical_features)
print("Numeric Features:", numeric_features)

# --- Preprocessing + model ----------------------------------------------------
# Columns of X named in neither list are discarded by ColumnTransformer
# (its default is remainder='drop').
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        # handle_unknown='ignore': a category first seen at predict time is
        # encoded as all zeros instead of raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

# --- Features / label ---------------------------------------------------------
# Both target columns are required by the label expression, so the drop is
# strict (no errors='ignore'): a missing column should fail here, loudly.
X = df.drop(columns=TARGET_COLUMNS)
# Label: 1 when blue's total gold is strictly greater than red's, otherwise 0
# (equal totals therefore map to 0). The notebook treats this as the match
# result (0: red wins, 1: blue wins).
y = (df['blue_totalGold'] > df['red_totalGold']).astype(int)

# Sanity-check the feature columns and the class balance.
print("X columns:", X.columns)
print("y distribution:", y.value_counts())

# --- Split, train, evaluate ---------------------------------------------------
# 70/30 split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the pipeline on the training rows.
model.fit(X_train, y_train)

# Predict the held-out rows.
y_pred = model.predict(X_test)

# Report accuracy and the per-class precision / recall / F1 table.
print('Accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Initial Columns in DataFrame: Index(['gameId', 'duration', 'esportsTeamId_Blue', 'esportsTeamId_Red',
       'blue_totalGold', 'blue_inhibitors', 'blue_towers', 'blue_barons',
       'blue_totalKills', 'blue_dragons_count',
       ...
       'maxHealth_9', 'kills_9', 'deaths_9', 'assists_9', 'totalGoldEarned_9',
       'creepScore_9', 'killParticipation_9', 'championDamageShare_9',
       'wardsPlaced_9', 'wardsDestroyed_9'],
      dtype='object', length=166)
Columns after dropping unnecessary columns: Index(['gameId', 'duration', 'esportsTeamId_Blue', 'esportsTeamId_Red',
       'blue_totalGold', 'blue_inhibitors', 'blue_towers', 'blue_barons',
       'blue_totalKills', 'blue_dragons_count',
       ...
       'maxHealth_9', 'kills_9', 'deaths_9', 'assists_9', 'totalGoldEarned_9',
       'creepScore_9', 'killParticipation_9', 'championDamageShare_9',
       'wardsPlaced_9', 'wardsDestroyed_9'],
      dtype='object', length=126)
Columns after filling missing values: Index(['gameId', 'du

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [3]:
# Load the workbook and prepare the frame and feature lists used by later cells.
DATA_PATH = 'C:/Users/NT551/Desktop/데이터 추출(연습)/last_row_of_collected_datas.xlsx'
df = pd.read_excel(DATA_PATH)

# Print the column index to confirm what was loaded.
print("Initial Columns in DataFrame:", df.columns)

# Drop columns that are not used as model inputs (matched by substring).
DROP_MARKERS = ('esportsPlayerId_', 'teamCode_', 'summonerName_', 'championName_')
columns_to_drop = [col for col in df.columns if any(marker in col for marker in DROP_MARKERS)]
df = df.drop(columns=columns_to_drop)

# Print the column index again after the drop.
print("Columns after dropping unnecessary columns:", df.columns)

# Missing values.
# The label is later computed from the two gold totals, so a row missing either
# one cannot be labelled; drop it instead of inventing a total by imputation.
df = df.dropna(subset=['blue_totalGold', 'red_totalGold'])
# numeric_only=True keeps this working if a non-numeric column is still
# present (a plain df.mean() raises TypeError for such columns on pandas >= 2.0).
df = df.fillna(df.mean(numeric_only=True))
print("Columns after filling missing values:", df.columns)

# Feature lists.
# NOTE(review): every 'championName_' column was removed by the drop above, so
# this list is always empty. Remove 'championName_' from DROP_MARKERS if
# champion picks should be model inputs.
categorical_features = [col for col in df.columns if 'championName_' in col]
# Select numeric inputs by dtype so that a stray text column can never be
# handed to a numeric scaler; the two gold-total (target) columns are excluded.
# NOTE(review): identifier-like columns (e.g. gameId) and the per-player
# totalGoldEarned_* columns are still included - confirm that is intended.
_excluded = set(categorical_features) | {'blue_totalGold', 'red_totalGold'}
numeric_features = [
    col for col in df.select_dtypes(include=['number', 'bool']).columns
    if col not in _excluded
]

print("Categorical Features:", categorical_features)
print("Numeric Features:", numeric_features)

Initial Columns in DataFrame: Index(['gameId', 'duration', 'esportsTeamId_Blue', 'esportsTeamId_Red',
       'blue_totalGold', 'blue_inhibitors', 'blue_towers', 'blue_barons',
       'blue_totalKills', 'blue_dragons_count',
       ...
       'maxHealth_9', 'kills_9', 'deaths_9', 'assists_9', 'totalGoldEarned_9',
       'creepScore_9', 'killParticipation_9', 'championDamageShare_9',
       'wardsPlaced_9', 'wardsDestroyed_9'],
      dtype='object', length=166)
Columns after dropping unnecessary columns: Index(['gameId', 'duration', 'esportsTeamId_Blue', 'esportsTeamId_Red',
       'blue_totalGold', 'blue_inhibitors', 'blue_towers', 'blue_barons',
       'blue_totalKills', 'blue_dragons_count',
       ...
       'maxHealth_9', 'kills_9', 'deaths_9', 'assists_9', 'totalGoldEarned_9',
       'creepScore_9', 'killParticipation_9', 'championDamageShare_9',
       'wardsPlaced_9', 'wardsDestroyed_9'],
      dtype='object', length=126)
Columns after filling missing values: Index(['gameId', 'du

In [4]:
# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical ones. Columns of X named in neither list are discarded by
# ColumnTransformer (its default is remainder='drop').
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        # handle_unknown='ignore': a category first seen at predict time is
        # encoded as all zeros instead of raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

# Model: preprocessing followed by a seeded random forest.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

# Features / label.
# Both gold-total columns are required by the label expression below, so the
# drop is strict (no errors='ignore'): a missing column should fail here, loudly.
X = df.drop(columns=['blue_totalGold', 'red_totalGold'])
# Label: 1 when blue's total gold is strictly greater than red's, otherwise 0
# (equal totals therefore map to 0). The notebook treats this as the match
# result (0: red wins, 1: blue wins).
y = (df['blue_totalGold'] > df['red_totalGold']).astype(int)

# Sanity-check the feature columns and the class balance.
print("X columns:", X.columns)
print("y distribution:", y.value_counts())

X columns: Index(['gameId', 'duration', 'esportsTeamId_Blue', 'esportsTeamId_Red',
       'blue_inhibitors', 'blue_towers', 'blue_barons', 'blue_totalKills',
       'blue_dragons_count', 'red_inhibitors',
       ...
       'maxHealth_9', 'kills_9', 'deaths_9', 'assists_9', 'totalGoldEarned_9',
       'creepScore_9', 'killParticipation_9', 'championDamageShare_9',
       'wardsPlaced_9', 'wardsDestroyed_9'],
      dtype='object', length=124)
y distribution: 1    4244
0    3792
Name: count, dtype: int64


In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit on the training rows, then predict the held-out rows.
y_pred = model.fit(X_train, y_train).predict(X_test)

# Overall accuracy on the held-out rows.
test_accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', test_accuracy)

# Per-class precision / recall / F1 table.
report_text = classification_report(y_test, y_pred)
print(report_text)


Accuracy: 0.9834093737038573
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1132
           1       0.99      0.98      0.98      1279

    accuracy                           0.98      2411
   macro avg       0.98      0.98      0.98      2411
weighted avg       0.98      0.98      0.98      2411

