# Scraping Data

**Pengambilan semua data dengan metode scraping**

In [2]:
import json
import os

import pandas as pd
import requests

# API endpoint that the hero statistics are requested from.
url = "https://api.gms.moontontech.com/api/gms/source/2669606/2756568"

# Request headers.
# The token may be supplied through the GMS_API_TOKEN environment variable;
# the literal is kept only as a fallback so existing runs keep working.
# NOTE(review): avoid committing credentials - prefer the environment variable
# and drop the fallback before sharing this notebook.
headers = {
    "Authorization": os.environ.get("GMS_API_TOKEN", "hLal4Mf1ZPiSBWebWJbzP8kbwMg="),
    "Content-Type": "application/json;charset=UTF-8"
}

# Request body: a single page of up to 128 records, with the filters, sort
# order and field selection sent to the server.
# NOTE(review): only pageIndex 1 is requested; presumably records beyond the
# first 128 would be missed - confirm against the API.
payload = {
    "pageSize": 128,
    "pageIndex": 1,
    "filters": [
        {"field": "bigrank", "operator": "eq", "value": "7"},
        {"field": "match_type", "operator": "eq", "value": 0}
    ],
    "sorts": [
        {"data": {"field": "main_hero_ban_rate", "order": "asc"}, "type": "sequence"},
        {"data": {"field": "main_heroid", "order": "desc"}, "type": "sequence"}
    ],
    "fields": [
        "main_hero",
        "main_hero_appearance_rate",
        "main_hero_ban_rate",
        "main_hero_channel",
        "main_hero_win_rate",
        "main_heroid",
        "data.sub_hero.hero",
        "data.sub_hero.hero_channel",
        "data.sub_hero.increase_win_rate",
        "data.sub_hero.heroid"
    ]
}

# Without an explicit timeout, requests waits indefinitely on a stalled server.
response = requests.post(url, headers=headers, json=payload, timeout=30)

if response.status_code == 200:
    data = response.json()

    # dict.get defaults only apply to *missing* keys; a body such as
    # {"data": null} would otherwise raise AttributeError on the chained .get().
    records = (data.get("data") or {}).get("records") or []

    hero_list = []
    for record in records:
        hero_data = record["data"]
        hero_list.append({
            "Hero Name": hero_data["main_hero"]["data"]["name"],
            "Hero ID": hero_data["main_heroid"],
            "Win Rate": hero_data["main_hero_win_rate"],
            "Pick Rate": hero_data["main_hero_appearance_rate"],
            "Ban Rate": hero_data["main_hero_ban_rate"],
        })

    if hero_list:
        # Save to CSV
        df = pd.DataFrame(hero_list)
        df.to_csv("data.csv", index=False, encoding="utf-8")

        print("Data berhasil disimpan ke data.csv")
    else:
        # Do not overwrite data.csv with an empty file: a column-less CSV makes
        # the later pd.read_csv("data.csv") fail.
        print("Tidak ada data hero pada respons; data.csv tidak diperbarui.")
        print(response.text)

else:
    print(f"Request gagal! Status code: {response.status_code}")
    print(response.text)


Data berhasil disimpan ke data.csv


In [3]:
# Reload the scraped statistics from data.csv and preview the first rows.
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,Hero Name,Hero ID,Win Rate,Pick Rate,Ban Rate
0,Lapu-Lapu,37,0.477881,0.001948,0.000178
1,Baxia,87,0.510701,0.000571,0.000199
2,Grock,44,0.450297,0.00136,0.000274
3,Thamuz,72,0.466466,0.002573,0.000299
4,Kimmy,71,0.424788,0.001217,0.000303


# Analisis Data

**Menganalisis data yang sudah di-scraping**

**Analisis data bisa dilakukan lebih mendalam, mulai dari mencari nilai maksimum, minimum, korelasi, dll.**

**Semakin dalam menganalisis semakin baik modelnya**

**Di sini saya hanya mencari nilai rata-ratanya saja**

In [5]:
import pandas as pd

# Re-read the scraped statistics and report the mean of each rate column.
df = pd.read_csv("data.csv")

# One mean per column, computed column by column.
avg_win_rate, avg_pick_rate, avg_ban_rate = (
    df[column].mean() for column in ("Win Rate", "Pick Rate", "Ban Rate")
)

# A single print call with a newline separator yields the same three lines.
print(
    f"Rata-rata Win Rate: {avg_win_rate:.4f}",
    f"Rata-rata Pick Rate: {avg_pick_rate:.4f}",
    f"Rata-rata Ban Rate: {avg_ban_rate:.4f}",
    sep="\n",
)


Rata-rata Win Rate: 0.4964
Rata-rata Pick Rate: 0.0078
Rata-rata Ban Rate: 0.0705


# Label Data

**Kemudian berikan label secara manual kalau data sudah dianalisis**

**Bisa juga secara otomatis menggunakan K-means**

In [6]:
def classify_hero(row):
    """Return the tier label for one hero based on its rate columns.

    ``row`` is any mapping-like object indexable by "Win Rate", "Pick Rate"
    and "Ban Rate" (for example a DataFrame row passed by ``df.apply``).

    The rules are checked in order and the first one that matches wins:
      * "OP"         - ban rate above 0.2 and win rate above 0.52
      * "Meta"       - pick rate above 0.015 and win rate above 0.51
      * "Underrated" - ban rate below 0.2, win rate above 0.53 and
                       pick rate below 0.01
    Anything else is labelled "Biasa".
    """
    # Ordered (label, predicate) pairs; predicates stay lazy so a column is
    # only read when its rule is actually evaluated.
    rules = (
        ("OP", lambda r: r["Ban Rate"] > 0.2 and r["Win Rate"] > 0.52),
        ("Meta", lambda r: r["Pick Rate"] > 0.015 and r["Win Rate"] > 0.51),
        (
            "Underrated",
            lambda r: r["Ban Rate"] < 0.2 and r["Win Rate"] > 0.53 and r["Pick Rate"] < 0.01,
        ),
    )

    for label, matches in rules:
        if matches(row):
            return label
    return "Biasa"

# Label every hero row by row, attach the labels as a new column, then
# write the labelled table to disk without the index.
sentiment_labels = df.apply(classify_hero, axis="columns")
df["Sentimen"] = sentiment_labels

df.to_csv("data_with_sentiment.csv", index=False)

In [7]:
# Display up to the first 100 rows of the frame, including the new label column.
df.head(100)

Unnamed: 0,Hero Name,Hero ID,Win Rate,Pick Rate,Ban Rate,Sentimen
0,Lapu-Lapu,37,0.477881,0.001948,0.000178,Biasa
1,Baxia,87,0.510701,0.000571,0.000199,Biasa
2,Grock,44,0.450297,0.001360,0.000274,Biasa
3,Thamuz,72,0.466466,0.002573,0.000299,Biasa
4,Kimmy,71,0.424788,0.001217,0.000303,Biasa
...,...,...,...,...,...,...
95,Belerick,70,0.502779,0.009768,0.021522,Biasa
96,Yi Sun-shin,30,0.542490,0.011139,0.021746,Biasa
97,Karina,8,0.456817,0.006923,0.022532,Biasa
98,Angela,55,0.498695,0.015253,0.025049,Biasa


In [8]:
# Keep only the rows labelled "Underrated".
# Note: despite its name, df_meta holds the "Underrated" heroes.
underrated_mask = df["Sentimen"] == "Underrated"
df_meta = df.loc[underrated_mask]
print(df_meta.loc[:, ["Hero Name", "Sentimen"]])


          Hero Name    Sentimen
24            Masha  Underrated
42         Minotaur  Underrated
62         Carmilla  Underrated
64           Lolita  Underrated
66            Alice  Underrated
69   Popol and Kupa  Underrated
90           Wanwan  Underrated
105           Argus  Underrated
106         Melissa  Underrated


In [9]:
# Read the labelled data back using the same relative path it is written to
# and read from elsewhere in this notebook; an absolute "/content/..." path
# only resolves on Google Colab.
df_sentimen = pd.read_csv("data_with_sentiment.csv")
df_sentimen.head()

Unnamed: 0,Hero Name,Hero ID,Win Rate,Pick Rate,Ban Rate,Sentimen
0,Lapu-Lapu,37,0.477881,0.001948,0.000178,Biasa
1,Baxia,87,0.510701,0.000571,0.000199,Biasa
2,Grock,44,0.450297,0.00136,0.000274,Biasa
3,Thamuz,72,0.466466,0.002573,0.000299,Biasa
4,Kimmy,71,0.424788,0.001217,0.000303,Biasa


# Pelatihan Model

**Di sini saya melakukan pemodelan dengan metode XGBoost. Jika akurasi dengan metode lain lebih bagus, gunakan metode tersebut.**


**Akurasi Training: 1.0000
Akurasi Testing: 0.9615**

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import joblib

# Load the labelled heroes produced by the labelling step.
df = pd.read_csv("data_with_sentiment.csv")

# Features are the three rate columns; the target is the integer-encoded label.
feature_columns = ["Win Rate", "Pick Rate", "Ban Rate"]
X = df[feature_columns]
label_encoder = LabelEncoder()
df["Sentimen_Label"] = label_encoder.fit_transform(df["Sentimen"])
y = df["Sentimen_Label"]

# 80/20 split, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Hyperparameter grid: 3 values for each of 5 parameters.
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    min_child_weight=[1, 3, 5],
    gamma=[0, 0.1, 0.3],
)

# Exhaustive search scored by accuracy over 5 stratified folds.
grid_search = GridSearchCV(
    estimator=XGBClassifier(random_state=42),
    param_grid=param_grid,
    cv=StratifiedKFold(n_splits=5),
    scoring="accuracy",
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train, y_train)

# Best estimator found by the search.
best_xgb_model = grid_search.best_estimator_

# Accuracy on the training split and on the held-out test split.
y_pred_train = best_xgb_model.predict(X_train)
accuracy_train = accuracy_score(y_train, y_pred_train)

y_pred_test = best_xgb_model.predict(X_test)
accuracy_test = accuracy_score(y_test, y_pred_test)

print(f"Akurasi Training: {accuracy_train:.4f}")
print(f"Akurasi Testing: {accuracy_test:.4f}")
print(f"Hyperparameter Terbaik: {grid_search.best_params_}")

Fitting 5 folds for each of 243 candidates, totalling 1215 fits




Akurasi Training: 1.0000
Akurasi Testing: 0.9615
Hyperparameter Terbaik: {'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 100}


In [11]:
# Show which integer code the fitted encoder assigned to each class name.
code_to_label = {code: name for code, name in enumerate(label_encoder.classes_)}
print(code_to_label)


{0: 'Biasa', 1: 'Meta', 2: 'OP', 3: 'Underrated'}


# Ekspor Model

**Ekspor model yang telah dilatih**

In [None]:
import joblib

# Persist the tuned model first, then the label encoder.
for artifact, filename in (
    (best_xgb_model, "sentimen_model.pkl"),
    (label_encoder, "label_encoder.pkl"),
):
    joblib.dump(artifact, filename)

print("Model dan encoder telah disimpan!")

Model dan encoder telah disimpan!


In [None]:
import joblib
import numpy as np

# Load the artifacts under the file names the export cell writes
# ("sentimen_model.pkl" and "label_encoder.pkl"). The previous names
# ("sentimen_ml.pkl" / "label_ml.pkl") are never created by this notebook,
# so a fresh run would fail with FileNotFoundError.
# NOTE: joblib.load unpickles the file - only load artifacts you created yourself.
model = joblib.load("sentimen_model.pkl")
label_encoder = joblib.load("label_encoder.pkl")

# One sample, in the training feature order: Win Rate, Pick Rate, Ban Rate.
data_grock = np.array([[0.57, 0.0026, 0.000254]])

# Predict the integer class code and map it back to its label name.
pred_label = model.predict(data_grock)[0]
pred_sentimen = label_encoder.inverse_transform([pred_label])[0]

print(f"Sentimen untuk Grock: {pred_sentimen}")

Sentimen untuk Grock: Underrated
