# 📱 Analisis Komentar Aplikasi Mobile Legends: Bang Bang
Analisis ini mencakup:
- Scraping komentar dari Google Play
- Preprocessing teks
- Analisis sentimen (pelabelan dari skor ulasan dan klasifikasi berbasis TF-IDF)
- Clustering (unsupervised)
- Visualisasi dan pelabelan cluster

In [None]:
!pip install google-play-scraper
!pip install pandas matplotlib seaborn wordcloud nltk scikit-learn

In [None]:
from google_play_scraper import reviews, Sort
import pandas as pd

# Request the 200 newest reviews (language 'id', country 'id') for the app.
# reviews() returns a pair; its second element is not used in this notebook.
scrape_options = dict(lang='id', country='id', sort=Sort.NEWEST, count=200)
result, _ = reviews('com.mobile.legends', **scrape_options)

# Keep only the reviewer name, the score and the review text.
kept_columns = ['userName', 'score', 'content']
df = pd.DataFrame(result)
df = df[kept_columns]
df.head()

In [None]:
import re
import nltk
# Fetch the NLTK stopword corpus so that stopwords.words() below can load it.
nltk.download('stopwords')
from nltk.corpus import stopwords

# Indonesian stopwords as a set; clean_text tests each word against it.
stop_words = set(stopwords.words('indonesian'))

# Matches every character that is neither a lowercase ASCII letter nor
# whitespace. The text is lowercased before matching, so A-Z is not needed.
_NON_LETTER_RE = re.compile(r'[^a-z\s]')


def clean_text(text, stop_word_set=None):
    """Normalize one review text for vectorization.

    The text is lowercased, every non-letter character is replaced by a
    space, and the remaining words that are not stopwords are joined with
    single spaces.

    Args:
        text: The raw review text. Any non-string value (for example
            ``None`` or NaN from a missing review) yields ``''``.
        stop_word_set: Optional collection of words to drop. Defaults to the
            module-level ``stop_words``.

    Returns:
        The cleaned text as a single space-separated string.
    """
    if not isinstance(text, str):
        return ''
    if stop_word_set is None:
        stop_word_set = stop_words
    # Substitute a space (not the empty string) so that words separated only
    # by punctuation, e.g. "bagus,tapi" or "anak-anak", are not fused together.
    letters_only = _NON_LETTER_RE.sub(' ', text.lower())
    return ' '.join(w for w in letters_only.split() if w not in stop_word_set)

# Store the cleaned text in a new column; the raw 'content' column is kept.
df['clean_content'] = df['content'].apply(clean_text)
df.head()

In [None]:
# Label sentimen dari skor
def label_sentiment(score):
    """Map a review score to a sentiment label.

    Returns 'positif' when the score is at least 4, 'netral' when it is
    exactly 3, and 'negatif' for every other value.
    """
    if score >= 4:
        return 'positif'
    return 'netral' if score == 3 else 'negatif'

# Label every review from its score, then chart how many reviews carry each
# label.
df['sentiment'] = df['score'].apply(label_sentiment)
sentiment_counts = df['sentiment'].value_counts()
sentiment_counts.plot(kind='bar', title='Distribusi Sentimen')

In [None]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Merge all cleaned reviews into one string and build the word cloud from it.
text = ' '.join(df['clean_content'])
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(text)

# Render the cloud as an image with the axes hidden.
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title('Wordcloud Komentar Mobile Legends')
plt.show()

In [None]:
# TF-IDF dan klasifikasi
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Whole-corpus TF-IDF matrix. It is kept because the clustering, top-term and
# PCA cells read `X` and `tfidf`; it is not used to evaluate the classifiers.
tfidf = TfidfVectorizer(max_features=1000)
X = tfidf.fit_transform(df['clean_content'])
y = df['sentiment']

# Split the cleaned text BEFORE vectorizing so the vocabulary and IDF weights
# used for evaluation are learned from the training reviews only; fitting on
# all reviews first leaks test-set statistics into the features.
# Stratify when every class has at least two reviews so the small test set
# keeps the class mix; otherwise fall back to a plain random split.
class_counts = y.value_counts()
stratify_by = y if class_counts.min() >= 2 else None
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_content'], y,
    test_size=0.2, random_state=42, stratify=stratify_by
)

clf_tfidf = TfidfVectorizer(max_features=1000)
X_train = clf_tfidf.fit_transform(text_train)
X_test = clf_tfidf.transform(text_test)

models = {
    "Naive Bayes": MultinomialNB(),
    "Logistic Regression": LogisticRegression(max_iter=200),
    # Seeded like the split so repeated runs report the same scores.
    "Random Forest": RandomForestClassifier(random_state=42)
}

# Train each model on the training split and report its test-split metrics.
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    print(f"\nModel: {name}")
    print("Akurasi:", accuracy_score(y_test, preds))
    # zero_division=0 reports 0.0, without a warning, for a class the model
    # never predicts.
    print(classification_report(y_test, preds, zero_division=0))

In [None]:
# Clustering K-Means
from sklearn.cluster import KMeans
n_clusters = 3
# n_init is pinned explicitly: scikit-learn 1.4 changed its default from 10 to
# 'auto', so leaving it implicit makes the cluster assignment depend on the
# installed version (and warns on some versions).
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
# fit_predict fits the model and returns the same labels as kmeans.labels_.
df['cluster'] = kmeans.fit_predict(X)
df['cluster'].value_counts()

In [None]:
# Print the ten highest-weighted vocabulary terms of each cluster centroid.
terms = tfidf.get_feature_names_out()
for i in range(n_clusters):
    print(f"\nTop kata di cluster {i}:")
    # argsort is ascending, so reverse it and keep the first ten indices.
    ranked = kmeans.cluster_centers_[i].argsort()[::-1]
    top_terms = [terms[idx] for idx in ranked[:10]]
    print(", ".join(top_terms))

In [None]:
# Show up to three example reviews from each cluster.
for i in range(n_clusters):
    print(f"\nContoh komentar di Cluster {i}:")
    members = df.loc[df['cluster'] == i, 'content']
    # sample() raises ValueError when asked for more rows than exist, so cap
    # the sample size at the cluster size (K-Means can produce tiny clusters).
    examples = members.sample(min(3, len(members)), random_state=1)
    print(examples.to_string(index=False))

In [None]:
# Manually chosen topic names, keyed by K-Means cluster id (0, 1, 2).
# NOTE(review): presumably picked after inspecting one particular clustering
# run; re-check them whenever the clustering is re-run.
topic_names = ["Keluhan Bug", "Pujian Gameplay", "Masalah Akun"]
cluster_labels = dict(enumerate(topic_names))

# Attach the topic name that matches each review's cluster id.
df['cluster_label'] = df['cluster'].map(cluster_labels)
df[['content', 'cluster', 'cluster_label']].head()

In [None]:
# Visualisasi cluster dengan PCA
from sklearn.decomposition import PCA

# Project the dense TF-IDF matrix onto two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X.toarray())

# Scatter the reviews in that 2-D space, coloured by K-Means cluster id.
fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=df['cluster'], cmap='viridis')
handles, legend_text = scatter.legend_elements()
ax.legend(handles, legend_text, title="Cluster")
ax.set_title("Visualisasi Clustering Komentar")
ax.set_xlabel("PCA 1")
ax.set_ylabel("PCA 2")
plt.show()