In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from gensim.models import Word2Vec
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from keras.utils import to_categorical

nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


[nltk_data] Downloading package punkt to C:\Users\Vrooh/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Vrooh/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Vrooh/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
dfb = pd.read_csv('dataset_init.csv')
dfb.head()

Unnamed: 0,review,sentiment
0,"enjoyable, though it gets worse once you get t...",positive
1,good game,positive
2,I confirmed that the residents of the house ar...,positive
3,"This game was a real surprise, it pretty much ...",positive
4,great game I was reluctant to play as I had hi...,positive


### DATASET : ###

Dataset diambil dari hasil scraoing pada scraping_game.py. Data berupa data review game Resident Evil 7 : Biohazard. Di Steam terdapat 60rb+ Reviews, dan yang berhasil di ambil adalah 30rb+ data. Data Reviews Steam cukup menyenangkan untuk digunakan, karena sudah memiliki klasifikasi tersendiri seperti positive, negative dan sebagainya.

In [None]:
# Daftar kata-kata yang menunjukkan review "netral"
neutral_keywords = ['ok', 'fine', 'decent', 'not bad', 'good enough', 'good game', 'average', 'mediocre', 'acceptable', 'satisfactory', 'not great', 'not terrible']

# jumlah kata untuk dianggap netral (Tidak Digunakan, karena setelah tweak berkali-kali model memiliki akurasi rendah)
min_words_netral = 2
max_words_netral = 5

# 5. Fungsi relabel gabungan
def relabel(row):
    text = str(row['review']).lower()
    num_words = len(text.split())
    
    # Prioritas 1: Jika mengandung kata netral
    if any(keyword in text for keyword in neutral_keywords):
        return 'Neutral'  # Netral

    # Prioritas 2: Jika panjang kalimat pendek
    # if min_words_netral <= num_words <= max_words_netral:
    #     return 'Neutral'  # Netral
    
    # Kalau tidak memenuhi semua di atas, pakai label asli
    return row['sentiment']

# Terapkan fungsi relabel
dfb['new_sentiment'] = dfb.apply(relabel, axis=1)

# save
dfb.to_csv('dataset_relabelled.csv', index=False)

print("Relabeling selesai! Dataset baru disimpan di 'dataset_relabelled.csv'.")

Relabeling selesai! Dataset baru disimpan di 'dataset_relabelled.csv'.


In [None]:
df = pd.read_csv('dataset_relabelled.csv')
df.head()

Unnamed: 0,review,sentiment,new_sentiment
0,"enjoyable, though it gets worse once you get t...",positive,positive
1,good game,positive,Neutral
2,I confirmed that the residents of the house ar...,positive,positive
3,"This game was a real surprise, it pretty much ...",positive,positive
4,great game I was reluctant to play as I had hi...,positive,positive


Berdasarkan Dataset terbaru, dapat kita perhatikan pada label new_sentimen terdapat label tambahan yaitu neutral. Hal ini berasal dari kode sebelumnya yang ditambahkan secara eksplisit pada neutral_keywords. Tentunya hal ini tidak ideal untuk dilakukan untuk dataset yang besar karena tidak dinamis dan "canggih".

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34336 entries, 0 to 34335
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   review         34246 non-null  object
 1   sentiment      34336 non-null  object
 2   new_sentiment  34336 non-null  object
dtypes: object(3)
memory usage: 804.9+ KB


In [None]:
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Menangani NaN dan tipe data non-string
    if not isinstance(text, str):
        return ''  # Kembalikan string kosong jika bukan string
    
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # hapus karaktwr selain huruf dan spasi
    tokens = nltk.word_tokenize(text)
    tokens = [word for word in tokens if word not in stop_words]
    return ' '.join(tokens)

# buat kolom baru
df['cleaned_review'] = df['review'].apply(preprocess_text)
print(df.head())

                                              review sentiment new_sentiment  \
0  enjoyable, though it gets worse once you get t...  positive      positive   
1                                          good game  positive       Neutral   
2  I confirmed that the residents of the house ar...  positive      positive   
3  This game was a real surprise, it pretty much ...  positive      positive   
4  great game I was reluctant to play as I had hi...  positive      positive   

                                      cleaned_review  
0  enjoyable though gets worse get halfway point ...  
1                                          good game  
2              confirmed residents house infact evil  
3  game real surprise pretty much saved resident ...  
4  great game reluctant play high expectations re...  


Setelah menimplmentasikan stopwords, dapat kita lihat pada cleaned_review data sudah substring semua, tidak terdapat karakter khusus dan sebagainya.

In [38]:
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['cleaned_review']).toarray()
y = df['new_sentiment'].map({'positive':1, 'negative':0, 'Neutral':2}).values

In [None]:
#Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=3)
y_test = to_categorical(y_test, num_classes=3)

In [None]:
#Model Building (Deep Learning)
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(3, activation='softmax'))

# model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model.compile(optimizer=Adam(learning_rate=0.001), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'],
              weighted_metrics=['accuracy'])

# training
# history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

class_weights = {0: 2, 1: 1, 2: 2} 
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), class_weight=class_weights)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 18ms/step - accuracy: 0.8236 - loss: 0.7536 - weighted_accuracy: 0.7577 - val_accuracy: 0.9024 - val_loss: 0.2866 - val_weighted_accuracy: 0.9024
Epoch 2/10
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.9303 - loss: 0.3197 - weighted_accuracy: 0.9125 - val_accuracy: 0.9067 - val_loss: 0.2757 - val_weighted_accuracy: 0.9067
Epoch 3/10
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.9573 - loss: 0.2032 - weighted_accuracy: 0.9471 - val_accuracy: 0.9113 - val_loss: 0.2824 - val_weighted_accuracy: 0.9113
Epoch 4/10
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.9719 - loss: 0.1482 - weighted_accuracy: 0.9649 - val_accuracy: 0.9093 - val_loss: 0.3236 - val_weighted_accuracy: 0.9093
Epoch 5/10
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.97

Saya melakukan penambahan bobot pada kelas negatif dan netral, hal ini dikarenakan class tersebut merupakan minoritas pada dataset yang saya gunakan. Dengan beberapa tweak, akhirnya akurasi yang dihasilkan cukup baik di angka 91%. Terutama untuk Pada training, akurasi sangat tinggi hampir 100% (98%)

In [42]:
# Prediksi & Evaluasi
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

from sklearn.metrics import classification_report
print(classification_report(y_true, y_pred))

[1m215/215[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.59      0.39      0.47       287
           1       0.93      0.96      0.95      5267
           2       0.86      0.83      0.85      1314

    accuracy                           0.91      6868
   macro avg       0.80      0.73      0.75      6868
weighted avg       0.91      0.91      0.91      6868



In [None]:
# Inference
def predict_sentiment(text):
    cleaned = preprocess_text(text)
    vect_text = vectorizer.transform([cleaned]).toarray() 
    prediction = model.predict(vect_text)
    
    # Dapatkan probabilitas untuk setiap kelas
    positive_prob = prediction[0][1]
    neutral_prob = prediction[0][2]
    negative_prob = prediction[0][0]
    
    print(f"Probabilities - Positive: {positive_prob}, Neutral: {neutral_prob}, Negative: {negative_prob}")
    
    if positive_prob > neutral_prob and positive_prob > negative_prob:
        return "Positive"
    elif neutral_prob > positive_prob and neutral_prob > negative_prob:
        return "Neutral"
    else:
        return "Negative"


# Test Sampel
sample_1 = "This game is awesome!"
sample_2 = "good game"
sample_3 = "This is Stupid, I hate it."

print(f"Predicted Sentiment for '{sample_1}': {predict_sentiment(sample_1)}")
print(f"Predicted Sentiment for '{sample_2}': {predict_sentiment(sample_2)}")
print(f"Predicted Sentiment for '{sample_3}': {predict_sentiment(sample_3)}")


Probabilities - Positive: o, Neutral: s, Negative: p
Predicted Sentiment for 'This game is awesome!': Neutral
Probabilities - Positive: e, Neutral: u, Negative: N
Predicted Sentiment for 'good game': Neutral
Probabilities - Positive: e, Neutral: g, Negative: n
Predicted Sentiment for 'This is Stupid, I hate it.': Negative


Berdasarkan inference diatas, dapat kita perhatikan untuk beberapa sample tampak valid meski mungkin tidak seakurat yang diharapkan karena kesalahan dalam memilih dataset yang memiliki review general sangat tinggi di Steam (Overwhemengly positive pada Resident Evil 7 : Biohazard).

In [44]:
# TF-IDF
vectorizer = TfidfVectorizer(max_features=10000, ngram_range=(1,2))
X_tfidf = vectorizer.fit_transform(df['cleaned_review'])
y = df['new_sentiment']

# Split 80/20
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

In [45]:
# SVM
model = LinearSVC()
model.fit(X_train, y_train)

# Eval
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

     Neutral       0.97      0.89      0.93      1314
    negative       0.77      0.32      0.45       287
    positive       0.94      0.99      0.97      5267

    accuracy                           0.94      6868
   macro avg       0.89      0.73      0.78      6868
weighted avg       0.94      0.94      0.94      6868



Berdasarkan hasil percobaan training diatas, SVM Memiliki Accuracy yang lebih baik (94%) dengan keseimbangan yang cukup merata untuk setiap kelas, meski review negativce masih lebih rendah dibanding kelas lainnya.

In [46]:
# Tokenisasi
sentences = [text.split() for text in df['cleaned_review']]

# Word2Vec
w2v_model = Word2Vec(sentences, vector_size=100, window=5, min_count=2)
def get_avg_vector(words):
    valid = [w2v_model.wv[word] for word in words if word in w2v_model.wv]
    return np.mean(valid, axis=0) if valid else np.zeros(100)

X_vec = np.array([get_avg_vector(s) for s in sentences])
y = df['new_sentiment']

In [47]:
# Split 70/30
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.3, random_state=42)

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_train, y_train)

In [48]:
# Eval
y_pred = rf_model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

     Neutral       0.74      0.42      0.53      1921
    negative       0.68      0.07      0.13       447
    positive       0.84      0.97      0.90      7933

    accuracy                           0.83     10301
   macro avg       0.75      0.49      0.52     10301
weighted avg       0.81      0.83      0.80     10301



Untuk Random Forest dapat kita perhatikan Skornya cukup buruk dibanding SVM Dan DeepLearning, dengan Accuracy hanya 83%, dan F1 score yang sangat rendah pada kelas minoritas (negative)

---

## Insight ##

1. Model berfungsi baik dengan akurasi yang signifikan cukup tinggi. (98% pada proses training dan validasi 91%)
2. Performa Kelas tidak seimbang : Ini sangat wajar dikarenakan kesalahan dasar saya memilih game untuk dataset dengan review Steam Overwhemingly positive. Dari 65rb review, 95% nya positif. Hal ini menyebabkan kelas sangat tidak seimbang, meski menjadi tantangan tersendiri dalam proses penyesuaian seperti menambah bobot lebih pada kelas kelas minoritas tersebut
3. Epoch yang optimal : Berdasarkan hasil dari epoch ke-1 hingga ke-10, model menunjukkan peningkatan performa yang signifikan
4. Pada perbandingan dengan skema pelatiahn lainnya, kurang lebih hasilnya seperti ini :
- SVM adalah model terbaik, degan akurasi dan macro average-nya tertinggi.
- Deep Learning mendekati target 92%, jadi bisa dioptimalkan lebih lanjut
- Random Forest kurang layak dipilih, terutama karena f1-score-nya terlalu rendah di kelas minoritas.