In [176]:
import re

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

In [178]:
# Labels given to the CSV columns, in file order (passed to pd.read_csv via `names=`).
column_names = [
    'id',
    'game',
    'sentiment',
    'text',
]

In [180]:
# Load both splits. Column labels come from `column_names` because they are
# supplied explicitly rather than read from the files.
training_data = pd.read_csv(
    'twitter_training.csv',
    names=column_names,
)  # training data
validation_data = pd.read_csv(
    'twitter_validation.csv',
    names=column_names,
)  # validation data

In [181]:
# Preview the training frame: every row except the last 30.
training_data.iloc[:-30]

Unnamed: 0,id,game,sentiment,text
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
...,...,...,...,...
74647,9195,Nvidia,Neutral,NVIDIA has released a security update for its ...
74648,9195,Nvidia,Neutral,NVIDIA has released a security update for its ...
74649,9195,Nvidia,Neutral,NVIDIA released another software update for it...
74650,9195,Nvidia,Neutral,NVIDIA released a new security update for its ...


In [184]:
# Count missing values per column in the training split.
print(training_data.isna().sum())

id             0
game           0
sentiment      0
text         686
dtype: int64


In [186]:
# Count missing values per column in the validation split.
print(validation_data.isna().sum())

id           0
game         0
sentiment    0
text         0
dtype: int64


In [188]:
# Discard training rows whose tweet text is missing; other columns are not checked.
training_data = training_data.dropna(
    axis='index',
    how='any',
    subset=['text'],
)

In [190]:
# Re-check the per-column missing-value counts after dropping rows without text.
print(training_data.isna().sum())

id           0
game         0
sentiment    0
text         0
dtype: int64


In [192]:
# Distinct values of the `game` column, in order of first appearance.
pd.unique(training_data['game'])

array(['Borderlands', 'CallOfDutyBlackopsColdWar', 'Amazon', 'Overwatch',
       'Xbox(Xseries)', 'NBA2K', 'Dota2', 'PlayStation5(PS5)',
       'WorldOfCraft', 'CS-GO', 'Google', 'AssassinsCreed', 'ApexLegends',
       'LeagueOfLegends', 'Fortnite', 'Microsoft', 'Hearthstone',
       'Battlefield', 'PlayerUnknownsBattlegrounds(PUBG)', 'Verizon',
       'HomeDepot', 'FIFA', 'RedDeadRedemption(RDR)', 'CallOfDuty',
       'TomClancysRainbowSix', 'Facebook', 'GrandTheftAuto(GTA)',
       'MaddenNFL', 'johnson&johnson', 'Cyberpunk2077',
       'TomClancysGhostRecon', 'Nvidia'], dtype=object)

In [194]:
# Distinct sentiment labels, in order of first appearance.
pd.unique(training_data['sentiment'])

array(['Positive', 'Neutral', 'Negative', 'Irrelevant'], dtype=object)

In [196]:
# Patterns are compiled once because the function is applied row by row.
# URLs: an explicit scheme ("http://", "https://") or a "www." prefix at a word
# boundary, so ordinary words that merely contain "www" (e.g. "awwww") survive.
_URL_PATTERN = re.compile(r'https?://\S+|\bwww\.\S+')
# @mentions: "@" followed by word characters.
_MENTION_PATTERN = re.compile(r'@\w+')
# Any run of non-word characters, digits or underscores (includes whitespace).
_NON_LETTER_PATTERN = re.compile(r'[\W\d_]+')


def preprocess_text(text):
    """Normalise a raw tweet into lower-case, space-separated letter tokens.

    Steps, in order:
      1. Missing values (None / NaN / pd.NA) become an empty string instead
         of the literal tokens "none" / "nan".
      2. The value is converted to ``str`` and lower-cased.
      3. URLs and @mentions are deleted.
      4. Every run of punctuation, digits, underscores or whitespace is
         replaced by a single space.
      5. Leading and trailing spaces are stripped.

    Parameters
    ----------
    text : object
        The raw text. Non-string values are converted with ``str``.

    Returns
    -------
    str
        The cleaned text; may be empty.
    """
    # A missing scalar must not turn into a spurious "nan"/"none" token.
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return ''

    text = str(text).lower()
    text = _URL_PATTERN.sub('', text)
    text = _MENTION_PATTERN.sub('', text)
    # One substitution covers special characters, digits and whitespace
    # collapsing, because every run is replaced by exactly one space.
    text = _NON_LETTER_PATTERN.sub(' ', text)
    return text.strip()

In [198]:
def _combine_game_and_text(frame):
    """Return one string per row: the row's `game`, a space, then its `text`."""
    return frame['game'] + " " + frame['text']


# Clean the tweet text of both splits with the shared preprocessing function.
training_data['text'] = training_data['text'].map(preprocess_text)
validation_data['text'] = validation_data['text'].map(preprocess_text)

# Targets are the sentiment labels; inputs are the game name joined with the cleaned text.
y_train = training_data['sentiment']
y_val = validation_data['sentiment']
X_train = _combine_game_and_text(training_data)
X_val = _combine_game_and_text(validation_data)

# TF-IDF over unigrams and bigrams with English stop words removed.
# The vocabulary is fitted on the training split only and reused for validation.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_val_tfidf = vectorizer.transform(X_val)

# Fit a multinomial Naive Bayes classifier on the TF-IDF features.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict labels for the validation split.
y_pred = model.predict(X_val_tfidf)

# Report accuracy and the per-class metrics on the validation split.
validation_accuracy = accuracy_score(y_val, y_pred)
print('Doğruluk:', validation_accuracy)
print(classification_report(y_val, y_pred))

Doğruluk: 0.964
              precision    recall  f1-score   support

  Irrelevant       1.00      0.89      0.94       172
    Negative       0.95      0.98      0.96       266
     Neutral       0.98      0.97      0.98       285
    Positive       0.94      0.99      0.96       277

    accuracy                           0.96      1000
   macro avg       0.97      0.96      0.96      1000
weighted avg       0.97      0.96      0.96      1000



In [199]:
# Receiving data input from the user
game_input = input("input the game name: ")
text_input = input("input the text: ")

# Raw sample, kept for reference: game name followed by the text as typed.
input_data = [game_input + " " + text_input]

# Build the model input the same way the training features were built:
# the game name untouched, then a space, then the cleaned text.
# Running preprocess_text over the game name as well would strip its digits
# (e.g. "Dota2" -> "dota"), so the game token would no longer match the one
# the vectorizer saw during fitting.
input_data_preprocessed = [game_input + " " + preprocess_text(text_input)]

# Convert with the already-fitted TF-IDF vectorizer (transform only, no refit).
input_tfidf = vectorizer.transform(input_data_preprocessed)

# Predict the sentiment label for the single sample.
predicted_sentiment = model.predict(input_tfidf)

print("Predicted emotion:", predicted_sentiment[0])

input the game name:  Borderland
input the text:  this game is so bad


Predicted emotion: Negative
