In [18]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# 1. تحميل البيانات
df = pd.read_csv("YoutubeCommentsDataSet.csv")

# 2. اختيار الأعمدة المطلوبة وحذف القيم الفارغة
df = df[['Comment', 'Sentiment']].dropna()

# 3. إضافة تعليقات جديدة كتجربة تنبؤ ضمن البيانات
new_comments = [
    {"Comment": "I really love this video, very helpful!", "Sentiment": None},
    {"Comment": "This is the worst thing I've ever seen.", "Sentiment": None},
    {"Comment": "Not bad, but could be better.", "Sentiment": None},
    {"Comment": "I don’t have any strong opinion about this.", "Sentiment": None}
]

df = pd.concat([df, pd.DataFrame(new_comments)], ignore_index=True)

# 4. تقسيم البيانات إلى بيانات تدريب واختبار
# نأخذ التعليقات التي لديها تصنيف فقط للتدريب والاختبار
train_df = df[df['Sentiment'].notna()]
X_train, X_test, y_train, y_test = train_test_split(
    train_df['Comment'], train_df['Sentiment'], test_size=0.2, random_state=42
)

# 5. تحويل النصوص إلى تمثيل عددي (TF-IDF)
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# 6. إنشاء وتدريب نموذج MultinomialNB
model = MultinomialNB(alpha=1.0)
model.fit(X_train_vec, y_train)

# 7. التنبؤ على بيانات الاختبار وتقييم النموذج
y_pred = model.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# 8. تجربة التنبؤ على التعليقات الجديدة ضمن الداتا
new_comments_df = df[df['Sentiment'].isna()]  # التعليقات بدون تصنيف
new_comments_vec = vectorizer.transform(new_comments_df['Comment'])
predictions = model.predict(new_comments_vec)

for comment, pred in zip(new_comments_df['Comment'], predictions):
    print(f"Comment: {comment} --> Predicted Sentiment: {pred}")

Accuracy: 0.6945276340865777

Classification Report:
               precision    recall  f1-score   support

    negative       0.64      0.15      0.25       441
     neutral       0.71      0.27      0.39       912
    positive       0.69      0.96      0.81      2320

    accuracy                           0.69      3673
   macro avg       0.68      0.46      0.48      3673
weighted avg       0.69      0.69      0.64      3673

Comment: I really love this video, very helpful! --> Predicted Sentiment: positive
Comment: This is the worst thing I've ever seen. --> Predicted Sentiment: positive
Comment: Not bad, but could be better. --> Predicted Sentiment: positive
Comment: I don’t have any strong opinion about this. --> Predicted Sentiment: positive
