## Import necessary libraries

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd

## Load and split the data


In [None]:
# Location of the input CSV; edit this single constant to point at another copy of the file.
DATA_PATH = 'C:/Users/windows 10/MyProject/DeepLearning/Data/Translated_texts.csv'

# Metadata columns that are not used for modelling (ignored if absent from the file)
columns_to_remove = ['name', 'date', 'likes', 'source']

# Read, drop the unused columns, then discard rows missing the text or the label
data = (
    pd.read_csv(DATA_PATH)
    .drop(columns=columns_to_remove, errors='ignore')
    .dropna(subset=['comment', 'sentiment'])
)

# Feature (raw comment text) and target (sentiment label)
X, y = data['comment'], data['sentiment']

# Split the data (80% for training, 20% for testing); fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



## Set hyperparameters manually

In [None]:
# Manually chosen hyperparameters for each model, keyed by the model's display name.
# Each entry holds two keyword-argument dicts:
#   "tfidf_params" -> passed to TfidfVectorizer
#   "model_params" -> passed to the classifier's constructor
manual_hyperparams = {
    "Naive Bayes": {
        "tfidf_params": {"max_df": 0.9, "min_df": 2, "ngram_range": (1, 2)},
        "model_params": {"alpha": 0.5}
    },
    "Decision Tree": {
        "tfidf_params": {"max_df": 0.95, "min_df": 2, "ngram_range": (1, 2)},
        # random_state fixes the tree's internal feature shuffling so that reruns
        # give the same tree; 42 matches the seed used for the train/test split.
        "model_params": {"max_depth": None, "min_samples_split": 2, "random_state": 42}
    },
    "Support Vector Machine": {
        "tfidf_params": {"max_df": 0.95, "min_df": 2, "ngram_range": (1, 1)},
        "model_params": {"C": 1, "gamma": 1}
    },
    "Logistic Regression": {
        "tfidf_params": {"max_df": 0.9, "min_df": 2, "ngram_range": (1, 1)},
        # The liblinear solver shuffles the data, so it is seeded for reproducibility as well.
        "model_params": {"C": 1, "solver": "liblinear", "random_state": 42}
    }
}

# Display name -> estimator class (the class itself, not an instance).
# Insertion order is the order in which the models are iterated.
models = dict([
    ("Naive Bayes", MultinomialNB),
    ("Decision Tree", DecisionTreeClassifier),
    ("Support Vector Machine", SVC),
    ("Logistic Regression", LogisticRegression),
])



In [None]:
# Fit one TF-IDF + classifier pipeline per entry in `models` and print its scores.
for model_name, model_class in models.items():
    print(f"Training {model_name}...")

    # Settings chosen by hand for this particular model
    tfidf_params = manual_hyperparams[model_name]["tfidf_params"]
    model_params = manual_hyperparams[model_name]["model_params"]

    # Text vectorizer followed by the classifier
    vectorizer = TfidfVectorizer(**tfidf_params)
    classifier = model_class(**model_params)
    pipeline = Pipeline(steps=[('tfidf', vectorizer), ('model', classifier)])

    # fit() returns the fitted pipeline, so the training-set prediction is chained onto it
    y_train_pred = pipeline.fit(X_train, y_train).predict(X_train)
    y_test_pred = pipeline.predict(X_test)

    # Accuracy on the data the model was trained on vs. the held-out data
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)

    # Class-weighted precision / recall / F1 on the held-out data (support is discarded)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test,
        y_test_pred,
        average='weighted',
    )

    # Emit the report, one line per metric, closed by a separator rule
    report_lines = [
        f"Model: {model_name}",
        f"=======>>>Training : {train_accuracy:.2f}",
        f"Testing Accuracy: {test_accuracy:.2f}",
        f"Precision: {precision:.2f}",
        f"Recall: {recall:.2f}",
        f"F1-score: {f1:.2f}",
        "=" * 60,
    ]
    print(*report_lines, sep="\n")


In [14]:
# Preview the first ten rows of the cleaned frame
data.head(n=10)

Unnamed: 0,comment,sentiment
0,استفيدوا عروض جازي عايله الجديده توالم كامل اف...,0
1,Conx ta3koum dayra ki lhaam w say,-1
3,فهمتش sim وحده يخدمو العاءله كامله,0
4,السماح غمزة السماح,1
5,connection khraa welat,-1
6,2500DA trimestre,0
7,Barkawna bla 5orti cnx ta3kom dayra kile5ra,-1
8,Le Prix combien,0
9,Pppf yaw diro 100 Go b 100da kan bghito je sui...,-1
10,La connexion ta3kom est براز,-1
