<a href="https://colab.research.google.com/github/Almamun809/Daily-NLP/blob/main/Sentiment_Analysis_Baseline_Method.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier, DMatrix

# 5-fold cross-validated evaluation of an XGBoost classifier on one feature.
#
# Expects a pandas DataFrame named 'data' to already exist, with a 'Data'
# column (the single input feature) and a 'Label' column (the target).
# Prints accuracy and macro-averaged precision, recall and F1, each averaged
# over the folds.
X = data['Data']  # Feature
y = data['Label']  # Target variable
# NOTE(review): num_class=3 below presumably requires 'Label' to hold exactly
# the integer classes 0, 1 and 2 -- TODO confirm against how 'data' is built.

# Replace every distinct value of the feature with its integer category code.
# NOTE(review): if 'Data' holds free text, each distinct string gets its own
# arbitrary integer, which carries no information about the wording -- confirm
# this is the intended baseline representation.
X = X.astype('category').cat.codes

# Define the XGBoost classifier (three-class softmax objective)
classifier = XGBClassifier(objective='multi:softmax', num_class=3)

# 5-fold cross-validation; shuffling with a fixed seed keeps the folds
# reproducible between runs.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# One score per fold is collected in each of these lists
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

# Iterate over the cross-validation folds
for train_index, test_index in kfold.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # The scikit-learn style estimator is fitted on the pandas objects
    # directly, so no hand-built DMatrix is needed here.
    classifier.fit(X_train, y_train)

    # Predict the labels for the held-out part of this fold
    y_pred = classifier.predict(X_test)

    # Macro averaging weights every class equally, regardless of its size
    accuracy_scores.append(accuracy_score(y_test, y_pred))
    precision_scores.append(precision_score(y_test, y_pred, average='macro'))
    recall_scores.append(recall_score(y_test, y_pred, average='macro'))
    f1_scores.append(f1_score(y_test, y_pred, average='macro'))

# Calculate average scores across all folds
accuracy_avg = np.mean(accuracy_scores)
precision_avg = np.mean(precision_scores)
recall_avg = np.mean(recall_scores)
f1_avg = np.mean(f1_scores)

# Print the evaluation metrics
print(f'Accuracy: {accuracy_avg}')
print(f'Precision: {precision_avg}')
print(f'Recall: {recall_avg}')
print(f'F1 Score: {f1_avg}')