In [None]:
%pip install catboost==1.2
%pip install xgboost

In [None]:
import pandas as pd 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier


import warnings
warnings.filterwarnings("ignore")

In [7]:
equal_df_essay = pd.read_csv('equal_essay_all_features.csv')
different_df_essay = pd.read_csv('different_essay_all_features.csv')

equal_df_news = pd.read_csv('equal_news_all_features.csv')
different_df_news = pd.read_csv('different_news_all_features.csv')

equal_df_blogs = pd.read_csv('equal_blogs_all_features.csv')
different_df_blogs = pd.read_csv('different_blogs_all_features.csv')

## **Normalization**

In [18]:
equal_df_essay.drop(['text', 'lemmatized_text', 'syntax_markup', 'morph_markup', 'n_words'], axis=1, inplace=True)
equal_df_essay.reset_index(drop=True, inplace=True)

equal_df_news.drop(['text', 'lemmatized_text', 'syntax_markup', 'morph_markup', 'n_words'], axis=1, inplace=True)
equal_df_news.reset_index(drop=True, inplace=True)

equal_df_blogs.drop(['text', 'lemmatized_text', 'syntax_markup', 'morph_markup', 'n_words'], axis=1, inplace=True)
equal_df_blogs.reset_index(drop=True, inplace=True)

In [19]:
different_df_essay.drop(['text', 'lemmatized_text', 'syntax_markup', 'morph_markup', 'n_words'], axis=1, inplace=True)
different_df_essay.reset_index(drop=True, inplace=True)

different_df_news.drop(['text', 'lemmatized_text', 'syntax_markup', 'morph_markup', 'n_words'], axis=1, inplace=True)
different_df_news.reset_index(drop=True, inplace=True)

different_df_blogs.drop(['text', 'lemmatized_text', 'syntax_markup', 'morph_markup', 'n_words'], axis=1, inplace=True)
different_df_blogs.reset_index(drop=True, inplace=True)

In [21]:
class_equal_essay = equal_df_essay['class']
class_different_essay = different_df_essay['class']

class_equal_news = equal_df_news['class']
class_different_news = different_df_news['class']

class_equal_blogs = equal_df_blogs['class']
class_different_blogs = different_df_blogs['class']

In [23]:
equal_df_essay.drop(['class'], axis=1, inplace=True)
equal_df_essay.reset_index(drop=True, inplace=True)

equal_df_news.drop(['class'], axis=1, inplace=True)
equal_df_news.reset_index(drop=True, inplace=True)

equal_df_blogs.drop(['class'], axis=1, inplace=True)
equal_df_blogs.reset_index(drop=True, inplace=True)

In [24]:
different_df_essay.drop(['class'], axis=1, inplace=True)
different_df_essay.reset_index(drop=True, inplace=True)

different_df_news.drop(['class'], axis=1, inplace=True)
different_df_news.reset_index(drop=True, inplace=True)

different_df_blogs.drop(['class'], axis=1, inplace=True)
different_df_blogs.reset_index(drop=True, inplace=True)

In [25]:
scaler_equal_essay = StandardScaler()
scaler_equal_news = StandardScaler()
scaler_equal_blogs = StandardScaler()

scaler_different_essay = StandardScaler()
scaler_different_news = StandardScaler()
scaler_different_blogs = StandardScaler()

In [26]:
equal_df_essay = pd.DataFrame(scaler_equal_essay.fit_transform(equal_df_essay), columns=equal_df_essay.columns)
equal_df_news = pd.DataFrame(scaler_equal_news.fit_transform(equal_df_news), columns=equal_df_news.columns)
equal_df_blogs = pd.DataFrame(scaler_equal_blogs.fit_transform(equal_df_blogs), columns=equal_df_blogs.columns)

different_df_essay = pd.DataFrame(scaler_different_essay.fit_transform(different_df_essay), columns=different_df_essay.columns)
different_df_news = pd.DataFrame(scaler_different_news.fit_transform(different_df_news), columns=different_df_news.columns)
different_df_blogs = pd.DataFrame(scaler_different_blogs.fit_transform(different_df_blogs), columns=different_df_blogs.columns)

## **RandomForestClassifier**

### **Equal essays as train sample**

In [None]:
equal_rf = RandomForestClassifier(max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=300, random_state=42)
equal_rf.fit(equal_df_essay, class_equal_essay)

In [30]:
# News as test sample
y_pred_equal_news = equal_rf.predict(equal_df_news)
print("Classification Report:")
print(classification_report(class_equal_news, y_pred_equal_news))

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.89      0.82      1433
           1       0.90      0.78      0.83      1814

    accuracy                           0.83      3247
   macro avg       0.83      0.83      0.83      3247
weighted avg       0.84      0.83      0.83      3247



In [31]:
# Blogs as test sample
y_pred_equal_blogs = equal_rf.predict(equal_df_blogs)
print("Classification Report:")
print(classification_report(class_equal_blogs, y_pred_equal_blogs))

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.84      0.80      1386
           1       0.85      0.78      0.81      1619

    accuracy                           0.81      3005
   macro avg       0.81      0.81      0.81      3005
weighted avg       0.81      0.81      0.81      3005



### **Equal news as train sample**

In [32]:
equal_rf_news = RandomForestClassifier(max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=300,  random_state=42)
equal_rf_news.fit(equal_df_news, class_equal_news)

In [33]:
# Essays as test sample
y_pred_equal_essay = equal_rf_news.predict(equal_df_essay)
print("Classification Report:")
print(classification_report(class_equal_essay, y_pred_equal_essay))

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.87      0.86      1257
           1       0.88      0.87      0.88      1452

    accuracy                           0.87      2709
   macro avg       0.87      0.87      0.87      2709
weighted avg       0.87      0.87      0.87      2709



In [34]:
# Blogs as test sample
y_pred_equal_blogs_2 = equal_rf_news.predict(equal_df_blogs)
print("Classification Report:")
print(classification_report(class_equal_blogs, y_pred_equal_blogs_2))

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.77      0.77      1386
           1       0.80      0.80      0.80      1619

    accuracy                           0.78      3005
   macro avg       0.78      0.78      0.78      3005
weighted avg       0.78      0.78      0.78      3005



### **Equal blogs as train sample**

In [35]:
equal_rf_blogs = RandomForestClassifier(max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=300,  random_state=42)
equal_rf_blogs.fit(equal_df_blogs, class_equal_blogs)

In [36]:
# Essays as test sample
y_pred_equal_essay_2 = equal_rf_blogs.predict(equal_df_essay)
print("Classification Report:")
print(classification_report(class_equal_essay, y_pred_equal_essay_2))

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.93      0.91      1257
           1       0.94      0.90      0.92      1452

    accuracy                           0.91      2709
   macro avg       0.91      0.92      0.91      2709
weighted avg       0.92      0.91      0.91      2709



In [37]:
# News as test sample
y_pred_equal_news_2 = equal_rf_blogs.predict(equal_df_news)
print("Classification Report:")
print(classification_report(class_equal_news, y_pred_equal_news_2))

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.93      0.86      1433
           1       0.94      0.83      0.88      1814

    accuracy                           0.87      3247
   macro avg       0.87      0.88      0.87      3247
weighted avg       0.88      0.87      0.87      3247



### **Different essays as train sample**

In [38]:
different_rf = RandomForestClassifier(max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300,  random_state=42)
different_rf.fit(different_df_essay, class_different_essay)

In [39]:
# Blogs as test sample
y_pred_different_blogs = different_rf.predict(different_df_blogs)
print("Classification Report:")
print(classification_report(class_different_blogs, y_pred_different_blogs))

Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.77      0.78      1336
           1       0.81      0.82      0.82      1619

    accuracy                           0.80      2955
   macro avg       0.80      0.80      0.80      2955
weighted avg       0.80      0.80      0.80      2955



In [40]:
# News as test sample
y_pred_different_news = different_rf.predict(different_df_news)
print("Classification Report:")
print(classification_report(class_different_news, y_pred_different_news))

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.86      0.83      1366
           1       0.89      0.85      0.87      1801

    accuracy                           0.85      3167
   macro avg       0.85      0.85      0.85      3167
weighted avg       0.85      0.85      0.85      3167



### **Different news as train sample**

In [41]:
different_rf_news = RandomForestClassifier(max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=300,  random_state=42)
different_rf_news.fit(different_df_news, class_different_news)

In [42]:
# Essays as test sample
y_pred_different_essay = different_rf_news.predict(different_df_essay)
print("Classification Report:")
print(classification_report(class_different_essay, y_pred_different_essay))

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.81      0.83       616
           1       0.85      0.88      0.86       740

    accuracy                           0.85      1356
   macro avg       0.85      0.85      0.85      1356
weighted avg       0.85      0.85      0.85      1356



In [43]:
# Blogs as test sample
y_pred_different_blogs_2 = different_rf_news.predict(different_df_blogs)
print("Classification Report:")
print(classification_report(class_different_blogs, y_pred_different_blogs_2))

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.75      0.75      1336
           1       0.79      0.80      0.80      1619

    accuracy                           0.78      2955
   macro avg       0.77      0.77      0.77      2955
weighted avg       0.78      0.78      0.78      2955



### **Different blogs as train sample**

In [44]:
different_rf_blogs = RandomForestClassifier(max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=300,  random_state=42)
different_rf_blogs.fit(different_df_blogs, class_different_blogs)

In [45]:
# Essays as test sample
y_pred_different_essay_2 = different_rf_blogs.predict(different_df_essay)
print("Classification Report:")
print(classification_report(class_different_essay, y_pred_different_essay_2))

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.93      0.92       616
           1       0.94      0.93      0.94       740

    accuracy                           0.93      1356
   macro avg       0.93      0.93      0.93      1356
weighted avg       0.93      0.93      0.93      1356



In [46]:
# News as test sample
y_pred_different_news_2 = different_rf_blogs.predict(different_df_news)
print("Classification Report:")
print(classification_report(class_different_news, y_pred_different_news_2))

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.94      0.86      1366
           1       0.95      0.81      0.88      1801

    accuracy                           0.87      3167
   macro avg       0.87      0.88      0.87      3167
weighted avg       0.88      0.87      0.87      3167



## **CatBoostClassifier**

### **Equal essays as train sample**

In [47]:
catboost_model_equal = CatBoostClassifier(depth=6, learning_rate=0.1, verbose=False, random_state=42)
catboost_model_equal.fit(equal_df_essay, class_equal_essay)

<catboost.core.CatBoostClassifier at 0x2864ce2d0>

In [48]:
# News as test sample
catboost_pred_equal_news = catboost_model_equal.predict(equal_df_news)

print(classification_report(class_equal_news, catboost_pred_equal_news))

              precision    recall  f1-score   support

           0       0.91      0.89      0.90      1433
           1       0.92      0.93      0.92      1814

    accuracy                           0.91      3247
   macro avg       0.91      0.91      0.91      3247
weighted avg       0.91      0.91      0.91      3247



In [49]:
# Blogs as test sample
catboost_pred_equal_blogs = catboost_model_equal.predict(equal_df_blogs)

print(classification_report(class_equal_blogs, catboost_pred_equal_blogs))

              precision    recall  f1-score   support

           0       0.84      0.81      0.82      1386
           1       0.84      0.87      0.85      1619

    accuracy                           0.84      3005
   macro avg       0.84      0.84      0.84      3005
weighted avg       0.84      0.84      0.84      3005



### **Equal news as train sample**

In [50]:
catboost_model_equal_news = CatBoostClassifier(depth=4, learning_rate=0.1, verbose=False, random_state=42)
catboost_model_equal_news.fit(equal_df_news, class_equal_news)

<catboost.core.CatBoostClassifier at 0x2864cf610>

In [51]:
# Blogs as test sample
catboost_pred_equal_blogs_2 = catboost_model_equal_news.predict(equal_df_blogs)

print(classification_report(class_equal_blogs, catboost_pred_equal_blogs_2))

              precision    recall  f1-score   support

           0       0.81      0.79      0.80      1386
           1       0.83      0.84      0.83      1619

    accuracy                           0.82      3005
   macro avg       0.82      0.82      0.82      3005
weighted avg       0.82      0.82      0.82      3005



In [52]:
# Essays as test sample
catboost_pred_equal_essay = catboost_model_equal_news.predict(equal_df_essay)

print(classification_report(class_equal_essay, catboost_pred_equal_essay))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91      1257
           1       0.92      0.92      0.92      1452

    accuracy                           0.92      2709
   macro avg       0.92      0.92      0.92      2709
weighted avg       0.92      0.92      0.92      2709



### **Equal blogs as train sample**

In [53]:
catboost_model_equal_blogs = CatBoostClassifier(depth=4, learning_rate=0.1, verbose=False, random_state=42)
catboost_model_equal_blogs.fit(equal_df_blogs, class_equal_blogs)

<catboost.core.CatBoostClassifier at 0x286529250>

In [54]:
# News as test sample
catboost_pred_equal_news_2 = catboost_model_equal_blogs.predict(equal_df_news)

print(classification_report(class_equal_news, catboost_pred_equal_news_2))

              precision    recall  f1-score   support

           0       0.85      0.93      0.89      1433
           1       0.94      0.87      0.90      1814

    accuracy                           0.90      3247
   macro avg       0.89      0.90      0.90      3247
weighted avg       0.90      0.90      0.90      3247



In [55]:
# Essay as test sample
catboost_pred_equal_essay_2 = catboost_model_equal_blogs.predict(equal_df_essay)

print(classification_report(class_equal_essay, catboost_pred_equal_essay_2))

              precision    recall  f1-score   support

           0       0.89      0.88      0.88      1257
           1       0.90      0.90      0.90      1452

    accuracy                           0.89      2709
   macro avg       0.89      0.89      0.89      2709
weighted avg       0.89      0.89      0.89      2709



### **Different essays as train sample**

In [56]:
catboost_model_different = CatBoostClassifier(depth=6, learning_rate=0.1, verbose=False, random_state=42)
catboost_model_different.fit(different_df_essay, class_different_essay)

<catboost.core.CatBoostClassifier at 0x28652af50>

In [57]:
# News as test sample
catboost_pred_different_news = catboost_model_different.predict(different_df_news)

print(classification_report(class_different_news, catboost_pred_different_news))

              precision    recall  f1-score   support

           0       0.88      0.88      0.88      1366
           1       0.91      0.91      0.91      1801

    accuracy                           0.90      3167
   macro avg       0.90      0.90      0.90      3167
weighted avg       0.90      0.90      0.90      3167



In [58]:
# Blogs as test sample
catboost_pred_different_blogs = catboost_model_different.predict(different_df_blogs)

print(classification_report(class_different_blogs, catboost_pred_different_blogs))

              precision    recall  f1-score   support

           0       0.83      0.80      0.82      1336
           1       0.84      0.86      0.85      1619

    accuracy                           0.84      2955
   macro avg       0.83      0.83      0.83      2955
weighted avg       0.84      0.84      0.84      2955



### **Different news as train sample**

In [59]:
catboost_model_different_news = CatBoostClassifier(depth=6, learning_rate=0.1, verbose=False, random_state=42)
catboost_model_different_news.fit(different_df_news, class_different_news)

<catboost.core.CatBoostClassifier at 0x28641c3d0>

In [60]:
# Essay as test sample
catboost_pred_different_essay = catboost_model_different_news.predict(different_df_essay)

print(classification_report(class_different_essay, catboost_pred_different_essay))

              precision    recall  f1-score   support

           0       0.93      0.87      0.90       616
           1       0.90      0.94      0.92       740

    accuracy                           0.91      1356
   macro avg       0.91      0.91      0.91      1356
weighted avg       0.91      0.91      0.91      1356



In [61]:
# Blogs as test sample
catboost_pred_different_blogs = catboost_model_different_news.predict(different_df_blogs)

print(classification_report(class_different_blogs, catboost_pred_different_blogs))

              precision    recall  f1-score   support

           0       0.81      0.79      0.80      1336
           1       0.83      0.84      0.84      1619

    accuracy                           0.82      2955
   macro avg       0.82      0.82      0.82      2955
weighted avg       0.82      0.82      0.82      2955



### **Different blogs as train sample**

In [62]:
catboost_model_different_blogs = CatBoostClassifier(depth=6, learning_rate=0.1, verbose=False, random_state=42)
catboost_model_different_blogs.fit(different_df_blogs, class_different_blogs)

<catboost.core.CatBoostClassifier at 0x286566e90>

In [63]:
# Essays as test sample
catboost_pred_different_essay_2 = catboost_model_different_blogs.predict(different_df_essay)

print(classification_report(class_different_essay, catboost_pred_different_essay_2))

              precision    recall  f1-score   support

           0       0.95      0.90      0.92       616
           1       0.92      0.96      0.94       740

    accuracy                           0.93      1356
   macro avg       0.93      0.93      0.93      1356
weighted avg       0.93      0.93      0.93      1356



In [64]:
# News as test sample
catboost_pred_different_news_2 = catboost_model_different_blogs.predict(different_df_news)

print(classification_report(class_different_news, catboost_pred_different_news_2))

              precision    recall  f1-score   support

           0       0.84      0.94      0.89      1366
           1       0.95      0.86      0.91      1801

    accuracy                           0.90      3167
   macro avg       0.90      0.90      0.90      3167
weighted avg       0.90      0.90      0.90      3167



## **XGBoost Classifier**

### **Equal essays as train sample**

In [65]:
xgboost_model_equal = XGBClassifier(depth=6, learning_rate=0.1, max_depth=4)
xgboost_model_equal.fit(equal_df_essay, class_equal_essay)

In [66]:
# News as test sample
xgboost_pred_equal_news = xgboost_model_equal.predict(equal_df_news)

print(classification_report(class_equal_news, xgboost_pred_equal_news))

              precision    recall  f1-score   support

           0       0.85      0.88      0.87      1433
           1       0.90      0.88      0.89      1814

    accuracy                           0.88      3247
   macro avg       0.88      0.88      0.88      3247
weighted avg       0.88      0.88      0.88      3247



In [67]:
# Blogs as test sample
xgboost_pred_equal_blogs = xgboost_model_equal.predict(equal_df_blogs)

print(classification_report(class_equal_blogs, xgboost_pred_equal_blogs))

              precision    recall  f1-score   support

           0       0.81      0.81      0.81      1386
           1       0.84      0.84      0.84      1619

    accuracy                           0.83      3005
   macro avg       0.83      0.83      0.83      3005
weighted avg       0.83      0.83      0.83      3005



### **Equal news as train sample**

In [68]:
xgboost_model_equal_news = XGBClassifier(learning_rate=0.1, max_depth=4)
xgboost_model_equal_news.fit(equal_df_news, class_equal_news)

In [69]:
# Essays as test sample
xgboost_pred_equal_essay = xgboost_model_equal_news.predict(equal_df_essay)

print(classification_report(class_equal_essay, xgboost_pred_equal_essay))

              precision    recall  f1-score   support

           0       0.88      0.90      0.89      1257
           1       0.91      0.90      0.90      1452

    accuracy                           0.90      2709
   macro avg       0.90      0.90      0.90      2709
weighted avg       0.90      0.90      0.90      2709



In [70]:
# Blogs as test sample
xgboost_pred_equal_blogs_2 = xgboost_model_equal_news.predict(equal_df_blogs)

print(classification_report(class_equal_blogs, xgboost_pred_equal_blogs))

              precision    recall  f1-score   support

           0       0.81      0.81      0.81      1386
           1       0.84      0.84      0.84      1619

    accuracy                           0.83      3005
   macro avg       0.83      0.83      0.83      3005
weighted avg       0.83      0.83      0.83      3005



### **Equal blogs as train sample**

In [71]:
xgboost_model_equal_blogs = XGBClassifier(learning_rate=0.1, max_depth=4)
xgboost_model_equal_blogs.fit(equal_df_blogs, class_equal_blogs)

In [72]:
# Essays as test sample
xgboost_pred_equal_essay_2 = xgboost_model_equal_blogs.predict(equal_df_essay)

print(classification_report(class_equal_essay, xgboost_pred_equal_essay_2))

              precision    recall  f1-score   support

           0       0.91      0.89      0.90      1257
           1       0.91      0.92      0.91      1452

    accuracy                           0.91      2709
   macro avg       0.91      0.91      0.91      2709
weighted avg       0.91      0.91      0.91      2709



In [73]:
# News as test samples
xgboost_pred_equal_news_2 = xgboost_model_equal_blogs.predict(equal_df_news)

print(classification_report(class_equal_news, xgboost_pred_equal_news_2))

              precision    recall  f1-score   support

           0       0.84      0.91      0.87      1433
           1       0.93      0.86      0.89      1814

    accuracy                           0.88      3247
   macro avg       0.88      0.89      0.88      3247
weighted avg       0.89      0.88      0.88      3247



### **Different essays as train sample**

In [74]:
xgboost_model_different = XGBClassifier(learning_rate=0.1, max_depth=4)
xgboost_model_different.fit(different_df_essay, class_different_essay)

In [75]:
# News as test sample
xgboost_pred_different_news = xgboost_model_different.predict(different_df_news)

print(classification_report(class_different_news, xgboost_pred_different_news))

              precision    recall  f1-score   support

           0       0.84      0.88      0.86      1366
           1       0.90      0.87      0.89      1801

    accuracy                           0.87      3167
   macro avg       0.87      0.87      0.87      3167
weighted avg       0.87      0.87      0.87      3167



In [76]:
# Blogs as test sample
xgboost_pred_different_blogs = xgboost_model_different.predict(different_df_blogs)

print(classification_report(class_different_blogs, xgboost_pred_different_blogs))

              precision    recall  f1-score   support

           0       0.79      0.80      0.80      1336
           1       0.84      0.83      0.83      1619

    accuracy                           0.82      2955
   macro avg       0.81      0.81      0.81      2955
weighted avg       0.82      0.82      0.82      2955



### **Different news as train sample**

In [77]:
xgboost_model_different_news = XGBClassifier(learning_rate=0.1, max_depth=4)
xgboost_model_different_news.fit(different_df_news, class_different_news)

In [78]:
# Essays as test sample
xgboost_pred_different_essay = xgboost_model_different_news.predict(different_df_essay)

print(classification_report(class_different_essay, xgboost_pred_different_essay))

              precision    recall  f1-score   support

           0       0.90      0.83      0.86       616
           1       0.87      0.92      0.89       740

    accuracy                           0.88      1356
   macro avg       0.88      0.88      0.88      1356
weighted avg       0.88      0.88      0.88      1356



In [79]:
# Blogs as test sample
xgboost_pred_different_blogs_2 = xgboost_model_different_news.predict(different_df_blogs)

print(classification_report(class_different_blogs, xgboost_pred_different_blogs_2))

              precision    recall  f1-score   support

           0       0.78      0.76      0.77      1336
           1       0.81      0.82      0.82      1619

    accuracy                           0.80      2955
   macro avg       0.79      0.79      0.79      2955
weighted avg       0.80      0.80      0.80      2955



### **Different blogs as train sample**

In [80]:
xgboost_model_different_blogs = XGBClassifier(learning_rate=0.1, max_depth=4)
xgboost_model_different_blogs.fit(different_df_blogs, class_different_blogs)

In [81]:
# Essays as test sample
xgboost_pred_different_essay_2 = xgboost_model_different_blogs.predict(different_df_essay)

print(classification_report(class_different_essay, xgboost_pred_different_essay_2))

              precision    recall  f1-score   support

           0       0.95      0.90      0.92       616
           1       0.92      0.96      0.94       740

    accuracy                           0.93      1356
   macro avg       0.93      0.93      0.93      1356
weighted avg       0.93      0.93      0.93      1356



In [82]:
# News as test sample
xgboost_pred_different_news_2 = xgboost_model_different_blogs.predict(different_df_news)

print(classification_report(class_different_news, xgboost_pred_different_news_2))

              precision    recall  f1-score   support

           0       0.81      0.93      0.86      1366
           1       0.94      0.84      0.88      1801

    accuracy                           0.87      3167
   macro avg       0.87      0.88      0.87      3167
weighted avg       0.88      0.87      0.88      3167

