In [None]:
import fasttext
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd

# Multi-label ("Fake" / "Hate") tweet classification with fastText.
# Pipeline: load the CSV -> hold out a test split -> write the training split
# in fastText's text format -> train a one-vs-all model -> evaluate each task
# on the held-out rows.

# Load your dataset
data_path = 'TWITTER+youtube.csv'
df = pd.read_csv(data_path, encoding='latin1')

# fastText only treats whitespace-separated tokens that start with the literal
# prefix "__label__" as labels; any other token is read as an ordinary word.
# The exact same strings must be used when writing the training file and when
# interpreting predictions, so they are defined once here.
LABEL_FAKE = '__label__fake'
LABEL_NOT_FAKE = '__label__not_fake'
LABEL_HATE = '__label__hate'
LABEL_NOT_HATE = '__label__not_hate'


def clean_text(text):
    """Return `text` as a single whitespace-normalised line.

    Every run of whitespace (including newlines and carriage returns) is
    collapsed to one space so that words on adjacent lines are not glued
    together and the result contains no newline. Missing values (NaN/None)
    become an empty string instead of raising.
    """
    if pd.isna(text):
        return ''
    return ' '.join(str(text).split())


# Hold out part of the data so the metrics below are not computed on the very
# rows the model was trained on. A fixed seed keeps the split reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Prepare a FastText compatible input file for both "Fake" and "Hate" detection
# (training rows only). Each line is: "<fake label> <hate label> <tweet text>".
fasttext_input_file = 'fasttext_input.txt'
with open(fasttext_input_file, 'w', encoding='utf-8') as f:
    for tweet, fake, hate in zip(train_df['Tweet'], train_df['Fake'], train_df['Hate']):
        fake_label = LABEL_FAKE if fake == 1 else LABEL_NOT_FAKE
        hate_label = LABEL_HATE if hate == 1 else LABEL_NOT_HATE
        f.write(f"{fake_label} {hate_label} {clean_text(tweet)}\n")

# Initialize a list to capture the training loss
# (kept because the plotting code further down in this cell appends to it)
loss_values = []

# Train a FastText model for both "Fake" and "Hate" detection
model = fasttext.train_supervised(
    input=fasttext_input_file,
    loss='ova',  # 'ova' stands for one-vs-all (multilabel classification)
    verbose=1,
    thread=4  # You can adjust the number of threads for training
)

# Save the trained model to a file
model_output_path = 'fasttext_model.bin'
model.save_model(model_output_path)

# Test the model for "Fake" and "Hate" detection on the held-out rows.
# Ground truth is binarised with the same `== 1` rule used to build the labels.
X_test = test_df['Tweet']
y_test_fake = (test_df['Fake'] == 1).astype(int)
y_test_hate = (test_df['Hate'] == 1).astype(int)


def predict(text):
    """Classify one tweet.

    Returns a tuple ``(is_fake, is_hate)`` of ints (1 = positive, 0 = negative).

    All labels are requested (k=-1) and each task is decided by comparing the
    score of its positive label with the score of its negative label. A label
    that is missing from the prediction is treated as having score 0.0.
    """
    labels, probs = model.predict(clean_text(text), k=-1)
    scores = dict(zip(labels, probs))
    is_fake = scores.get(LABEL_FAKE, 0.0) > scores.get(LABEL_NOT_FAKE, 0.0)
    is_hate = scores.get(LABEL_HATE, 0.0) > scores.get(LABEL_NOT_HATE, 0.0)
    return int(is_fake), int(is_hate)


predictions = [predict(tweet) for tweet in X_test]
y_pred_fake, y_pred_hate = zip(*predictions)

# Compute confusion matrices.
# labels=[0, 1] pins the matrix to 2x2 even if one class never occurs.
cm_fake = confusion_matrix(y_test_fake, y_pred_fake, labels=[0, 1])
cm_hate = confusion_matrix(y_test_hate, y_pred_hate, labels=[0, 1])

# Compute classification reports.
# labels=[0, 1] keeps target_names aligned with the classes; zero_division=0
# reports 0 instead of warning when a class is never predicted.
report_fake = classification_report(
    y_test_fake, y_pred_fake,
    labels=[0, 1], target_names=["Not Fake", "Fake"], zero_division=0,
)
report_hate = classification_report(
    y_test_hate, y_pred_hate,
    labels=[0, 1], target_names=["Not Hate", "Hate"], zero_division=0,
)

print("Confusion Matrix for 'Fake' detection:")
print(cm_fake)

print("\nClassification Report for 'Fake' detection:")
print(report_fake)

print("\nConfusion Matrix for 'Hate' detection:")
print(cm_hate)

print("\nClassification Report for 'Hate' detection:")
print(report_hate)

# Learning curve: precision@1 on the training file as a function of the number
# of training epochs.
#
# model.test() returns (number of examples, precision, recall) - it does not
# return a loss - and calling it repeatedly on one already-trained model gives
# the same numbers every time. To get a curve that actually varies with the
# epoch count, a separate model is trained for each epoch budget and scored on
# the same file.
# NOTE(review): the fastText Python API does not appear to expose per-epoch
# training loss (it is only printed to the console when verbose > 0) - confirm
# against the installed version before relying on this.
epoch_counts = list(range(1, 11))  # Adjust the number of epochs as needed
precision_by_epoch = []
for n_epochs in epoch_counts:
    epoch_model = fasttext.train_supervised(
        input=fasttext_input_file,
        loss='ova',  # same objective as the main model
        epoch=n_epochs,
        verbose=0,  # keep the notebook output quiet during the sweep
        thread=4,
    )
    _, precision, _ = epoch_model.test(fasttext_input_file)
    precision_by_epoch.append(precision)

# Plot the curve using Matplotlib
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(epoch_counts, precision_by_epoch, marker='o', linestyle='-')
ax.set_title('Training Precision@1 vs. Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Precision@1')
ax.grid()
plt.show()

In [None]:
!pip install fasttext

Collecting fasttext
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.8/68.8 kB[0m [31m608.9 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-2.11.1-py3-none-any.whl (227 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (setup.py) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4199774 sha256=902b9907be8858a487af55be34287452699569306e9fdfaefd8f4c4256820e36
  Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394
Successfully built fasttext
Installing collected packages: pybind11, fasttext
Successfully installed fasttext-0.9.2 pybind11-2.11.1
