In [8]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import joblib
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report

# Load datasets
# Language pairs to process; each becomes a key of `data`.
languages = ['eng_tamil']
# Human-readable class names keyed by string id. Not referenced by the code
# visible here -- presumably mirrors the LabelEncoder ordering; TODO confirm.
label = {
    '0': 'homophobic',
    '1': 'normal',
    '2': 'transphobic'
}

# CSV location of each split. These are plain literals (the paths contain no
# placeholders), so every entry in `languages` currently reads the same files.
split_paths = {
    'train': "/content/eng_tam_train_prepro_nonaug.csv",
    'test': "/content/eng_tam_test_prepro_nonaug.csv",
    'dev': "/content/eng_tam_dev_prepro_nonaug.csv",
}

# data[lang][split] -> DataFrame read from the corresponding CSV.
data = {}

for lang in languages:
    data[lang] = {
        split: pd.read_csv(path) for split, path in split_paths.items()
    }

# Encode labels
# The encoder is fitted on the training labels and then reused, unchanged,
# to transform the test labels so both splits share one integer mapping.
# NOTE(review): the 'dev' split is not encoded here -- confirm that is intended.
label_encoder = LabelEncoder()
for lang in languages:
    train_frame = data[lang]['train']
    test_frame = data[lang]['test']
    train_frame['label'] = label_encoder.fit_transform(train_frame['label'])
    test_frame['label'] = label_encoder.transform(test_frame['label'])

# Feature extraction
# Select the language explicitly instead of relying on the `lang` variable
# that happens to leak out of the preceding for-loop. The last entry of
# `languages` is exactly the value that leaked variable would hold.
lang = languages[-1]
train_df = data[lang]['train']
test_df = data[lang]['test']

# TF-IDF vocabulary (capped at 5000 terms) is learned from the training text
# only; the test text is projected onto that same vocabulary.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train_df['text'])
X_test = vectorizer.transform(test_df['text'])

# Targets are the 'label' columns of the same frames.
y_train = train_df['label']
y_test = test_df['label']

# Build the ANN model with increased hidden layers and dropout
# The output width is taken from the fitted label encoder rather than being
# hard-coded, so the softmax layer always has one unit per encoded class
# (3 for the current dataset).
num_classes = len(label_encoder.classes_)

# Four ReLU hidden layers of decreasing width, each followed by 50% dropout,
# ending in a softmax over the classes. The input width equals the number of
# TF-IDF features in X_train.
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
# `learning_rate` is the supported keyword for the Adam step size; the old
# `lr` alias is deprecated and rejected by newer Keras releases.
# The sparse categorical cross-entropy loss matches the integer-encoded labels.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model
# The TF-IDF matrix is converted to a dense array before being handed to
# Keras. 10% of the training rows are held out for validation via
# `validation_split`.
# NOTE(review): the loaded 'dev' split is never used for validation here --
# confirm whether it should replace validation_split.
X_train_dense = X_train.toarray()
history = model.fit(
    X_train_dense,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
)

# Evaluate the model
# Predict class probabilities for the densified test features, then take the
# index of the highest probability along the last axis as the predicted class.
test_probabilities = model.predict(X_test.toarray())
y_pred = np.argmax(test_probabilities, axis=-1)

# Report the confusion matrix and the per-class precision/recall/F1 on the
# test split.
print("Confusion Matrix:")
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)
print("\nClassification Report:")
test_report = classification_report(y_test, y_pred)
print(test_report)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Confusion Matrix:
[[  21   66    1]
 [  24 1051   10]
 [   6   22    6]]

Classification Report:
              precision    recall  f1-score   support

           0       0.41      0.24      0.30        88
           1       0.92      0.97      0.95      1085
           2       0.35      0.18      0.24        34

    accuracy                           0.89      1207
   macro avg       0.56      0.46      0.49      1207
weighted avg       0.87      0.89      0.88      1207

