In [10]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

def preprocess_data(df):
    """
    Preprocess the data for training.

    Rows in which either text column is missing are dropped before
    vectorizing. Without this, ``astype(str)`` would turn every missing
    cell into the literal string ``"nan"``, which would then be treated as
    a real English document and as a real Hindi class.

    The TF-IDF vectorizer is fitted inside this function on all retained
    rows and is not returned to the caller.

    NOTE(review): every distinct Hindi string becomes its own class, so a
    dataset made of mostly unique sentences produces almost as many classes
    as rows -- confirm this labeling is intended.

    Args:
        df: DataFrame with 'ENGLISH' and 'HINDI' text columns.

    Returns:
        X: Sparse TF-IDF feature matrix for the English texts (vocabulary
            capped at 5000 terms), one row per retained input row.
        y: Integer label array aligned with the rows of X; each distinct
            Hindi string gets one code, numbered by first appearance.

    Raises:
        KeyError: If df lacks an 'ENGLISH' or 'HINDI' column.
    """
    # Discard incomplete pairs first so NaN never becomes the text "nan".
    complete_rows = df.dropna(subset=['ENGLISH', 'HINDI'])

    # Coerce to str so non-text cells (e.g. numbers) can still be tokenized.
    english_texts = complete_rows['ENGLISH'].astype(str)
    hindi_texts = complete_rows['HINDI'].astype(str)

    # Fit the vocabulary and build the TF-IDF matrix in one pass.
    vectorizer_english = TfidfVectorizer(max_features=5000)
    X = vectorizer_english.fit_transform(english_texts)

    # factorize returns (codes, uniques); only the integer codes are needed.
    y, _ = pd.factorize(hindi_texts)

    return X, y

def train_mlp_classifier(X_train, y_train, *, hidden_layer_sizes=(100,),
                         max_iter=1000, learning_rate_init=0.01,
                         random_state=None):
    """
    Train an MLP Classifier on the training data.

    The network uses ReLU activations and the Adam solver. The remaining
    hyperparameters are keyword-only and default to the values this
    function previously hard-coded, so existing two-argument calls behave
    exactly as before.

    Args:
        X_train: Training features.
        y_train: Training labels.
        hidden_layer_sizes: Tuple giving the number of units in each hidden
            layer. Defaults to a single layer of 100 units.
        max_iter: Upper bound on training iterations.
        learning_rate_init: Initial learning rate passed to the solver.
        random_state: Seed forwarded to MLPClassifier. Leave as None for
            the previous unseeded behaviour, or pass an int to make
            training repeatable.

    Returns:
        Trained MLPClassifier model.
    """
    mlp = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation='relu',
        max_iter=max_iter,
        learning_rate_init=learning_rate_init,
        solver='adam',
        random_state=random_state,
    )
    mlp.fit(X_train, y_train)
    return mlp

def print_f1_score(y_true, y_pred):
    """
    Report the macro-averaged F1 score on standard output.

    The score is read from the 'macro avg' row of scikit-learn's
    classification report. The report is built with ``zero_division=0``.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
    """
    # Ask for the report as a nested dict so the macro row can be indexed.
    macro_row = classification_report(
        y_true,
        y_pred,
        zero_division=0,
        output_dict=True,
    )["macro avg"]

    f1_macro = macro_row["f1-score"]
    print(f"Macro F1 Score: {f1_macro:.4f}")

def main():
    """
    Run the whole pipeline: load, featurize, split, train and evaluate.

    Reads 'Book1.xlsx', trains an MLP classifier on 80% of the rows, and
    prints the accuracy and macro F1 score measured on the remaining 20%.
    """
    # Load the workbook holding the dataset
    dataset = pd.read_excel('Book1.xlsx')

    # Turn the raw frame into a feature matrix and a label vector
    features, labels = preprocess_data(dataset)

    # Hold out 20% of the rows; the fixed seed keeps the split repeatable
    partitions = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    train_features, test_features, train_labels, test_labels = partitions

    # Fit the classifier, then predict the held-out rows
    model = train_mlp_classifier(train_features, train_labels)
    predictions = model.predict(test_features)

    # Report accuracy first, then the macro F1 score
    accuracy = accuracy_score(test_labels, predictions)
    print(f"MLPClassifier Accuracy: {accuracy:.4f}")

    print_f1_score(test_labels, predictions)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


MLPClassifier Accuracy: 0.0228
Macro F1 Score: 0.0114
