In [None]:
import pandas as pd
import re
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, HBox, VBox
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from IPython.display import display
from sklearn.metrics import accuracy_score


: 

In [None]:
!pip install ipywidgets

In [None]:
# Custom Urdu-compatible tokenizer using regex.
#
# Alternatives, tried left to right at each position:
#   1. a run of Arabic-block characters (U+0600-U+06FF) that are NOT one of the
#      punctuation marks listed in the negative lookahead (Arabic comma,
#      date separator, semicolon, triple-dot mark, question mark, percent sign,
#      five-pointed star, Urdu full stop);
#   2. a run of ASCII letters;
#   3. a run of digits;
#   4. any single character that is neither whitespace nor a word character
#      (this is where the excluded punctuation marks end up, one per token).
_URDU_TOKEN_RE = re.compile(
    r'(?:(?![\u060C\u060D\u061B\u061E\u061F\u066A\u066D\u06D4])[\u0600-\u06FF])+'
    r'|[a-zA-Z]+|\d+|[^\s\w]'
)


def urdu_tokenizer(text):
    """Split ``text`` into word, number and punctuation tokens.

    Punctuation that lives inside the Arabic Unicode block (e.g. the Urdu
    full stop U+06D4) is emitted as its own token instead of being attached
    to the neighbouring word, so "word" and "word<full stop>" share one
    vocabulary entry.

    Parameters
    ----------
    text : str
        The sentence to tokenize.

    Returns
    -------
    list of str
        Tokens in order of appearance; whitespace is discarded.
    """
    return _URDU_TOKEN_RE.findall(text)

# Default dataset location. This is a raw string on purpose: in a normal string
# literal the sequence "\202" is an octal escape, which silently corrupts the
# path, and the remaining backslashes are invalid escape sequences.
DEFAULT_DATASET_PATH = r"C:\python\2022-CS-656(NLP Assignment)\Datasets\Final DataSet.xlsx"


# Function to train models
def train_models(data_path=DEFAULT_DATASET_PATH, random_state=42):
    """Load the dataset, build TF-IDF features and fit four classifiers.

    Parameters
    ----------
    data_path : str or path-like, optional
        Excel workbook to read. Its columns are renamed to
        'Urdu Sentence' and 'Sentiment', so it is expected to contain
        exactly two columns. Defaults to ``DEFAULT_DATASET_PATH``.
    random_state : int, optional
        Seed used for the train/test split and for the estimators that accept
        one, so repeated runs give the same results.

    Returns
    -------
    tuple
        ``(trained_models, X_test_vec, y_test, vectorizer, X_test)`` where
        ``trained_models`` maps a display name to the fitted estimator,
        ``X_test_vec`` is the vectorized test split, ``y_test`` the test
        labels, ``vectorizer`` the fitted ``TfidfVectorizer`` and ``X_test``
        the raw test sentences.
    """
    # Load Excel file and drop rows with a missing sentence or label
    df = pd.read_excel(data_path)
    df.columns = ['Urdu Sentence', 'Sentiment']
    df = df.dropna()

    # Train-test split (80 % / 20 %)
    X_train, X_test, y_train, y_test = train_test_split(
        df['Urdu Sentence'],
        df['Sentiment'],
        test_size=0.2,
        random_state=random_state,
    )

    # Urdu TF-IDF vectorization using the custom tokenizer.
    # token_pattern=None because a custom tokenizer is supplied instead.
    vectorizer = TfidfVectorizer(
        tokenizer=urdu_tokenizer,
        token_pattern=None,
        ngram_range=(1, 2),
        max_features=5000,
    )
    # Fit on the training split only; the test split is merely transformed.
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)

    # Define models
    models = {
        'Naive Bayes': MultinomialNB(),
        'Logistic Regression': LogisticRegression(max_iter=1000),
        'SVM': LinearSVC(random_state=random_state),
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=random_state),
    }

    # Fit every model and keep it under its display name for later evaluation
    trained_models = {}
    for name, model in models.items():
        model.fit(X_train_vec, y_train)
        trained_models[name] = model

    return trained_models, X_test_vec, y_test, vectorizer, X_test

# Function to test models and display results
def test_models(trained_models, X_test_vec, y_test, vectorizer):
    for name, model in trained_models.items():
        y_pred = model.predict(X_test_vec)
        acc = accuracy_score(y_test, y_pred)
        print(f"\n📊 Model: {name}")
        print(f"✅ Accuracy: {acc:.4f}")
        print(classification_report(y_test, y_pred))


In [None]:
# Create button for training, plus an output area that captures its messages
train_button = widgets.Button(description="Train")
output_train = widgets.Output()

# Create button for testing, plus its output area.
# The button starts disabled: the test handler reads objects that only exist
# after training, and the train handler sets `disabled = False` once they do.
test_button = widgets.Button(description="Test", disabled=True)
output_test = widgets.Output()

# Define train button behavior
def on_train_button_click(b):
    """Train all models and publish the results for the test handler.

    Parameters
    ----------
    b
        The object passed by the click event; not used.

    Side effects
    ------------
    Writes progress messages to ``output_train``, stores ``trained_models``,
    ``X_test_vec``, ``y_test``, ``vectorizer`` and ``X_test`` as module-level
    globals, and enables ``test_button`` once those globals are in place.
    """
    # Start from an empty output area so repeated runs do not pile up messages
    output_train.clear_output()
    # Block further clicks while a training run is in progress
    train_button.disabled = True
    try:
        with output_train:
            print("Training models...")
            trained_models, X_test_vec, y_test, vectorizer, X_test = train_models()

            # Save models and vectorizer for testing. This happens before the
            # test button is enabled so the test handler never sees partial state.
            globals().update(
                trained_models=trained_models,
                X_test_vec=X_test_vec,
                y_test=y_test,
                vectorizer=vectorizer,
                X_test=X_test,
            )

            print("Models trained successfully!")
            # Enable the test button after training
            test_button.disabled = False
    finally:
        # Re-enable the train button even if training raised
        train_button.disabled = False

train_button.on_click(on_train_button_click)


def on_test_button_click(b):
    """Evaluate the trained models, export the best model's predictions and summarise them.

    Parameters
    ----------
    b
        The object passed by the click event; not used.

    Side effects
    ------------
    Writes to ``output_test`` and creates/overwrites
    ``Sentiment_Predictions.xlsx`` in the current working directory.
    Reads the module-level globals ``trained_models``, ``X_test_vec``,
    ``y_test``, ``vectorizer`` and ``X_test`` published by the train handler.
    """
    # Start from an empty output area so repeated runs do not pile up results
    output_test.clear_output()
    with output_test:
        # Guard against clicking Test before training has published its results
        required = ('trained_models', 'X_test_vec', 'y_test', 'vectorizer', 'X_test')
        missing = [key for key in required if key not in globals()]
        if missing:
            print("⚠️ Please train the models first.")
            return

        print("Testing models...\n")

        # Step 1: Keep the original evaluation output
        test_models(trained_models, X_test_vec, y_test, vectorizer)

        # Step 2: Identify the best model and save predictions.
        # Start below any possible accuracy so that a model is selected even
        # when every model scores 0.0; ties keep the first model encountered.
        best_accuracy = -1.0
        best_model_name = None
        best_predictions = None

        for name, model in trained_models.items():
            y_pred = model.predict(X_test_vec)
            acc = accuracy_score(y_test, y_pred)
            if acc > best_accuracy:
                best_accuracy = acc
                best_model_name = name
                best_predictions = y_pred

        # Step 3: Generate Excel file.
        # Indexes are reset so the sentence/label Series align positionally
        # with the prediction array.
        actual = y_test.reset_index(drop=True)
        output_df = pd.DataFrame({
            'Urdu Sentence': X_test.reset_index(drop=True),
            'Actual Sentiment': actual,
            'Predicted Sentiment': best_predictions
        })

        output_filename = 'Sentiment_Predictions.xlsx'
        output_df.to_excel(output_filename, index=False)

        # Step 4: Show result
        print(f"\n✅ Best Model: {best_model_name} (Accuracy: {best_accuracy:.4f})")
        print(f"📁 Excel file '{output_filename}' generated with predictions.")
        display(output_df.head(10))  # Optional: preview first 10 rows

        # Step 5: Show correct/incorrect counts
        correct = (actual == best_predictions).sum()
        incorrect = len(y_test) - correct
        print(f"\n✔️ Total Correct Predictions: {correct}")
        print(f"❌ Total Incorrect Predictions: {incorrect}")


test_button.on_click(on_test_button_click)

# Stack each button above its own output area; as the cell's last expression
# the container is rendered as the cell output.
VBox(children=[train_button, output_train, test_button, output_test])