<a href="https://colab.research.google.com/github/Ganesh-Navadeep/POS-tagging-/blob/main/POS_tagging.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import (
    Embedding, Conv1D, MaxPooling1D, LSTM, Bidirectional, Dense, Dropout,
    Input, GlobalMaxPooling1D, BatchNormalization, Flatten
)
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

# Load dataset
df = pd.read_csv("/content/drive/MyDrive/sanskrit_dataset_balanced.csv")

# Encode words and POS tags
label_encoder = LabelEncoder()
pos_tags_encoded = label_encoder.fit_transform(df["POS"].values)
num_classes = len(label_encoder.classes_)

# Tokenization (Character-Level Encoding)
max_length = 30  # Fixed for efficiency
X = pad_sequences(df["Word"].astype(str).apply(lambda x: [ord(c) for c in x]),
                  maxlen=max_length, padding='post')

y = to_categorical(pos_tags_encoded, num_classes=num_classes)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Compute class weights
class_weights = dict(enumerate(compute_class_weight("balanced", classes=np.unique(pos_tags_encoded), y=pos_tags_encoded)))

# Build Optimized Model (Fast Execution)
def build_model():
    input_layer = Input(shape=(max_length,))

    # **CNN-based Character Embedding**
    embedding_layer = Embedding(input_dim=30000, output_dim=64, input_length=max_length)(input_layer)
    cnn_layer = Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(embedding_layer)
    cnn_layer = MaxPooling1D(pool_size=2)(cnn_layer)

    # **BiLSTM Layer (Reduced Complexity)**
    bilstm_layer = Bidirectional(LSTM(32, return_sequences=True, dropout=0.2))(cnn_layer)

    # **Global Max Pooling instead of Attention (Faster)**
    pooling_layer = GlobalMaxPooling1D()(bilstm_layer)

    # **Dense Layers**
    dense_layer = Dense(32, activation='relu')(pooling_layer)
    dense_layer = BatchNormalization()(dense_layer)
    dropout_layer = Dropout(0.2)(dense_layer)

    # **Output Layer**
    output_layer = Dense(num_classes, activation='softmax')(dropout_layer)

    model = keras.Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])
    return model

model = build_model()

# Train Model
early_stopping = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=15, batch_size=1024, validation_data=(X_test, y_test),
                    class_weight=class_weights, callbacks=[early_stopping])

# Save Model
model.save("sanskrit_pos_model_fast.h5")

# Print Accuracy
train_acc = history.history['accuracy'][-1]
val_acc = history.history['val_accuracy'][-1]
print(f"Model Training Completed in <2 min!\nTraining Accuracy: {train_acc:.4f}\nValidation Accuracy: {val_acc:.4f}")





Epoch 1/15
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 190ms/step - accuracy: 0.2649 - loss: 1.7135 - val_accuracy: 0.3543 - val_loss: 1.8297
Epoch 2/15
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 231ms/step - accuracy: 0.5884 - loss: 0.9659 - val_accuracy: 0.3518 - val_loss: 1.6787
Epoch 3/15
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 165ms/step - accuracy: 0.7257 - loss: 0.6938 - val_accuracy: 0.4557 - val_loss: 1.5459
Epoch 4/15
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 271ms/step - accuracy: 0.7793 - loss: 0.5642 - val_accuracy: 0.5540 - val_loss: 1.4362
Epoch 5/15
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 161ms/step - accuracy: 0.8042 - loss: 0.4843 - val_accuracy: 0.7000 - val_loss: 1.3331
Epoch 6/15
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 233ms/step - accuracy: 0.8264 - loss: 0.4200 - val_accuracy: 0.7527 - val_loss: 1.2301
Epoch 7/15
[1m24/24[0m [



Model Training Completed in <2 min!
Training Accuracy: 0.9286
Validation Accuracy: 0.9418


In [None]:
from google.colab import drive
drive.mount('/content/drive')