<a href="https://colab.research.google.com/github/Dovud-Asadi/-100-day-NLP-projects/blob/main/demo_multiclass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Libraries**

In [2]:
# !pip install cohere
# !pip install gradio
# !pip install voyageai

In [40]:
import pandas as pd
import numpy as np
import string
import re
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import cohere
import gradio as gr
import voyageai

## **API**

In [7]:
#Cohere API
api_key_co = "LpQudB9ymyh6GYotUB8NPcqE9A15YBz4XmCQewTJ"
#Voyager AI API
api_key_vo = "pa-08GVFImi310dmwU01vDx-Ue8VBNo36DhshchK0OTlIc"

In [5]:
def embed_text_co(text, api_key):
    co = cohere.Client(api_key)
    response = co.embed(
        texts=[text],
        model="embed-multilingual-v3.0",
        input_type="classification"
    )
    return response.embeddings[0]

In [6]:
def embed_text_vo(text, api_key):
    vo = voyageai.Client(api_key)
    response = vo.embed(
        texts=[text],
        model="voyage-multilingual-2",
        input_type="document"
    )
    return response.embeddings[0]

## **Preprocessing**

In [8]:
# Text preprocessing functions
def preprocess_text(text):
    """Lowercase, remove punctuation (except '), and normalize whitespace."""
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation.replace("'", "")))
    text = re.sub(r'[^a-zA-Z0-9\s\']', '', text)
    return re.sub(r'\s+', ' ', text).strip()

def preprocess_texts(text_list):
    """Apply `preprocess_text` to a list and join them."""
    return ' '.join(map(preprocess_text, text_list))

def text_processing_pipeline(df):
    def contains_04_0(text_list):
        return any('04.0' in item for item in text_list)

    def filter_long_texts(text_list):
        return [text for text in text_list if len(text) > 4]

    def keep_elements_starting_with_04(text_list):
        return [element for element in text_list if element.startswith('04')]

    def clean_text(text):
        text = text.replace("04.00.00.00 Oila qonunchiligi /", "")
        text = text.strip("[]")
        return text.strip()

    def process_okoz_text(text):
        if '/' in text:
            text = text.split('/')[0].strip()
        return text

    def remove_duplicates(text_list):
        return list(set(text_list))

    def remove_semicolons(text_list):
        return [text.replace(';', '') for text in text_list]

    # Apply preprocessing steps
    df = df[df['okoz_text'].apply(contains_04_0)].copy()
    df.loc[:, 'okoz_text'] = df['okoz_text'].apply(filter_long_texts)
    df.loc[:, 'okoz_text'] = df['okoz_text'].apply(keep_elements_starting_with_04)
    df.loc[:, 'okoz_text'] = df['okoz_text'].apply(lambda texts: [clean_text(text) for text in texts])
    df.loc[:, 'okoz_text'] = df['okoz_text'].apply(lambda texts: [process_okoz_text(text) for text in texts])
    df.loc[:, 'okoz_text'] = df['okoz_text'].apply(remove_duplicates)
    df.loc[:, 'okoz_text'] = df['okoz_text'].apply(remove_semicolons)
    df = df[df['okoz_text'].apply(len) == 1]
    df.loc[:, 'processed_texts'] = df['related_texts'].apply(preprocess_texts)
    df = df.drop(columns=['related_texts'])
    df = df.reset_index(drop=True)

    # Combine the list elements into a single string
    df.loc[:, 'okoz_text'] = df['okoz_text'].apply(lambda x: ' '.join(x))

    return df

In [9]:
def preprocess_data(df, api_key, embedding):

    df = text_processing_pipeline(df)

    df.loc[:, 'embeddings'] = df['processed_texts'].apply(lambda text: embedding(text, api_key))

    # Count the frequency of each class
    class_counts = df['okoz_text'].value_counts()

    # Assign labels based on frequency
    class_threshold = 90
    label_map = {}
    label_counter = 1

    def assign_label(class_name):
        nonlocal label_counter
        count = class_counts[class_name]
        if count < class_threshold:
            return 0
        else:
            if class_name not in label_map:
                label_map[class_name] = label_counter
                label_counter += 1
            return label_map[class_name]

    df.loc[:, 'label'] = df['okoz_text'].apply(assign_label)

    # Select 150 rows with the longest text from class 4
    class_4_df = df[df['label'] == 4].copy()
    class_4_df.loc[:, 'text_length'] = class_4_df['okoz_text'].apply(len)
    class_4_df = class_4_df.sort_values(by='text_length', ascending=False).head(150)
    df = df[df['label'] != 4]
    df = pd.concat([df, class_4_df.drop(columns=['text_length'])], ignore_index=True)

    # Drop unnecessary columns
    columns_to_drop = ['processed_texts', 'okoz_text']
    df = df.drop(columns=columns_to_drop)

    # Prepare the data for the model
    X = np.array(df['embeddings'].tolist())
    y = np.array(df['label'])

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Convert to PyTorch tensors
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    return train_loader, test_loader, df, label_map

In [11]:
df = pd.read_json('/content/full_okoz.json')
train_loader, test_loader, df, label_map = preprocess_data(df, api_key_vo, embed_text_vo)

In [12]:
df.to_csv('embedded_text.csv', index=False)

# **Models**

## **Model 1**
* with ELU
* Accuracy: 77%

In [15]:
class MulticlassClassificationModel(nn.Module):
    def __init__(self, input_size, num_classes):
        super(MulticlassClassificationModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.layer2 = nn.Linear(512, 1024)
        self.bn2 = nn.BatchNorm1d(1024)
        self.layer3 = nn.Linear(1024, 2048)
        self.bn3 = nn.BatchNorm1d(2048)
        self.layer4 = nn.Linear(2048, 1024)
        self.bn4 = nn.BatchNorm1d(1024)
        self.layer5 = nn.Linear(1024, 512)
        self.bn5 = nn.BatchNorm1d(512)
        self.output = nn.Linear(512, num_classes)

        self.elu = nn.ELU()
        self.dropout1 = nn.Dropout(p=0.3)
        self.dropout2 = nn.Dropout(p=0.4)

    def forward(self, x):
        x = self.elu(self.bn1(self.layer1(x)))
        x = self.dropout1(x)
        x = self.elu(self.bn2(self.layer2(x)))
        x = self.dropout2(x)
        x = self.elu(self.bn3(self.layer3(x)))
        x = self.dropout2(x)
        x = self.elu(self.bn4(self.layer4(x)))
        x = self.dropout2(x)
        x = self.elu(self.bn5(self.layer5(x)))
        x = self.dropout1(x)
        x = self.output(x)
        return x

In [17]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [18]:
input_dim = np.array(df['embeddings'][1]).shape[0]
num_classes = 5
model = MulticlassClassificationModel(input_dim, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-4)

### **Training**

In [24]:
def train_model(train_loader, model, criterion, optimizer, num_epochs=100):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.long())  # Also ensure labels are Long type
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        # scheduler.step()
        epoch_loss = running_loss / len(train_loader.dataset)
        if epoch % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
    print("Training Complete")

### **Evaluating**

In [20]:
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data, targets in test_loader:
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [25]:
train_model(train_loader, model, criterion, optimizer, num_epochs=50)
evaluate_model(model, test_loader)

Epoch 1/50, Loss: 0.6555
Epoch 11/50, Loss: 0.0780
Epoch 21/50, Loss: 0.0315
Epoch 31/50, Loss: 0.0339
Epoch 41/50, Loss: 0.0354
Training Complete
Test Accuracy: 77.78%


77.77777777777777

## **Model 2**
* ReLU
* Accuracy: 75%

In [41]:
class MulticlassClassificationModel(nn.Module):
    def __init__(self, input_size, num_classes):
        super(MulticlassClassificationModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.layer2 = nn.Linear(512, 1024)
        self.bn2 = nn.BatchNorm1d(1024)
        self.layer3 = nn.Linear(1024, 2048)
        self.bn3 = nn.BatchNorm1d(2048)
        self.layer4 = nn.Linear(2048, 1024)
        self.bn4 = nn.BatchNorm1d(1024)
        self.layer5 = nn.Linear(1024, 512)
        self.bn5 = nn.BatchNorm1d(512)
        self.output = nn.Linear(512, num_classes)

        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.3)
        self.dropout2 = nn.Dropout(p=0.4)

    def forward(self, x):
        x = self.relu(self.bn1(self.layer1(x)))
        x = self.dropout1(x)
        x = self.relu(self.bn2(self.layer2(x)))
        x = self.dropout2(x)
        x = self.relu(self.bn3(self.layer3(x)))
        x = self.dropout2(x)
        x = self.relu(self.bn4(self.layer4(x)))
        x = self.dropout2(x)
        x = self.relu(self.bn5(self.layer5(x)))
        x = self.dropout1(x)
        x = self.output(x)
        return x

In [42]:
input_dim = np.array(df['embeddings'][1]).shape[0]
num_classes = 5
model = MulticlassClassificationModel(input_dim, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

In [43]:
def train_model(train_loader, model, criterion, optimizer, scheduler, num_epochs=100):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        scheduler.step(epoch_loss)
        if epoch % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
            print(f'Current Learning Rate: {scheduler.get_last_lr()[0]:.6f}')
    print("Training Complete")

In [44]:
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data, targets in test_loader:
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [46]:
train_model(train_loader, model, criterion, optimizer, scheduler, num_epochs=50)
evaluate_model(model, test_loader)

Epoch 1/50, Loss: 0.0446
Current Learning Rate: 0.000500
Epoch 11/50, Loss: 0.0184
Current Learning Rate: 0.000125
Epoch 21/50, Loss: 0.0141
Current Learning Rate: 0.000063
Epoch 31/50, Loss: 0.0332
Current Learning Rate: 0.000016
Epoch 41/50, Loss: 0.0133
Current Learning Rate: 0.000008
Training Complete
Test Accuracy: 75.40%


75.39682539682539

## **Model 3**
* Simpler Model
* Accuracy 74%

In [47]:
class OptimizedMulticlassModel(nn.Module):
    def __init__(self, input_size, num_classes):
        super(OptimizedMulticlassModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 512)
        self.layer2 = nn.Linear(512, 1024)
        self.layer3 = nn.Linear(1024, 512)
        self.output = nn.Linear(512, num_classes)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.4)
        self.batch_norm1 = nn.BatchNorm1d(512)
        self.batch_norm2 = nn.BatchNorm1d(1024)

    def forward(self, x):
        x = self.relu(self.batch_norm1(self.layer1(x)))
        x = self.dropout(x)
        x = self.relu(self.batch_norm2(self.layer2(x)))
        x = self.dropout(x)
        x = self.relu(self.layer3(x))
        x = self.dropout(x)
        x = self.output(x)
        return x

In [48]:
input_dim = np.array(df['embeddings'][1]).shape[0]
model = OptimizedMulticlassModel(input_size=input_dim, num_classes=num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min')

In [53]:
# Training function
def train_model(train_loader, model, criterion, optimizer, scheduler, num_epochs=100):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        scheduler.step(loss)
        running_loss += loss.item()
        if epoch % 10 == 0:
          print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss:.6f}")

    print("Training Complete")

In [54]:
def evaluate_model(model, test_loader, criterion):
    model.eval()
    correct = 0
    total = 0
    total_loss = 0.0

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    avg_loss = total_loss / total
    print(f"Test Accuracy: {accuracy:.2f}%, Test Loss: {avg_loss:.4f}")
    return accuracy, avg_loss

In [55]:
train_model(train_loader, model, criterion, optimizer, scheduler, num_epochs=50)
evaluate_model(model, test_loader, criterion)

Epoch 1/50, Loss: 0.0007
Epoch 11/50, Loss: 0.0083
Epoch 21/50, Loss: 0.0012
Epoch 31/50, Loss: 0.0184
Epoch 41/50, Loss: 0.0242
Training Complete
Test Accuracy: 74.60%, Test Loss: 0.0568


(74.60317460317461, 0.056792984879206095)

## **Model 4**
* With 100 epoch
* Accuracy 78%

In [57]:
class MulticlassClassificationModel(nn.Module):
    def __init__(self, input_size, num_classes):
        super(MulticlassClassificationModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.layer2 = nn.Linear(512, 1024)
        self.bn2 = nn.BatchNorm1d(1024)
        self.layer3 = nn.Linear(1024, 2048)
        self.bn3 = nn.BatchNorm1d(2048)
        self.layer4 = nn.Linear(2048, 1024)
        self.bn4 = nn.BatchNorm1d(1024)
        self.layer5 = nn.Linear(1024, 512)
        self.bn5 = nn.BatchNorm1d(512)
        self.output = nn.Linear(512, num_classes)

        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(p=0.3)
        self.dropout2 = nn.Dropout(p=0.4)

    def forward(self, x):
        x = self.relu(self.bn1(self.layer1(x)))
        x = self.dropout1(x)
        x = self.relu(self.bn2(self.layer2(x)))
        x = self.dropout2(x)
        x = self.relu(self.bn3(self.layer3(x)))
        x = self.dropout2(x)
        x = self.relu(self.bn4(self.layer4(x)))
        x = self.dropout2(x)
        x = self.relu(self.bn5(self.layer5(x)))
        x = self.dropout1(x)
        x = self.output(x)
        return x

In [59]:
input_dim = np.array(df['embeddings'][1]).shape[0]
num_classes = 5
model = MulticlassClassificationModel(input_dim, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

In [60]:
def train_model(train_loader, model, criterion, optimizer, scheduler, num_epochs=100):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        scheduler.step(epoch_loss)
        if epoch % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
            print(f'Current Learning Rate: {scheduler.get_last_lr()[0]:.6f}')
    print("Training Complete")

In [61]:
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [62]:
train_model(train_loader, model, criterion, optimizer, scheduler, num_epochs=100)
evaluate_model(model, test_loader)

Epoch 1/100, Loss: 1.0722
Current Learning Rate: 0.001000
Epoch 11/100, Loss: 0.1671
Current Learning Rate: 0.000500
Epoch 21/100, Loss: 0.0280
Current Learning Rate: 0.000500
Epoch 31/100, Loss: 0.0475
Current Learning Rate: 0.000250
Epoch 41/100, Loss: 0.0235
Current Learning Rate: 0.000125
Epoch 51/100, Loss: 0.0210
Current Learning Rate: 0.000031
Epoch 61/100, Loss: 0.0159
Current Learning Rate: 0.000008
Epoch 71/100, Loss: 0.0134
Current Learning Rate: 0.000002
Epoch 81/100, Loss: 0.0155
Current Learning Rate: 0.000000
Epoch 91/100, Loss: 0.0216
Current Learning Rate: 0.000000
Training Complete
Test Accuracy: 78.57%


78.57142857142857

## **Input hendling**

In [63]:
def process_text(related_texts):
    def preprocess_text(text):
        text = text.lower()
        text = text.translate(str.maketrans('', '', string.punctuation.replace("'", "")))
        text = re.sub(r'[^a-zA-Z0-9\s\']', '', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text

    def preprocess_texts(text_list):
        processed_texts = [preprocess_text(text) for text in text_list]
        return ' '.join(processed_texts)

    preprocessed_text = preprocess_texts(related_texts)
    return preprocessed_text

In [73]:
input_text = """
Fuqarolik holati dalolatnomalarini yozish organlarida ish yuritish tartibi to‘g‘risidagi yo‘riqnomaga kiritilayotgan o‘zgartirishlar va qo‘shimcha
1. 5-bandning beshinchi xatboshisidagi “Vazirlar Mahkamasining 2022-yil 25-fevraldagi 89-son qarori bilan tasdiqlangan O‘limni qayd qilish bilan bog‘liq kompozit davlat xizmatlarini ko‘rsatishning ma’muriy reglamentiga” degan so‘zlar “Vazirlar Mahkamasining 2023-yil 20-oktabrdagi 550-son qarori bilan tasdiqlangan O‘limni qayd qilish bilan bog‘liq kompozit davlat xizmatlarini ko‘rsatishning ma’muriy reglamentiga” degan so‘zlar bilan almashtirilsin.
2. 11-bandning uchinchi xatboshisi quyidagi tahrirda bayon etilsin:
“Vazirlar Mahkamasining 2023-yil 20-oktabrdagi 550-son qarori bilan tasdiqlangan Fuqarolik holati dalolatnomalarini qayd etish qoidalarining 4-bandiga asosan FHDY organlari, O‘zbekiston Respublikasining konsullik muassasalari va fuqarolar yig‘inlari fuqarolik holati dalolatnomalarining yozuv blanklari va boshqa hujjatlar bilan Vazirlik, shuningdek O‘zbekiston Respublikasi Tashqi ishlar vazirligi tomonidan ta’minlanadi.”.
3. 23-bandning ikkinchi xatboshisidan “seriyasi, tartib raqamlari qirqib olinib, dalolatnomaning birinchi nusxasiga yelimlab qo‘yiladi. Guvohnomalarning qolgan qismlari” degan so‘zlar chiqarib tashlansin.
4. Quyidagi mazmundagi 351-band bilan to‘ldirilsin:
“351. Ariza beruvchi bola tug‘ilganligi haqida guvohnomani olish uchun O‘zbekiston Respublikasi Yagona interaktiv davlat xizmatlari portali (bundan buyon matnda YIDXP deb yuritiladi) orqali elektron so‘rovnoma to‘ldiradi yoki bevosita FHDY organiga murojaat qiladi.
Bunda, FHDY organi xodimi elektron so‘rovnoma kelib tushgandan keyin bir ish kuni ichida bolaga tug‘ilganlik haqida guvohnoma rasmiylashtiradi hamda uni ariza beruvchiga taqdim etadi.
Ariza beruvchi xohishiga ko‘ra, tug‘ilganlik haqida guvohnomani pochta aloqasi orqali olishi mumkin. Bunda, pochta xarajatlari ariza beruvchi hisobidan qoplanadi.”.
5. 38-bandning ikkinchi xatboshisi quyidagi tahrirda bayon etilsin:
“Agar tug‘ilgan bolaning onasi voyaga yetmagan bo‘lsa, tug‘ilishni qayd etish bolaning tug‘ilganligi haqida tibbiy ma’lumotnoma hamda vasiylik va homiylik organining arizasiga muvofiq FHDY organi tomonidan qayd etiladi. Bunda, bolani tuqqan onaning shaxsini tasdiqlovchi hujjati yoki tug‘ilganlik haqidagi guvohnomasi bilan o‘qish joyidan ma’lumotnoma taqdim etilishi lozim.”.
6. 65-bandning ikkinchi xatboshisidagi “Yagona interaktiv davlat xizmatlari portali (bundan buyon matnda YIDXP deb yuritiladi)” degan so‘zlar “YIDXP” qisqartmasi bilan almashtirilsin.
7. 102-bandning ikkinchi xatboshisi quyidagi tahrirda bayon etilsin:
“O‘lim holati Vazirlar Mahkamasining 2023-yil 20-oktabrdagi 550-son qarori bilan tasdiqlangan O‘limni qayd qilish bilan bog‘liq kompozit davlat xizmatlarini ko‘rsatishning ma’muriy reglamentiga muvofiq elektron axborot tizimida kompozit davlat xizmati shaklida qayd etilganda, tibbiyot muassasalari tomonidan o‘lim holati FHDY organlarida qayd qilinganligi haqida QR-kod (matrik shtrixli kod) tasviri tushirilgan ma’lumotnoma ariza beruvchilarga ikki nusxada chop etib beriladi.”.

"""

In [65]:
# Classification function
def classify_text(input_text, model, label_map, api_key):
    preprocessed_text = process_text(input_text)
    embedding = embed_text_co(preprocessed_text, api_key)
    tensor = torch.tensor(embedding, dtype=torch.float32).unsqueeze(0)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tensor = tensor.to(device)
    model.to(device)

    model.eval()
    with torch.no_grad():
        output = model(tensor)
        probabilities = torch.softmax(output, dim=1).cpu().numpy()

    inverted_label_map = {v: k for k, v in label_map.items()}
    class_probabilities = {inverted_label_map.get(i, "other"): prob for i, prob in enumerate(probabilities[0])}
    sorted_class_probabilities = dict(sorted(class_probabilities.items(), key=lambda item: item[1], reverse=True))

    return sorted_class_probabilities

In [74]:
class_probabilities = classify_text(input_text, model, label_map, api_key_co)
for i in class_probabilities:
  print(f"{i} {class_probabilities[i]}")

04.05.00.00 Ota-onalar qarovisiz qolgan bolalarni tarbiyalash shakllari 0.6089843511581421
04.08.00.00 Oila, onalik, otalik va bolalikni himoya qilish va ijtimoiy qo‘llab-quvvatlash 0.272761732339859
04.06.00.00 Fuqarolik holati dalolatnomalarini qayd qilish (shuningdek, 03.02.08.00ga qarang) 0.05793171375989914
04.02.00.00 Nikoh 0.04897312447428703
other 0.011349089443683624
