In [96]:
import pandas as pd
import torch
from time import time
import os
import torch.nn as nn
import pickle
import re
import evidently
from evidently import Report
from evidently.presets.classification import ClassificationPreset
from evidently.presets.drift import DataDriftPreset
import pandas as pd
from evidently import Dataset
from evidently import DataDefinition
from evidently import MulticlassClassification

In [97]:
# Directory copied from the previous class's MyABSAService project; it holds the
# model_weights.pth and vocab.pkl files this notebook loads.
# Set the ABSA_ROOT_DIR environment variable to use another location without
# editing the notebook; the original hard-coded path remains the default.
ROOT_DIR = os.environ.get(
    'ABSA_ROOT_DIR',
    r'C:\Users\User\Downloads\60 days of python\day-38(Aspect base sentiment analysis)\MyABSAService',
)

In [98]:
# Locations of the pickled vocabulary and the trained weights inside ROOT_DIR.
# os.path.join inserts the platform's separator instead of a hard-coded backslash
# (on Windows the resulting strings are the same as before).
dictionary_path = os.path.join(ROOT_DIR, 'vocab.pkl')
model_path = os.path.join(ROOT_DIR, 'model_weights.pth')

In [99]:
# Load the token -> id vocabulary from dictionary_path (defined in the previous
# cell) instead of rebuilding the same path a second time.
# NOTE: pickle.load can execute arbitrary code -- only load a vocab.pkl that you
# produced yourself.
with open(dictionary_path, "rb") as f:
    token_2_id = pickle.load(f)

In [100]:
# Text normalisation
def normalize(text):
    """Lower-case ``text``, delete every character that is not a-z, 0-9 or
    whitespace, and collapse whitespace runs into single spaces.

    Leading and trailing whitespace is removed as part of the collapse.
    """
    lowered = text.lower()
    alnum_only = re.sub(r'[^a-z0-9\s]', '', lowered)
    return ' '.join(alnum_only.split())

In [101]:
# Tokenisation
def tokenize(text):
    """Split ``text`` on whitespace and return the list of tokens."""
    return text.split()

In [102]:
# Vocabulary lookup
def convert_tokens_2_ids(tokens):
    """Map every token to its id in the module-level ``token_2_id`` mapping.

    Tokens that are not in the mapping get the id stored under ``'<UNK>'``.
    """
    ids = []
    for tok in tokens:
        ids.append(token_2_id.get(tok, token_2_id['<UNK>']))
    return ids

In [103]:
# Process an input text
def process_text(text, aspect):
    """Turn a (text, aspect) pair into a batch of one id sequence.

    The aspect is appended to the text (separated by a space); the combined
    string is normalized, tokenized and mapped to vocabulary ids.

    Args:
        text: the sentence to analyse.
        aspect: the aspect term whose sentiment is requested.

    Returns:
        An int64 tensor of shape (1, num_tokens).

    Raises:
        ValueError: if no token is left after normalization (for example when
            both strings are empty or contain only punctuation). Previously this
            produced an empty float tensor that failed later inside the model
            with an unrelated error.
    """
    tokens = tokenize(normalize(text + ' ' + aspect))
    if not tokens:
        raise ValueError(
            "text and aspect contain no usable tokens after normalization"
        )
    input_ids = convert_tokens_2_ids(tokens)
    # Explicit dtype: the embedding layer needs integer indices.
    return torch.tensor(input_ids, dtype=torch.long).unsqueeze(0)

In [104]:
# ABSA Model
class ABSA(nn.Module):
    """Sequence classifier: embedding -> single LSTM -> linear head.

    The linear head is applied to the LSTM output of the last time step only.
    """

    def __init__(self, vocab_size, num_labels=3):
        super().__init__()
        self.vocab_size = vocab_size
        self.num_labels = num_labels

        embedding_dim = 256
        hidden_size = 512

        # The attribute names below become the parameter names in the state
        # dict, so they must stay as they are for saved weights to load.
        self.embedding_layer = nn.Embedding(vocab_size, embedding_dim)
        self.lstm_layer = nn.LSTM(embedding_dim, hidden_size, batch_first=True)
        self.fc_layer = nn.Linear(hidden_size, self.num_labels)

    def forward(self, x):
        """Return logits of shape (batch, num_labels) for ids ``x`` of shape (batch, seq_len)."""
        embedded = self.embedding_layer(x)
        outputs, _state = self.lstm_layer(embedded)
        # batch_first=True, so dimension 1 is time; keep only the final step.
        last_step = outputs[:, -1]
        return self.fc_layer(last_step)

# Build the network with one embedding row per vocabulary entry, then restore
# the trained weights.
model = ABSA(
    vocab_size=len(token_2_id),
    num_labels=3,
)
# map_location='cpu' lets a checkpoint that was saved on a GPU machine load on a
# CPU-only one; this notebook builds its input tensors on the CPU and never
# moves the model to another device.
model.load_state_dict(
    torch.load(model_path, map_location='cpu')
)
model.eval()  # switch the module to evaluation mode for inference
print("Model loaded successfully")

Model loaded successfully


In [105]:
# Class index returned by the model -> human-readable sentiment name.
SENTIMENT_LABELS = {
    0: 'negative',
    1: 'neutral',
    2: 'positive',
}
def log_prediction(text, aspect, sentiment, label, confidence, inference_time, log_path=None):
    """Append one prediction record to the inference log CSV.

    Args:
        text: the input sentence.
        aspect: the aspect term that was analysed.
        sentiment: sentiment name ('negative', 'neutral' or 'positive').
        label: predicted class index (0, 1 or 2).
        confidence: probability the model assigned to the predicted class.
        inference_time: elapsed seconds reported by the caller.
        log_path: CSV file to append to. Defaults to ``inference_metric.csv``
            inside ``ROOT_DIR``.

    The 'target' column is written empty -- presumably it is filled in later
    with the true label (TODO confirm).

    A missing or completely empty log file is treated as "no records yet", so
    the first prediction creates the file instead of raising. A failure while
    writing is printed rather than raised, so a logging problem does not break
    the prediction itself.
    """
    if log_path is None:
        log_path = ROOT_DIR + r'\inference_metric.csv'

    new_row = pd.DataFrame([{
        'timestamp': pd.Timestamp.now(),  # current local date and time
        'text': text,
        'aspect': aspect,
        'sentiment': sentiment,
        'target': "",
        'prediction': label,
        'confidence': confidence,
        'inference_time': inference_time,
    }])

    try:
        existing = pd.read_csv(log_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No usable log yet: the new record becomes the whole log.
        metrics = new_row
    else:
        metrics = pd.concat([existing, new_row], ignore_index=True)

    try:
        metrics.to_csv(log_path, index=False)
    except Exception as e:
        # Best effort: report the problem but let the caller continue.
        print(e)
def predict_sentiment(text, aspect):
    """Predict the sentiment of ``aspect`` within ``text`` and log the result.

    Returns:
        dict with 'sentiment' (label name) and 'confidence' (probability the
        model assigned to the predicted class).
    """
    started = time()
    batch_ids = process_text(text, aspect)
    with torch.no_grad():
        logits = model(batch_ids)
    # Elapsed time covers preprocessing plus the forward pass.
    inference_time = time() - started

    probs = torch.softmax(logits, dim=-1)        # one probability per class
    label = probs.argmax(dim=-1).item()          # index of the most likely class
    sentiment = SENTIMENT_LABELS[label]
    confidence = probs[0, label].item()          # probability of that class

    log_prediction(text, aspect, sentiment, label, confidence, inference_time)
    return {"sentiment": sentiment, "confidence": confidence}


In [106]:
# Toy batch: each example has a variable-length id sequence and a label.
batch = [
    {"input_ids": [5, 7, 9], "label": 1},
    {"input_ids": [4, 3], "label": 0},
    {"input_ids": [10, 11, 12, 13], "label": 1},
]

# Split the list of examples into two parallel lists.
batch_input_ids, batch_labels = [], []
for example in batch:
    batch_input_ids.append(example['input_ids'])
    batch_labels.append(example['label'])

print(batch_input_ids)
print(batch_labels)

[[5, 7, 9], [4, 3], [10, 11, 12, 13]]
[1, 0, 1]


# **Model Monitoring**

In [122]:
# reference_data.csv is the comparison baseline; inference_metric.csv is the log
# written by log_prediction.
reference_df = pd.read_csv(ROOT_DIR + r'\reference_data.csv')
current_df = pd.read_csv(ROOT_DIR + r'\inference_metric.csv')
reference_df

Unnamed: 0,timestamp,text,aspect,sentiment,target,prediction,confidence,inference_time
0,49:34.9,The food was great but the service was terribl...,food,neutral,1,1,0.840034,0.062681
1,49:35.0,The food was great but the service was terribl...,service,negative,0,0,0.850334,0.002503
2,49:35.0,The food was great but the service was terribl...,ambience,positive,2,2,0.860276,0.003001


In [123]:
# Keep only the columns we want to monitor.
columns = ["target", "prediction"]
reference_df = reference_df.loc[:, columns]
current_df = current_df.loc[:, columns]
current_df

Unnamed: 0,target,prediction
0,2,2
1,2,2
2,1,2
3,2,2
4,2,2
5,2,1
6,2,2
7,0,2
8,2,2
9,2,1


# One problem: our model always predicts the positive class, which makes Evidently report a lot of errors. So I manually changed some values in the prediction column of the CSV file in order to see the report.

In [124]:
# Prepare data for monitoring
def _as_classification_dataset(frame):
    """Wrap ``frame`` in an evidently Dataset whose 'target' and 'prediction'
    columns are declared as multiclass target / predicted labels."""
    definition = DataDefinition(
        classification=[
            MulticlassClassification(
                target="target",
                prediction_labels="prediction",
            )
        ]
    )
    return Dataset.from_pandas(frame, data_definition=definition)


reference_data = _as_classification_dataset(reference_df)
current_data = _as_classification_dataset(current_df)


In [125]:
# Create Performance Report
classification_report = Report(metrics=[ClassificationPreset()], include_tests=True)

# Run the report on the current predictions, passing the reference set alongside.
classification_result = classification_report.run(
    current_data=current_data,
    reference_data=reference_data,
)

# Write the rendered report next to the model files.
classification_result.save_html(ROOT_DIR + r"\classification_report.html")

In [126]:
# Create DataDrift Report: checks whether the data distribution has changed,
# e.g. whether the mix of positive / negative / neutral sentiments at inference
# time differs from the mix seen at training time.
# (The report object used to be constructed twice; the duplicate was removed.)
drift_report = Report(metrics=[DataDriftPreset()])

datadrift_result = drift_report.run(
    reference_data=reference_data,
    current_data=current_data,
)
datadrift_result.save_html(ROOT_DIR + r"\drift_report.html")
