In [1]:
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Checkpoint identifier handed to `from_pretrained` for both the tokenizer and the model below.
model_name="facebook/mbart-large-50-many-to-many-mmt"

In [3]:
# Load the tokenizer for `model_name` and set its source language code to "ne_NP".
# NOTE(review): the recorded output of this cell shows an ImportError asking for the
# `protobuf` package; it must be installed (and the kernel restarted) before this runs cleanly.
tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
tokenizer.src_lang = "ne_NP"

# Load the model weights, requesting the "float16" dtype.
# NOTE(review): the string form of torch_dtype is presumably only accepted by newer
# transformers releases — confirm against the installed version.
model = MBartForConditionalGeneration.from_pretrained(model_name, torch_dtype="float16")

ImportError: 
 requires the protobuf library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/protocolbuffers/protobuf/tree/master/python#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.


In [4]:
# Input sentence that is tokenized and translated in the cells below.
continuous_sentence = "गौरा पर्वजस्ता मेलाहरूले हावा संगीत नृत्य र भक्तिले भरिन्छन्"

In [5]:
# Language code looked up in `tokenizer.lang_code_to_id` when calling `model.generate`.
target_lang = "en_XX"

In [None]:
# Print the token strings the tokenizer produces for the input sentence.
print(tokenizer.tokenize(continuous_sentence))

In [None]:
# Print the token ids returned by `encode` for the same sentence.
print(tokenizer.encode(continuous_sentence))

In [8]:
# Tokenize the sentence, asking for PyTorch tensors ("pt"); the result is unpacked into
# `model.generate(**inputs, ...)` further down.
inputs = tokenizer(continuous_sentence, return_tensors="pt")

In [None]:
# Display the `input_ids` entry of the tokenizer output.
inputs['input_ids']

In [None]:
# Display the `attention_mask` entry of the tokenizer output.
inputs['attention_mask']

In [11]:
# Generate output token ids from the tokenized sentence. `forced_bos_token_id` is set to the
# id registered for `target_lang`, which is how the target language is selected here.
generated_tokens = model.generate(
    **inputs, forced_bos_token_id=tokenizer.lang_code_to_id[target_lang]
)

In [None]:
# Display the raw generated token ids.
generated_tokens

In [None]:
# Spot check: decode the single token id 330 back to text.
tokenizer.decode(330)

In [14]:
# Decode the first generated sequence to a string, skipping special tokens.
english_translation = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

In [None]:
# Display the decoded translation.
english_translation

In [None]:
# Exploration: list every attribute name available on the model object.
print(dir(model))

In [None]:
# Display the model's repr.
model

In [18]:
# Keep a handle on the model's decoder so its layers can be inspected below.
decoder = model.get_decoder()

In [None]:
# Display the `out_proj` weight of the self-attention module in the first decoder layer.
(decoder.layers[0].self_attn.out_proj.weight)

In [22]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from transformers import MBartForConditionalGeneration, MBartTokenizer

In [23]:
def visualize_attention(attn_weights, title, xlabel="Tokens", ylabel="Tokens"):
    """Plot a 2-D attention matrix as a heatmap.

    Args:
        attn_weights: Matrix of attention weights. Tensor-like objects that expose
            ``detach()`` are converted with ``detach().cpu().numpy()`` first, the same
            conversion the other plotting helpers in this notebook apply; anything
            else is handed to seaborn unchanged.
        title: Title shown above the heatmap.
        xlabel: Label for the x-axis (heatmap columns). Defaults to "Tokens".
        ylabel: Label for the y-axis (heatmap rows). Defaults to "Tokens".

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # Tensors that track gradients or live on an accelerator cannot be converted to an
    # array implicitly, so do the conversion explicitly before plotting.
    if hasattr(attn_weights, "detach"):
        attn_weights = attn_weights.detach().cpu().numpy()

    plt.figure(figsize=(10, 8))
    sns.heatmap(attn_weights, cmap='viridis', annot=False)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

In [24]:
def visualize_embeddings(embeddings, title, max_tokens=50):
    """Plot the leading rows of an embedding matrix as a heatmap.

    Args:
        embeddings: Tensor-like matrix supporting slicing and
            ``.cpu().detach().numpy()``; rows are plotted on the y-axis
            ("Token Index"), columns on the x-axis ("Embedding Dimension").
        title: Title shown above the heatmap.
        max_tokens: Number of leading rows to plot. Defaults to 50, the limit that
            was previously hard-coded; ``None`` plots every row.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # Slice before converting so only the plotted rows are copied to a NumPy array.
    rows = embeddings[:max_tokens].cpu().detach().numpy()

    plt.figure(figsize=(10, 5))
    sns.heatmap(rows, cmap="coolwarm")
    plt.title(title)
    plt.xlabel("Embedding Dimension")
    plt.ylabel("Token Index")
    plt.show()

In [25]:
def visualize_logit_distribution(logits):
    """Show a 50-bin histogram, with a KDE curve, of every value in ``logits``.

    Args:
        logits: Tensor-like object supporting ``.cpu().detach().numpy()``; the
            resulting array is flattened, so all entries share one histogram.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    plt.figure(figsize=(8, 5))

    # Bring the values to a flat NumPy array before handing them to seaborn.
    as_array = logits.cpu().detach().numpy()
    flat_values = as_array.flatten()
    sns.histplot(flat_values, bins=50, kde=True)

    plt.xlabel("Logit Value")
    plt.ylabel("Frequency")
    plt.title("Logit Distribution Before Softmax")
    plt.show()

In [26]:
def process_text(text, model, tokenizer):
    """Tokenize ``text`` and run a single gradient-free forward pass of ``model``.

    Args:
        text: Input passed to ``tokenizer``.
        model: Callable invoked with the tokenizer output unpacked as keyword
            arguments, plus ``output_hidden_states=True`` and
            ``output_attentions=True``.
        tokenizer: Callable invoked as ``tokenizer(text, return_tensors="pt")``.

    Returns:
        A ``(inputs, outputs)`` tuple: the tokenizer result and the model result.
    """
    encoded = tokenizer(text, return_tensors="pt")
    # Ask the model to also return per-layer hidden states and attention maps.
    forward_flags = {"output_hidden_states": True, "output_attentions": True}

    with torch.no_grad():
        result = model(**encoded, **forward_flags)

    return encoded, result

In [None]:
# Switch to the fine-tuned checkpoint; `model`, `tokenizer` and `model_name` are rebound here,
# replacing the objects loaded at the top of the notebook.
# model_name = "facebook/mbart-large-50-many-to-many-mmt"
model_name = "Strange18/results"
model = MBartForConditionalGeneration.from_pretrained(model_name)
tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
# This is a new tokenizer object, so the source language set on the earlier tokenizer does
# not carry over. Set it again so the Nepali text is encoded with the Nepali language code.
tokenizer.src_lang = "ne_NP"

text = "नेपाल सुन्दर देश हो।"  # Example Nepali text
# One forward pass that also returns hidden states and attention maps for the plots below.
inputs, outputs = process_text(text, model, tokenizer)

In [None]:
# Visualizing Token Embeddings
# Takes the weight matrix of the encoder's token-embedding table; `visualize_embeddings`
# plots only its leading rows.
embed_tokens = model.model.encoder.embed_tokens.weight
visualize_embeddings(embed_tokens, "Token Embeddings")

In [None]:
# Visualizing Cross-Attention Weights
# These weights come from `outputs.cross_attentions`, not from a self-attention module,
# so the plot is titled accordingly.
attn_weights = outputs.cross_attentions[0][0][0]  # First layer, first batch item, first attention head
visualize_attention(attn_weights, "Cross-Attention Heatmap")

In [None]:
# Visualizing Attention Weights
# Source here is `outputs.decoder_attentions`, i.e. the decoder's own attention maps.
# NOTE(review): the title is identical to the encoder plot's, so the two figures can only
# be told apart by the cell that produced them.
attn_weights = outputs.decoder_attentions[0][0][0]  # First layer, first attention head
visualize_attention(attn_weights, "Self-Attention Heatmap")

In [None]:
# Visualizing Attention Weights
# Source here is `outputs.encoder_attentions`, i.e. the encoder's own attention maps.
# NOTE(review): the title is identical to the decoder plot's, so the two figures can only
# be told apart by the cell that produced them.
attn_weights = outputs.encoder_attentions[0][0][0]  # First layer, first attention head
visualize_attention(attn_weights, "Self-Attention Heatmap")

In [None]:
# Visualizing Logits Before Softmax
# Index 0 selects the first item along the leading dimension of `outputs.logits`
# (presumably the batch dimension — TODO confirm); all of its values go into one histogram.
logits = outputs.logits[0]
visualize_logit_distribution(logits)

In [None]:
# Visualizing Positional Embeddings
# Decoder side: weight matrix of `model.model.decoder.embed_positions`.
# `pos_embed` is rebound by the encoder cell that follows.
pos_embed = model.model.decoder.embed_positions.weight
visualize_embeddings(pos_embed, "Positional Embeddings")

In [None]:
# Visualizing Positional Embeddings
# Encoder side: weight matrix of `model.model.encoder.embed_positions`.
# Rebinds `pos_embed`, which the previous cell used for the decoder.
pos_embed = model.model.encoder.embed_positions.weight
visualize_embeddings(pos_embed, "Positional Embeddings")

In [None]:
# Decoder hidden states across layers.
# `hidden_states` starts as the sequence of per-layer tensors and is then rebound to a 2-D
# NumPy array: the tensors are stacked along a new leading axis, dim 1 is squeezed
# (presumably a batch of size 1 — TODO confirm), and the last dimension is averaged so each
# (layer, position) pair becomes a single scalar.
hidden_states = outputs.decoder_hidden_states
hidden_states = torch.stack(hidden_states).squeeze(1).mean(dim=2).cpu().numpy()
plt.figure(figsize=(12, 6))
# Transposed so the stacked (layer) axis runs along x and positions along y.
sns.heatmap(hidden_states.T, cmap="Blues")
plt.title("Hidden States Evolution Across Layers")
plt.xlabel("Layer Number")
plt.ylabel("Token Index")
plt.show()

In [None]:
# Encoder hidden states across layers.
# `hidden_states` starts as the sequence of per-layer tensors and is then rebound to a 2-D
# NumPy array: the tensors are stacked along a new leading axis, dim 1 is squeezed
# (presumably a batch of size 1 — TODO confirm), and the last dimension is averaged so each
# (layer, position) pair becomes a single scalar.
hidden_states = outputs.encoder_hidden_states
hidden_states = torch.stack(hidden_states).squeeze(1).mean(dim=2).cpu().numpy()
plt.figure(figsize=(12, 6))
# Transposed so the stacked (layer) axis runs along x and positions along y.
sns.heatmap(hidden_states.T, cmap="Blues")
plt.title("Hidden States Evolution Across Layers")
plt.xlabel("Layer Number")
plt.ylabel("Token Index")
plt.show()

In [None]:
# Histogram (50 bins, with KDE curve) of the `weight` parameter of
# `model.model.encoder.layernorm_embedding`, converted to a NumPy array first.
layer_norm = model.model.encoder.layernorm_embedding.weight.cpu().detach().numpy()
plt.figure(figsize=(8, 5))
sns.histplot(layer_norm, bins=50, kde=True)
plt.title("Layer Normalization Weight Distribution")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()


In [None]:
# Exploration: list the attribute names available on the model output object.
dir(outputs)

In [None]:
# Exploration: list the field names actually populated in the model output.
outputs.keys()

In [20]:
# print(translate_nepali_to_english('गौरा पर्वजस्ता मेलाहरूले हावा संगीत नृत्य र भक्तिले भरिन्छन्'))
# print(translate_nepali_to_english("आज म धेरै खुसी छु"))