In [None]:
# Install the Hugging Face libraries used by this notebook.
# NOTE(review): no versions are pinned, so a rerun may resolve different releases.
%pip install huggingface_hub
%pip install tokenizers
%pip install datasets
%pip install -U transformers

from google.colab import userdata

# Read the Hugging Face access token stored in Colab's `userdata` under the key 'HF_TOKEN'.
# NOTE(review): what happens when the secret is missing is not visible here — confirm.
hf_token = userdata.get('HF_TOKEN')

# Authenticate against the Hugging Face Hub with that token
from huggingface_hub import login
login(hf_token)

Collecting datasets
  Downloading datasets-3.3.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.3.0-py3-none-any.whl (484 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m484.9/484.9 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading x

In [None]:
# Clone the model repository into ./IELTS_DECODER_ONLY; the config, vocab, tokenizer and checkpoint files loaded below live there.
# NOTE(review): the later paths assume the working directory is /content — confirm when running outside Colab.
!git clone https://huggingface.co/R0b000/IELTS_DECODER_ONLY

Cloning into 'IELTS_DECODER_ONLY'...
remote: Enumerating objects: 25, done.[K
remote: Counting objects: 100% (25/25), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 25 (delta 3), reused 0 (delta 0), pack-reused 0 (from 0)[K
Unpacking objects: 100% (25/25), 29.36 KiB | 969.00 KiB/s, done.
Filtering content: 100% (5/5), 454.51 MiB | 45.41 MiB/s, done.


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
from transformers import RobertaTokenizerFast
from tokenizers import ByteLevelBPETokenizer
from transformers import TextStreamer
from tokenizers import Tokenizer

# Locations of the model artifacts inside the cloned repository.
# Change REPO_DIR when the repository is cloned somewhere other than /content.
REPO_DIR = "/content/IELTS_DECODER_ONLY"

# Plain strings (not Path objects) so they can be handed to any loader unchanged.
CONFIG_PATH = f"{REPO_DIR}/config.json"
VOCAB_FILE = f"{REPO_DIR}/vocab.json"
MERGES_FILE = f"{REPO_DIR}/merges.txt"
MODEL_PATH = f"{REPO_DIR}/pytorch_model.bin"
checkpoint_path = f"{REPO_DIR}/checkpoint_0.pt"
tokenizer_path = f"{REPO_DIR}/tokenizer.json"

def load_config(config_path):
    """Read a JSON configuration file and return the parsed content.

    Args:
        config_path: Path to the JSON file.

    Returns:
        The object produced by ``json.load`` (a dict when the file holds a
        JSON object at the top level).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(config_path, "r", encoding="utf-8") as f:
        return json.load(f)

def load_vocab(vocab_path):
    """Read a JSON vocabulary file and return the parsed content.

    Args:
        vocab_path: Path to the JSON file.

    Returns:
        The object produced by ``json.load`` (a dict when the file holds a
        JSON object at the top level).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Vocabulary keys may contain non-ASCII characters, so decode as UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(vocab_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Parse the configuration and vocabulary files shipped with the repository.
config = load_config(CONFIG_PATH)
vocab = load_vocab(VOCAB_FILE)

# Model hyperparameters, looked up in the configuration by key.
n_embd = config["n_embd"]
n_head = config["n_head"]
block_size = config["block_size"]
vocab_size = config["vocab_size"]

# Id of the "<EOS>" vocabulary entry; the generation methods stop when they sample it.
eos_token_id = vocab["<EOS>"]

# Tokenizer restored from its serialized JSON file.
tokenizer = Tokenizer.from_file(tokenizer_path)

# Define Transformer Model
class TransformerModel(nn.Module):
    """Decoder-only Transformer language model.

    Token embeddings and learned position embeddings are summed, passed
    through a stack of ``Block`` layers, layer-normalized and projected to
    vocabulary logits.

    Args:
        n_embd: Embedding width.
        n_head: Number of attention heads in each block.
        vocab_size: Number of token ids (rows of the token embedding table
            and size of each logits vector).
        block_size: Maximum context length (rows of the position embedding
            table).
        n_layer: Number of ``Block`` layers. Defaults to 6, the depth that
            was previously hard-coded.
    """

    def __init__(self, n_embd, n_head, vocab_size, block_size, n_layer=6):
        super().__init__()
        # Remembered so generation crops the context to this model's own
        # limit rather than to whatever a notebook global currently holds.
        self.block_size = block_size
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size)  # Output logits for vocab

    def forward(self, x):
        """Compute logits for every position of the input.

        Args:
            x: Integer token ids indexed as ``(batch, time)``; ``time`` must
                not exceed ``block_size`` because positions index the
                position embedding table.

        Returns:
            A tuple ``(logits, None)``. ``logits`` holds one
            ``vocab_size``-wide vector per input position; the second
            element is always ``None``.
        """
        tok_emb = self.token_embedding_table(x)
        pos_emb = self.position_embedding_table(torch.arange(x.shape[1], device=x.device))
        x = tok_emb + pos_emb
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.head(x)
        return logits, None

    def _sample_next(self, idx):
        """Sample one next-token id per sequence from the softmax of the last position."""
        # Only the most recent `block_size` tokens fit into the position table.
        idx_cond = idx[:, -self.block_size:]
        logits, _ = self(idx_cond)
        probs = F.softmax(logits[:, -1, :], dim=-1)
        return torch.multinomial(probs, num_samples=1)

    @torch.no_grad()  # sampling needs no gradients; avoids building autograd state on every step
    def generate(self, idx, max_new_tokens):
        """Autoregressively extend ``idx`` by sampling.

        Generation stops early as soon as any sequence in the batch samples
        the notebook-level ``eos_token_id`` (the EOS token itself is kept).

        Args:
            idx: Integer token ids indexed as ``(batch, time)``.
            max_new_tokens: Upper bound on the number of sampled tokens.

        Returns:
            The input ids with the sampled ids appended along the time axis.
        """
        for _ in range(max_new_tokens):
            idx_next = self._sample_next(idx)
            idx = torch.cat((idx, idx_next), dim=1)

            if eos_token_id is not None and (idx_next == eos_token_id).any():
                break

        return idx

    @torch.no_grad()  # sampling needs no gradients; avoids building autograd state on every step
    def generate_with_streaming(self, idx, max_new_tokens):
        """Like ``generate``, but stream tokens to the output while sampling.

        Only the first sequence of the batch is streamed and returned.

        Args:
            idx: Integer token ids indexed as ``(batch, time)``.
            max_new_tokens: Upper bound on the number of sampled tokens.

        Returns:
            The decoded text of the first sequence (prompt plus generated
            tokens), with special tokens skipped.
        """
        streamer = TextStreamer(tokenizer)  # Create a TextStreamer instance

        for _ in range(max_new_tokens):
            idx_next = self._sample_next(idx)
            idx = torch.cat((idx, idx_next), dim=1)

            # Stream the token
            streamer.put(idx_next[0])

            if eos_token_id is not None and (idx_next == eos_token_id).any():
                break

        streamer.end()  # Ensure proper termination
        return tokenizer.decode(idx.tolist()[0], skip_special_tokens=True)  # Return the final generated text

# Define Transformer Components
class Head(nn.Module):
    """One head of causal (masked) self-attention.

    Args:
        n_embd: Width of the input embeddings.
        n_head: Number of heads in the enclosing attention layer; this head
            projects to ``n_embd // n_head`` features.
        max_len: Longest sequence the causal mask has to cover. ``None``
            (the default) uses the notebook-level ``block_size``, which was
            previously the only option.
    """

    def __init__(self, n_embd, n_head, max_len=None):
        super().__init__()
        if max_len is None:
            max_len = block_size
        head_size = n_embd // n_head
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # Lower-triangular mask registered as a (persistent) buffer, so it is
        # part of the state dict exactly as before.
        self.register_buffer("tril", torch.tril(torch.ones(max_len, max_len)))

    def forward(self, x):
        """Apply masked scaled dot-product attention.

        Args:
            x: Input indexed as ``(batch, time, n_embd)``; ``time`` must not
                exceed the mask size chosen at construction.

        Returns:
            Attention output with ``n_embd // n_head`` features per position.
        """
        seq_len = x.shape[1]
        k, q, v = self.key(x), self.query(x), self.value(x)
        # Scale scores by 1/sqrt(head_size).
        wei = q @ k.transpose(-2, -1) * (k.shape[-1] ** -0.5)
        # A position may only attend to itself and to earlier positions.
        wei = wei.masked_fill(self.tril[:seq_len, :seq_len] == 0, float("-inf"))
        wei = F.softmax(wei, dim=-1)
        return wei @ v

class MultiHeadAttention(nn.Module):
    """Several ``Head`` modules applied to the same input, followed by a linear mix.

    Args:
        n_embd: Width of the input and output embeddings.
        n_head: Number of ``Head`` modules to create.
    """

    def __init__(self, n_embd, n_head):
        super().__init__()
        head_modules = []
        for _ in range(n_head):
            head_modules.append(Head(n_embd, n_head))
        self.heads = nn.ModuleList(head_modules)
        self.proj = nn.Linear(n_embd, n_embd)

    def forward(self, x):
        """Concatenate every head's output along the feature axis and project it."""
        per_head = [head(x) for head in self.heads]
        concatenated = torch.cat(per_head, dim=-1)
        return self.proj(concatenated)

class FeedForward(nn.Module):
    """Two linear layers with a ReLU in between; the hidden width is ``4 * n_embd``.

    Args:
        n_embd: Width of the input and output embeddings.
    """

    def __init__(self, n_embd):
        super().__init__()
        hidden_width = 4 * n_embd
        layers = [
            nn.Linear(n_embd, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, n_embd),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the expand / ReLU / project stack to ``x``."""
        out = self.net(x)
        return out

class Block(nn.Module):
    """Transformer block: self-attention then feed-forward, each with a residual connection.

    Layer normalization is applied to the input of each sub-layer.

    Args:
        n_embd: Width of the embeddings.
        n_head: Number of attention heads.
    """

    def __init__(self, n_embd, n_head):
        super().__init__()
        self.sa = MultiHeadAttention(n_embd, n_head)
        self.ffwd = FeedForward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        """Return ``x`` after the attention and feed-forward residual updates."""
        attended = self.sa(self.ln1(x))
        x = x + attended
        transformed = self.ffwd(self.ln2(x))
        return x + transformed

# Load model and weights
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerModel(n_embd, n_head, vocab_size, block_size).to(device)

checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)

# The checkpoint names the output projection `lm_head.*` while TransformerModel
# calls it `head.*`. Rename only when the old name is present, so a checkpoint
# that already uses `head.*` loads unchanged.
for old_key, new_key in (('lm_head.weight', 'head.weight'), ('lm_head.bias', 'head.bias')):
    if old_key in checkpoint['model_state_dict']:
        checkpoint['model_state_dict'][new_key] = checkpoint['model_state_dict'].pop(old_key)

# Diagnostics: show both sides of the key mapping before loading, so a
# mismatch can be read off directly if the load below fails.
print("Checkpoint keys:", checkpoint.keys())
print("Model state dict keys:", checkpoint['model_state_dict'].keys())

model_state_dict = model.state_dict()
print("Model's expected keys:", model_state_dict.keys())

for name, param in checkpoint['model_state_dict'].items():
    print(f"{name}: {param.shape}")

# Load the weights exactly once. A failure is deliberately allowed to
# propagate: continuing with randomly initialized weights would silently
# produce meaningless generations.
model.load_state_dict(checkpoint['model_state_dict'])
print("Checkpoint loaded successfully.")

# NOTE(review): `generate*` are resolved on the wrapped module, so sampling may
# bypass the compiled forward pass — confirm whether compilation helps here.
model = torch.compile(model)
model.eval()

Checkpoint keys: dict_keys(['model_state_dict', 'optimizer_state_dict', 'iteration', 'losses'])
Model state dict keys: odict_keys(['token_embedding_table.weight', 'position_embedding_table.weight', 'blocks.0.sa.heads.0.tril', 'blocks.0.sa.heads.0.key.weight', 'blocks.0.sa.heads.0.query.weight', 'blocks.0.sa.heads.0.value.weight', 'blocks.0.sa.heads.1.tril', 'blocks.0.sa.heads.1.key.weight', 'blocks.0.sa.heads.1.query.weight', 'blocks.0.sa.heads.1.value.weight', 'blocks.0.sa.heads.2.tril', 'blocks.0.sa.heads.2.key.weight', 'blocks.0.sa.heads.2.query.weight', 'blocks.0.sa.heads.2.value.weight', 'blocks.0.sa.heads.3.tril', 'blocks.0.sa.heads.3.key.weight', 'blocks.0.sa.heads.3.query.weight', 'blocks.0.sa.heads.3.value.weight', 'blocks.0.sa.heads.4.tril', 'blocks.0.sa.heads.4.key.weight', 'blocks.0.sa.heads.4.query.weight', 'blocks.0.sa.heads.4.value.weight', 'blocks.0.sa.heads.5.tril', 'blocks.0.sa.heads.5.key.weight', 'blocks.0.sa.heads.5.query.weight', 'blocks.0.sa.heads.5.value.weight'

OptimizedModule(
  (_orig_mod): TransformerModel(
    (token_embedding_table): Embedding(275, 384)
    (position_embedding_table): Embedding(256, 384)
    (blocks): Sequential(
      (0): Block(
        (sa): MultiHeadAttention(
          (heads): ModuleList(
            (0-5): 6 x Head(
              (key): Linear(in_features=384, out_features=64, bias=False)
              (query): Linear(in_features=384, out_features=64, bias=False)
              (value): Linear(in_features=384, out_features=64, bias=False)
            )
          )
          (proj): Linear(in_features=384, out_features=384, bias=True)
        )
        (ffwd): FeedForward(
          (net): Sequential(
            (0): Linear(in_features=384, out_features=1536, bias=True)
            (1): ReLU()
            (2): Linear(in_features=1536, out_features=384, bias=True)
          )
        )
        (ln1): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
        (ln2): LayerNorm((384,), eps=1e-05, elementwise_affine=

### Example 1

In [None]:
# Example 1 input: a task prompt followed by a sample essay.
# The text is tokenized in the next cell and used as the generation context.
input_text = """
Prompt: Some people believe that the best way to reduce crime is to give longer prison sentences. Others believe there are better alternatives. Discuss both views and give your opinion.

Essay:
Crime is a major issue in many societies today, and it is widely debated how best to tackle it. While some argue that longer prison sentences are the most effective deterrent, others suggest that there are alternative methods of reducing crime. In this essay, I will examine both viewpoints before offering my own opinion.

On one hand, proponents of longer prison sentences argue that such measures ensure that criminals are removed from society for longer periods, reducing the likelihood of reoffending. In addition, the fear of long prison terms may discourage potential offenders from engaging in criminal activities. They believe that the justice system must be firm to maintain order and safety.

On the other hand, opponents of this approach argue that incarceration alone does not address the root causes of crime, such as poverty, lack of education, and social inequality. They propose alternatives like rehabilitation programs, education, and community-based initiatives that focus on helping offenders reintegrate into society. These methods aim to reduce recidivism by tackling the social factors that contribute to criminal behavior.

In my opinion, while longer prison sentences may be necessary for certain severe crimes, they should not be the only solution. It is equally important to invest in rehabilitation and education programs to address the causes of crime and reduce the likelihood of reoffending. A balanced approach, combining punishment with rehabilitation, would likely be the most effective way to reduce crime in the long term.
"""

In [None]:
# Token ids of the example text as produced by the tokenizer.
context = tokenizer.encode(input_text).ids
# Nest the ids in an outer list so the tensor gets a leading batch dimension of size 1.
context_tensor = torch.tensor([context], dtype=torch.long, device=device)

In [None]:
# Round-trip check: decode the token ids back to text and print the result.
print(tokenizer.decode(context))


Prompt: Some people believe that the best way to reduce crime is to give longer prison sentences. Others believe there are better alternatives. Discuss both views and give your opinion.

Essay:
Crime is a major issue in many societies today, and it is widely debated how best to tackle it. While some argue that longer prison sentences are the most effective deterrent, others suggest that there are alternative methods of reducing crime. In this essay, I will examine both viewpoints before offering my own opinion.

On one hand, proponents of longer prison sentences argue that such measures ensure that criminals are removed from society for longer periods, reducing the likelihood of reoffending. In addition, the fear of long prison terms may discourage potential offenders from engaging in criminal activities. They believe that the justice system must be firm to maintain order and safety.

On the other hand, opponents of this approach argue that incarceration alone does not address the roo

In [None]:
# Sample at most 2000 new tokens after the prompt.
generated_output = model.generate(context_tensor, max_new_tokens=2000)
# Decode the first (and only) sequence of the batch and print it.
print(tokenizer.decode(generated_output.tolist()[0]))


Prompt: Some people believe that the best way to reduce crime is to give longer prison sentences. Others believe there are better alternatives. Discuss both views and give your opinion.

Essay:
Crime is a major issue in many societies today, and it is widely debated how best to tackle it. While some argue that longer prison sentences are the most effective deterrent, others suggest that there are alternative methods of reducing crime. In this essay, I will examine both viewpoints before offering my own opinion.

On one hand, proponents of longer prison sentences argue that such measures ensure that criminals are removed from society for longer periods, reducing the likelihood of reoffending. In addition, the fear of long prison terms may discourage potential offenders from engaging in criminal activities. They believe that the justice system must be firm to maintain order and safety.

On the other hand, opponents of this approach argue that incarceration alone does not address the roo

In [None]:
# Sample at most 2000 new tokens, streaming them to the cell output as they are produced.
# NOTE: this rebinds `generated_output` (a tensor after the previous cell) to the decoded text that generate_with_streaming returns.
generated_output = model.generate_with_streaming(context_tensor, max_new_tokens=2000)


To conclude there is a kids a bad point of friends which are college with students also available matter life. The most up in they day wont global to the rise of development in scheduls to enhance the deservice of occupation.

Firstly if a low more which several students days life on the more dedicating harmful to find what their  house benefits of automote  the loze of modern. Whether topic them there is a vast of the first necessary to be these this problem on the purnnity of it.

To begin with the reason is of the factor money cycle skills is hardening to get depending as the scripts and same of tours are suggestion in school. The use of mistakes in the overall coherence and grammar accuracy. Additionally the candidate should focus on a more variety of sentence structures including simple compound and clarity.

Some specific If I may be maintain explanation my begin that it is considered as these parents discussion.On the other pI that the government causes say they have nowadays c

### Example 2

In [None]:
# Example 2 input: a task prompt followed by a sample essay.
# The text is tokenized in the next cell and used as the generation context.
input_text_1 = """
Prompt: Some people think that children should be given less homework. Others believe that homework is essential for a child's development. Discuss both views and give your opinion.

Essay:
The issue of whether children should be assigned less homework has sparked a great deal of debate. While some believe that reducing homework would benefit children’s well-being, others argue that homework is vital for academic success and personal growth. In this essay, I will explore both sides of the argument and offer my own perspective.

Supporters of less homework argue that children already have a busy schedule with school, extracurricular activities, and family commitments. Excessive homework can lead to stress, burnout, and a lack of free time, which is crucial for a child’s overall development. Furthermore, it is suggested that children need time to relax and engage in other activities, such as sports or hobbies, to develop a well-rounded personality.

On the contrary, those who advocate for homework argue that it plays a critical role in reinforcing what is taught in the classroom. Homework allows children to practice and apply what they have learned, leading to better retention and understanding. It also helps children develop important skills such as time management, discipline, and responsibility, which are essential for future success.

In my opinion, while homework is necessary for academic development, there should be a balance to ensure that children are not overwhelmed. Assigning reasonable amounts of homework that reinforce key concepts and encourage independent learning, without encroaching on their personal time, would be a better approach.
"""

In [None]:
# Token ids of the example text as produced by the tokenizer.
context_1 = tokenizer.encode(input_text_1).ids
# Nest the ids in an outer list so the tensor gets a leading batch dimension of size 1.
context_tensor_1 = torch.tensor([context_1], dtype=torch.long, device=device)

In [None]:
# Sample at most 2000 new tokens after the prompt.
generated_output_1 = model.generate(context_tensor_1, max_new_tokens=2000)
# Decode the first (and only) sequence of the batch and print it.
print(tokenizer.decode(generated_output_1.tolist()[0]))


Prompt: Some people think that children should be given less homework. Others believe that homework is essential for a child's development. Discuss both views and give your opinion.

Essay:
The issue of whether children should be assigned less homework has sparked a great deal of debate. While some believe that reducing homework would benefit children’s well-being, others argue that homework is vital for academic success and personal growth. In this essay, I will explore both sides of the argument and offer my own perspective.

Supporters of less homework argue that children already have a busy schedule with school, extracurricular activities, and family commitments. Excessive homework can lead to stress, burnout, and a lack of free time, which is crucial for a child’s overall development. Furthermore, it is suggested that children need time to relax and engage in other activities, such as sports or hobbies, to develop a well-rounded personality.

On the contrary, those who advocate f

In [None]:
# Sample at most 2000 new tokens, streaming them to the cell output as they are produced.
# NOTE: the returned text is stored in the shared name `generated_output`, not in a per-example name such as `generated_output_1`.
generated_output = model.generate_with_streaming(context_tensor_1, max_new_tokens=2000)


Many concentrash being advrtising the social comfortable for  not art of their it. It has discovered that more advantages in factors which the future academic to solve such as clear particular into reducing the subject education can be increased for a part of firstly students. However the response is cohesing in a smooth flow of ideas could be currently and be presented. However the transitions between paragraphs smooth could be improved.

Suggested Band Score: 6.5

 Overall Band Score:
- Would be the essay meets the requirements of a basic aspect.
- The paragraphs are not smooth and explicit the topic.
- The essay provides examples to support the arguments.
- The structure used is effective and exhibits.
- Suggested Band Score (Coherence and Cohesion): 7.0

 Lexical Resource (Vocabulary):
- The candidate uses a range of appropriate terms and repetition.
- There are some mistakes in vocabulary such as have 
- The sude of best ideas not say is true activity is instead of be an instead 

### Example 3

In [None]:
# Example 3 input: a task prompt followed by a sample essay.
# The text is tokenized in the next cell and used as the generation context.
# The literal ends with a newline, like the other example inputs; a stray
# trailing backslash used to swallow that final newline.
input_text_2 = """
Prompt: Some people believe that climate change is the responsibility of individuals, while others think it is the government's responsibility. Discuss both views and give your opinion.

Essay:
Climate change is one of the most pressing issues of our time, and there is an ongoing debate about who should take responsibility for addressing it. While some argue that individuals must take responsibility for their own actions to combat climate change, others believe that the government should take the lead in tackling this global challenge. In this essay, I will examine both perspectives before offering my own opinion.

On one hand, those who argue that individuals should be responsible for fighting climate change contend that personal actions, such as reducing energy consumption, recycling, and using eco-friendly products, can have a significant impact. They believe that if everyone made small changes in their daily lives, the collective effect could contribute to a reduction in carbon emissions and the overall environmental footprint. Moreover, individual awareness and education can encourage others to adopt more sustainable lifestyles.

On the other hand, opponents of this view argue that climate change is a global issue that requires coordinated government action. They believe that governments have the power to enact policies that can regulate emissions, promote renewable energy, and invest in sustainable infrastructure. For example, governments can introduce laws to limit pollution, provide incentives for green technology, and fund research into alternative energy sources. Therefore, the responsibility for combating climate change should fall on policymakers who have the authority and resources to make systemic changes.

In my opinion, while individuals have a role to play in reducing their environmental impact, the government should take the lead in tackling climate change. Governments have the necessary tools, resources, and influence to implement large-scale changes that can make a significant difference. A joint effort between individuals and governments would be the most effective way to combat this global challenge.
"""

In [None]:
# Token ids of the example text as produced by the tokenizer.
context_2 = tokenizer.encode(input_text_2).ids
# Nest the ids in an outer list so the tensor gets a leading batch dimension of size 1.
context_tensor_2 = torch.tensor([context_2], dtype=torch.long, device=device)

In [None]:
# Sample at most 2000 new tokens after the prompt.
generated_output_2 = model.generate(context_tensor_2, max_new_tokens=2000)
# Decode the first (and only) sequence of the batch and print it.
print(tokenizer.decode(generated_output_2.tolist()[0]))

KeyboardInterrupt: 

In [None]:
# Sample at most 2000 new tokens, streaming them to the cell output as they are produced.
# NOTE: the returned text is stored in the shared name `generated_output`, not in a per-example name such as `generated_output_2`.
generated_output = model.generate_with_streaming(context_tensor_2, max_new_tokens=2000)

 Also however the hinder can growth the arguments of the same many opposition of that it is a negative detail for good. As a result to people find assues them to every certain their paying one.These issues with a several impact on their aspects in our grammar.

Experience I am reason that my opinion my opinion is ta�t that much proper any investigational congating to replace the internet national experience. For example in Aos the research other than is a young general and ways of the surrounded and lack of professions society.

In conclusion It consumerting by empaning our decision has beneficial to conclusion a more balanced crime in a order may years the rapid of the community is a few exhibitive. It is believed that although nation is the subside of people to purpose.

Despite the action tasks can enhancing the environmental problem with replace. The financial nation is context. If this would also suggested to remery have their perspective increases social gudes and persed the worl

### Example 4

In [None]:
# Example 4 input: a task prompt followed by a sample essay.
# The text is tokenized in the next cell and used as the generation context.
input_text_3 = """
Prompt: Some people think that studying abroad is essential for students, while others believe that staying in their home country is a better option. Discuss both views and give your opinion.

Essay:
The decision of whether to study abroad or remain in one’s home country is a topic of much debate. Some people argue that studying abroad is essential for a student’s personal and academic growth, while others believe that staying in one’s home country is the better choice. This essay will examine both perspectives before offering my own opinion.

On one hand, those who advocate for studying abroad highlight the benefits of experiencing a new culture, broadening one’s horizons, and learning in a different academic environment. Exposure to diverse teaching methods and a global network of peers can provide students with unique opportunities for growth. Furthermore, studying abroad can help students develop independence, adaptability, and language skills, which are invaluable in today’s globalized job market.

On the other hand, those who believe that staying in one’s home country is preferable argue that students can receive a high-quality education without the added stress and financial burden of studying abroad. Staying at home allows students to remain close to family and friends, which can provide emotional support and stability. Moreover, in many countries, there are reputable universities that offer excellent education and research opportunities, making it unnecessary to leave the country.

In my opinion, studying abroad can offer valuable experiences that contribute to personal and academic growth. However, I also believe that it is important for students to weigh the financial costs, potential cultural challenges, and educational quality when making this decision. Ultimately, the best choice depends on individual circumstances, but studying abroad can offer distinct advantages for those who can afford the experience.
"""

In [None]:
# Token ids of the example text as produced by the tokenizer.
context_3 = tokenizer.encode(input_text_3).ids
# Nest the ids in an outer list so the tensor gets a leading batch dimension of size 1.
context_tensor_3 = torch.tensor([context_3], dtype=torch.long, device=device)

In [None]:
# Sample at most 2000 new tokens after the prompt.
generated_output_3 = model.generate(context_tensor_3, max_new_tokens=2000)
# Decode the first (and only) sequence of the batch and print it.
print(tokenizer.decode(generated_output_3.tolist()[0]))


Prompt: Some people think that studying abroad is essential for students, while others believe that staying in their home country is a better option. Discuss both views and give your opinion.

Essay:
The decision of whether to study abroad or remain in one’s home country is a topic of much debate. Some people argue that studying abroad is essential for a student’s personal and academic growth, while others believe that staying in one’s home country is the better choice. This essay will examine both perspectives before offering my own opinion.

On one hand, those who advocate for studying abroad highlight the benefits of experiencing a new culture, broadening one’s horizons, and learning in a different academic environment. Exposure to diverse teaching methods and a global network of peers can provide students with unique opportunities for growth. Furthermore, studying abroad can help students develop independence, adaptability, and language skills, which are invaluable in today’s glob

In [None]:
# Sample at most 2000 new tokens, streaming them to the cell output as they are produced.
# NOTE: the returned text is stored in the shared name `generated_output`, not in a per-example name such as `generated_output_3`.
generated_output = model.generate_with_streaming(context_tensor_3, max_new_tokens=2000)


Suggested Band Score (Grammatical Range and Accuracy): 6

Overall Considering the essays meeting the requirements of the essay.
- The essay is well-organized and demonstrates a good range of vocabulary and grammar.

 Feedback and Additional Comments:

Strengths:

- Clear and concise introduction body paragraphs and conclusion.
- Use more sophisticated to connect ideas and and ensuring a smooth flow of information.
- While the vocabulary and grammar the candidate could benefit from more time

Areas for Improvement:

- The candidate could improve their grammatical range and accuracy.
- The candidate has covered all parts of the task and supported arguments with examples.
- Suggested Overall Band Score: 6.5

 Feedback and Additional Comments:
Strengths:
- Clear and logical flow of information.
- Lexical Resource (Vocabulary)
- The candidate demonstrates a good range of vocabulary and sentence structures including complex and compound sentences.
- However there are a few minor grammatical

### Example 5

In [None]:
# Example 5 input: a task prompt followed by a sample essay.
# The text is tokenized in the next cell and used as the generation context.
input_text_4 = """
Prompt: Some people believe that technology has made communication between people easier, while others think it has created more barriers between people. Discuss both views and give your opinion.

Essay:
Advancements in technology have revolutionized communication in many ways, but they have also sparked debates about whether they have brought people closer together or created more barriers. Some believe that technology has made communication easier, while others argue that it has led to greater isolation. In this essay, I will discuss both views and offer my own opinion.

On one hand, technology has undoubtedly made communication easier and more accessible. With the advent of smartphones, social media, and instant messaging apps, people can now stay in touch with family, friends, and colleagues regardless of geographical distance. This has allowed for more frequent and convenient interactions, and has facilitated the exchange of ideas and information across borders. Additionally, video calls and virtual meetings have enabled people to collaborate and communicate professionally without the need for physical presence.

On the other hand, some argue that technology has created new barriers between people. Despite the increased accessibility of communication tools, face-to-face interaction has become less common, leading to a decline in meaningful, personal relationships. People may be more likely to engage with others online than in person, which can lead to feelings of loneliness and social isolation. Furthermore, constant reliance on technology can create distractions, leading to a lack of attention and empathy in conversations.

In my opinion, while technology has made communication more convenient, it is important to find a balance between online and offline interactions. Technology should be used to enhance relationships, not replace them. In this way, people can enjoy the benefits of technological communication while maintaining strong, personal connections with those around them.
"""

In [None]:
# Token ids of the example text as produced by the tokenizer.
context_4 = tokenizer.encode(input_text_4).ids
# Nest the ids in an outer list so the tensor gets a leading batch dimension of size 1.
context_tensor_4 = torch.tensor([context_4], dtype=torch.long, device=device)

In [None]:
# Sample at most 2000 new tokens after the prompt.
generated_output_4 = model.generate(context_tensor_4, max_new_tokens=2000)
# Decode the first (and only) sequence of the batch and print it.
print(tokenizer.decode(generated_output_4.tolist()[0]))


Prompt: Some people believe that technology has made communication between people easier, while others think it has created more barriers between people. Discuss both views and give your opinion.

Essay:
Advancements in technology have revolutionized communication in many ways, but they have also sparked debates about whether they have brought people closer together or created more barriers. Some believe that technology has made communication easier, while others argue that it has led to greater isolation. In this essay, I will discuss both views and offer my own opinion.

On one hand, technology has undoubtedly made communication easier and more accessible. With the advent of smartphones, social media, and instant messaging apps, people can now stay in touch with family, friends, and colleagues regardless of geographical distance. This has allowed for more frequent and convenient interactions, and has facilitated the exchange of ideas and information across borders. Additionally, vid

In [None]:
# Sample at most 2000 new tokens, streaming them to the cell output as they are produced.
# NOTE: the returned text is stored in the shared name `generated_output`, not in a per-example name such as `generated_output_4`.
generated_output = model.generate_with_streaming(context_tensor_4, max_new_tokens=2000)


The children if beings to discuss how are turning to be the level of their fame that its two get care for the come. Because the solution of the cost of the communication is the firstly of profession of consumers can underly due to the government and their point measures from that many celebrities spending to presented during to injurfactive actions or bstudents due to this towns of it.

In conclusion the same carbon maket in degrowing during new people needs to work on a children should tackle other parents. Despite the most essay who gath private In this realI think that very visits most satisfactoricate quickly less flower bublic leading newspapers to live statish which are always in future.

Secondly teenagers should help reducing occupation halthough in the environment and suggested Band Score (should be To sum up Consequence the country world computer peoples life cities in justmptivate in can be replaced with and not many methods to improve their accuracy to ensure vocabulary.
-

### Training Model

In [None]:
from datasets import load_dataset

# Fetch the Nepali/English parallel corpus from the Hugging Face Hub.
# The capitalised variable name is kept because later cells index `Dataset[...]`.
Dataset = load_dataset(path='iamTangsang/Nepali-to-English-Translation-Dataset')

README.md:   0%|          | 0.00/568 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/76.8M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/1.19M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.20M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/702697 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10866 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10867 [00:00<?, ? examples/s]

In [None]:
# Pull the train and test splits out of the loaded dataset dictionary.
train_dataset, test_dataset = Dataset['train'], Dataset['test']

In [None]:
import pandas as pd

# Number of leading rows kept from each split for this small-scale experiment.
SAMPLE_ROWS = 40

# Slice the datasets *before* building the DataFrames. Slicing a Hugging Face
# Dataset returns a dict of column -> list for just the requested rows, so only
# SAMPLE_ROWS examples are materialised instead of converting the entire split
# (hundreds of thousands of rows for train) and discarding almost all of it.
train_df = pd.DataFrame(train_dataset[:SAMPLE_ROWS])
test_df = pd.DataFrame(test_dataset[:SAMPLE_ROWS])

In [None]:
# Both sides of a translation pair are required downstream, so discard any row
# where either the 'source' or the 'target' column is null.
pair_columns = ['source', 'target']

train_df = train_df.dropna(subset=pair_columns)
test_df = test_df.dropna(subset=pair_columns)

In [None]:
# Build one line per pair in the form "<target> <SEP> <source> <EOS> "
# (English target first, then the Nepali source).
train_df['combined'] = train_df['target'] + ' <SEP> ' + train_df['source'] + ' <EOS> '
test_df['combined'] = test_df['target'] + ' <SEP> ' + test_df['source'] + ' <EOS> '

# Write the files explicitly as UTF-8: the text contains Devanagari, and the
# cell that reads these files back decodes them as UTF-8. Relying on the
# platform default encoding can raise UnicodeEncodeError or corrupt the text.
with open('input.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{item}\n" for item in train_df['combined'])

with open('test.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{item}\n" for item in test_df['combined'])

print("File saved as input.txt")
print("File saved as test.txt")

File saved as input.txt
File saved as test.txt


In [None]:
# Read back the corpora written by the previous cell. That cell writes to
# paths relative to the working directory, so the same relative paths are used
# here instead of a hard-coded '/content/' prefix that only exists on Colab.
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()

with open('test.txt', 'r', encoding='utf-8') as f:
    test_text = f.read()

In [None]:
# Take the 'source' sentence at row position 35 as a tokenizer sample.
# NOTE: `input` shadows the Python builtin; the name is kept because the
# following cells read it.
input = train_df['source'].iat[35]
print(input)

फाइदाहरु : यस आसनले हातखुट्टा र कम्मरलाई लचिलो एवम् बलियो बनाउंछ र सुडौल पार्छ ।



In [None]:
# Tokenize the sample sentence and keep only the integer token ids.
sample_encoding = tokenizer.encode(input)
encoded_text = sample_encoding.ids
print(encoded_text)

[160, 101, 108, 160, 101, 126, 160, 101, 233, 160, 101, 103, 160, 101, 126, 160, 101, 121, 160, 101, 112, 160, 102, 227, 224, 29, 224, 160, 101, 111, 160, 101, 120, 224, 160, 101, 232, 160, 101, 120, 160, 101, 105, 160, 101, 114, 160, 102, 233, 224, 160, 101, 121, 160, 101, 126, 160, 101, 101, 160, 101, 248, 160, 102, 227, 160, 101, 257, 160, 102, 239, 160, 101, 257, 160, 101, 126, 224, 160, 101, 112, 224, 160, 101, 247, 160, 101, 110, 160, 102, 239, 160, 101, 110, 160, 101, 112, 160, 101, 114, 160, 101, 126, 160, 101, 234, 224, 160, 101, 114, 160, 101, 252, 160, 101, 127, 160, 101, 114, 160, 102, 237, 224, 160, 101, 241, 160, 101, 117, 160, 101, 110, 160, 102, 239, 224, 160, 101, 109, 160, 101, 114, 160, 101, 127, 160, 101, 111, 160, 102, 237, 224, 160, 101, 109, 160, 101, 105, 160, 101, 126, 160, 101, 235, 160, 101, 228, 160, 101, 253, 224, 160, 101, 112, 224, 160, 101, 120, 160, 102, 227, 160, 101, 98, 160, 102, 238, 160, 101, 114, 224, 160, 101, 107, 160, 101, 126, 160, 101, 112, 1

In [None]:
# Round-trip check: decoding the ids should reproduce the sample sentence.
decoded_text = tokenizer.decode(encoded_text)
print(decoded_text)

फाइदाहरु : यस आसनले हातखुट्टा र कम्मरलाई लचिलो एवम् बलियो बनाउंछ र सुडौल पार्छ ।



In [None]:
from tokenizers import ByteLevelBPETokenizer
import torch
from collections import Counter

# Special tokens
special_tokens = ["<OOV>", "<PAD>", "<SEP>", "<EOS>"]

# Fresh byte-level BPE tokenizer to be trained on the translation corpus.
new_tokenizer = ByteLevelBPETokenizer()

# Training corpus: the full contents of input.txt as a single string.
train_text = text
# print(train_text)

# `train_text` is already one string, so its characters can be counted directly.
char_counter = Counter(train_text)
print(f"Number of unique characters: {len(char_counter)}")

# Count whitespace-separated words. The string must be split as-is: joining a
# string with " " first puts a space between every character, which makes the
# "word" count collapse to the number of distinct non-whitespace characters.
word_counter = Counter(train_text.split())
vocab_size = len(word_counter)
print(f"Number of unique words: {vocab_size}")

# Train on a list of lines rather than the raw string. Iterating a str yields
# one character at a time, so passing `train_text` directly would feed the
# trainer single-character sequences from which no merges can be learned.
# NOTE(review): a byte-level BPE starts from a 256-symbol byte alphabet, so
# merges are only learned if vocab_size exceeds that plus the special tokens
# -- confirm the unique-word count is a suitable target size.
new_tokenizer.train_from_iterator(
    train_text.splitlines(),
    vocab_size=vocab_size,
    min_frequency=2,
    special_tokens=special_tokens
)

Number of unique characters: 123
Number of unique words: 121


In [None]:
# Snapshot the token -> id mappings of the existing and the newly trained tokenizer.
old_vocab, new_vocab = tokenizer.get_vocab(), new_tokenizer.get_vocab()

print(old_vocab, new_vocab)

# Union of both mappings; where a token exists in both, the id from new_vocab wins.
merged_vocab = dict(old_vocab)
merged_vocab.update(new_vocab)

{';': 30, '²': 114, 'Ï': 143, 'H': 43, 'Í': 141, 'è¯į': 265, 'ĝ': 221, 'Ĝ': 220, 'ò': 178, 'Ċ': 202, '_': 66, '¤': 101, 'ą': 197, 'ĩ': 233, 'g': 74, 'Ò': 146, '~': 97, 'ï': 175, '\\': 63, 'º': 122, 'V': 57, '-': 16, 'Ü': 156, 'ļ': 252, '$': 7, 'Ä±': 274, '{': 94, '@': 35, 'î': 174, '=': 32, 'Ö': 150, 'ñ': 177, 'ķ': 247, 'Ĵ': 244, 'ô': 180, '³': 115, 'çĤ¹': 269, '£': 100, 'd': 71, '«': 108, 'Ã¢': 263, 'Ø': 152, '¥': 102, '*': 13, 'Ļ': 251, 'ì': 172, 'ľ': 254, 'Ð': 144, '»': 123, '×': 151, 'Æ': 134, 'Ã©': 262, 'æ': 166, 'Ą': 196, '°': 112, 'm': 80, 'Âł': 260, 'ģ': 227, 'R': 53, 'ĸ': 248, '<OOV>': 0, 'Ù': 153, 'e': 72, 'b': 69, 'µ': 117, 'ĵ': 245, 'ğ': 223, 'Į': 238, 'r': 85, '(': 11, "'": 10, 'Đ': 208, 'ł': 258, 'S': 54, 'u': 88, 'Å': 133, '"': 5, '¹': 121, '¿': 127, 'Ě': 218, 'Z': 61, '<PAD>': 1, 'z': 93, 'I': 44, 'ª': 107, 'Ú': 154, 'T': 55, 'U': 56, 'ü': 188, 'K': 46, 'Ł': 257, ',': 15, 'ě': 219, 'Ħ': 230, '¢': 99, '§Ĥ': 270, 'ö': 182, 'Á': 129, 'Ì': 140, 'M': 48, 'è¯': 264, 'ë': 171,

In [None]:
# Register every token that the new tokenizer knows but the existing one does
# not. The tokens are collected first and added in a single call (same tokens,
# same order) instead of invoking add_tokens once per token.
# NOTE(review): the encode output for the sample sentence looks the same before
# and after this step -- confirm these additions actually affect tokenization.
missing_tokens = [token for token in new_vocab if token not in old_vocab]
if missing_tokens:
    tokenizer.add_tokens(missing_tokens)

In [None]:
# Show the rows at positions 35 through 39 of the training frame.
print(train_df.iloc[35:40])

                                               source  \
35  फाइदाहरु : यस आसनले हातखुट्टा र कम्मरलाई लचिलो...   
36                    यो अंग्रेजीको मात्रै कुरा होइन।   
37  `` बोका रेटनका सक्रीय ५५ वर्षीय व्यक्ति सिनियर...   
38  उनलाई सहयोग गर्नका लागि आसपासमा कोही पनि थिएनन् ।   
39  मैले यो उमेरसम्म यस्तो अवस्थाको माछापुच्छ्रे द...   

                                               target  \
35  Benefits : This posture makes the hands and th...   
36                        It is not just the English.   
37  `` An active 55-year-old in Boca Raton may car...   
38                     No one was around to help Him.   
39     I have never seen this species of fish before.   

                                             combined  
35  Benefits : This posture makes the hands and th...  
36  It is not just the English. <SEP> यो अंग्रेजीक...  
37  `` An active 55-year-old in Boca Raton may car...  
38  No one was around to help Him. <SEP> उनलाई सहय...  
39  I have never seen this species

In [None]:
# Re-select the 'source' sentence at row position 35 to re-test the tokenizer.
# NOTE: `input` shadows the Python builtin; the name is kept because the
# following cells read it.
input = train_df['source'].iat[35]
print(input)

फाइदाहरु : यस आसनले हातखुट्टा र कम्मरलाई लचिलो एवम् बलियो बनाउंछ र सुडौल पार्छ ।



In [None]:
# Tokenize the sample sentence again and keep only the integer token ids.
sample_encoding = tokenizer.encode(input)
encoded_text = sample_encoding.ids
print(encoded_text)

[160, 101, 108, 160, 101, 126, 160, 101, 233, 160, 101, 103, 160, 101, 126, 160, 101, 121, 160, 101, 112, 160, 102, 227, 224, 29, 224, 160, 101, 111, 160, 101, 120, 224, 160, 101, 232, 160, 101, 120, 160, 101, 105, 160, 101, 114, 160, 102, 233, 224, 160, 101, 121, 160, 101, 126, 160, 101, 101, 160, 101, 248, 160, 102, 227, 160, 101, 257, 160, 102, 239, 160, 101, 257, 160, 101, 126, 224, 160, 101, 112, 224, 160, 101, 247, 160, 101, 110, 160, 102, 239, 160, 101, 110, 160, 101, 112, 160, 101, 114, 160, 101, 126, 160, 101, 234, 224, 160, 101, 114, 160, 101, 252, 160, 101, 127, 160, 101, 114, 160, 102, 237, 224, 160, 101, 241, 160, 101, 117, 160, 101, 110, 160, 102, 239, 224, 160, 101, 109, 160, 101, 114, 160, 101, 127, 160, 101, 111, 160, 102, 237, 224, 160, 101, 109, 160, 101, 105, 160, 101, 126, 160, 101, 235, 160, 101, 228, 160, 101, 253, 224, 160, 101, 112, 224, 160, 101, 120, 160, 102, 227, 160, 101, 98, 160, 102, 238, 160, 101, 114, 224, 160, 101, 107, 160, 101, 126, 160, 101, 112, 1

In [None]:
# Round-trip check: decoding the ids should reproduce the sample sentence.
decoded_text = tokenizer.decode(encoded_text)
print(decoded_text)

फाइदाहरु : यस आसनले हातखुट्टा र कम्मरलाई लचिलो एवम् बलियो बनाउंछ र सुडौल पार्छ ।

