In [2]:
%%capture
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

### **Fine-Tuning Llama 3.1-8B**

**Loading Model and Tokenizer using Unsloth**

In [None]:
from unsloth import FastLanguageModel
import torch
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.0.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

**Tokenizer Setup**

In [None]:
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
    chat_template="chatml",
)

Unsloth: Will map <|im_end|> to EOS = <|end_of_text|>.


**Testing Llama 3.1 before Fine-Tuning**

In [None]:
from transformers import TextStreamer
FastLanguageModel.for_inference(model)

messages = [
    {"from": "human", "value": "Is 9.11 larger than 9.9?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=50, use_cache=True)

<|im_start|>user
Is 9.11 larger than 9.9?<|im_end|>
<|im_start|>assistant
Is 9.11 larger than 9.9? (1) Yes, 9.11 is larger than 9.9. (2) No, 9.11 is not larger than 9.9.
Is 9


**Loading Training Dataset-mlabonne/FineTome-100k**

In [20]:
from datasets import load_dataset

def apply_template(examples):
    messages = examples["conversations"]
    text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]
    return {"text": text}

dataset = load_dataset("mlabonne/FineTome-100k", split="train")
dataset = dataset.map(apply_template, batched=True)

Unsloth: Will map <|im_end|> to EOS = <|im_end|>.


Downloading readme:   0%|          | 0.00/982 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/117M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

**LoRA Configuration for Language Model Fine-Tuning**

In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 42,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2024.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


**Training Configuration for Language Model Fine-Tuning**

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

training_args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 50,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    )

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 2048,
    dataset_num_proc = 2,
    packing = False,
    args = training_args,
)

Map (num_proc=2):   0%|          | 0/100000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 50
 "-____-"     Number of trainable parameters = 20,971,520


Step,Training Loss
1,1.193
2,1.0816
3,1.6662
4,1.2101
5,1.1936
6,1.1385
7,0.922
8,1.1251
9,0.8747
10,0.7893


Step,Training Loss
1,1.193
2,1.0816
3,1.6662
4,1.2101
5,1.1936
6,1.1385
7,0.922
8,1.1251
9,0.8747
10,0.7893


In [None]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear4bit(
        

**Testing Fine-Tuned LLM**

In [None]:
from transformers import TextStreamer
FastLanguageModel.for_inference(model)

messages = [
    {"from": "human", "value": "Is 9.11 larger than 9.9?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=50, use_cache=True)

<|im_start|>user
Is 9.11 larger than 9.9?<|im_end|>
<|im_start|>assistant
Yes, 9.11 is larger than 9.9.<|im_end|>


### **Saving LLM and pushing to HuggingFace Repo**

In [None]:
# model.save_pretrained("qlora_model")
# tokenizer.save_pretrained("qlora_model")

('qlora_model/tokenizer_config.json',
 'qlora_model/special_tokens_map.json',
 'qlora_model/tokenizer.json')

In [None]:
# !huggingface-cli repo create Meta-Llama3.1-8B-4bit-Fine-Tuned

In [None]:
# model.push_to_hub("mecha2019/Meta-Llama3.1-8B-4bit-Fine-Tuned" )
# tokenizer.push_to_hub("mecha2019/Meta-Llama3.1-8B-4bit-Fine-Tuned")

### **Model Evaluation**

In [None]:
# !pip install evaluate bert_score rouge_score

**Loading Fine-Tuned Model from HuggingFace**

In [3]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch
model = AutoPeftModelForCausalLM.from_pretrained(
    "mecha2019/Meta-Llama3.1-8B-4bit-Fine-Tuned",
    load_in_4bit = True,
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("mecha2019/Meta-Llama3.1-8B-4bit-Fine-Tuned")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/733 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/83.9M [00:00<?, ?B/s]

**Selecting 10 Test Examples from Dataset**

In [58]:
test = dataset['conversations'][4999:5009]

In [59]:
test

[[{'from': 'human',
   'value': 'Explain the process of how code written in Java is compiled and run using a virtual machine. Describe the role of the virtual machine in this process and how it interacts with the hardware and operating system of the computer. Use examples to illustrate your points.'},
  {'from': 'gpt',
   'value': 'Java is a high-level programming language that was created in the 1990s by Sun Microsystems, which is now owned by Oracle Corporation. It is a popular programming language because of its portability, as it can run on multiple platforms such as Windows, Mac, and Linux. The process of compiling and running Java code involves three main steps: writing the code, compiling the code, and running the program on a virtual machine. \n\n1. Writing the code: Java code is written in a text editor, such as Notepad, Atom, or Eclipse. The code is saved with the .java extension, and the file name must match the name of the class containing the main method.\n\n2. Compiling t

**Getting References from Test Examples**

In [None]:
references = []
for expected in test:
    references.append(text[-1]['value'])

**Predicting Output using Fine-Tuned Model**

In [83]:
from transformers import TextStreamer
from unsloth import FastLanguageModel
FastLanguageModel.for_inference(model)

predictions = []
for message in test:
    messages = [message[0]]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    text_streamer = TextStreamer(tokenizer)
    response = model.generate(input_ids=inputs, streamer=text_streamer,temperature=0.1, max_new_tokens=200, use_cache=True)
    r = tokenizer.decode(response[0], skip_special_tokens=True)
    start_index = r.find("assistant") + len("assistant\n")
    assistant_response = r[start_index:].strip()
    predictions.append(assistant_response)

<|im_start|>user
Explain the process of how code written in Java is compiled and run using a virtual machine. Describe the role of the virtual machine in this process and how it interacts with the hardware and operating system of the computer. Use examples to illustrate your points.<|im_end|>
<|im_start|>assistant
The process of compiling and running Java code involves several steps:

1. Compilation: The Java compiler, javac, takes the source code written in Java and converts it into bytecode, which is a set of instructions that can be executed by the Java Virtual Machine (JVM). The bytecode is saved in a file with the extension.class.

2. Class Loading: The JVM loads the bytecode into memory and creates an instance of the class. This process involves loading the class definition, initializing the class, and creating an instance of the class.

3. Execution: The JVM executes the bytecode instructions, which are interpreted by the JVM. The JVM interacts with the hardware and operating sy

In [85]:
len(predictions)

10

In [46]:
references = ['Java is a high-level programming language that was created in the 1990s by Sun Microsystems, which is now owned by Oracle Corporation. It is a popular programming language because of its portability, as it can run on multiple platforms such as Windows, Mac, and Linux. The process of compiling and running Java code involves three main steps: writing the code, compiling the code, and running the program on a virtual machine. \n\n1. Writing the code: Java code is written in a text editor, such as Notepad, Atom, or Eclipse. The code is saved with the .java extension, and the file name must match the name of the class containing the main method.\n\n2. Compiling the code: Once the code has been written, it needs to be compiled into bytecode using a Java compiler. The compiler checks for syntax errors and translates the Java code into bytecode, which is a low-level code that can be executed by the Java Virtual Machine (JVM). The bytecode is saved with the .class extension and can be run on any platform with a JVM. For example, to compile a Java program in the command line, you can use the following command:\n\n```\njavac HelloWorld.java\n```\nThis command will compile the file HelloWorld.java into the bytecode HelloWorld.class.\n\n3. Running the program on a virtual machine: The Java bytecode is executed on a virtual machine, which is a software implementation of a computer that runs the bytecode. The JVM interprets the bytecode and translates it into machine language instructions that can be executed by the operating system and hardware of the computer. The JVM provides a level of abstraction that allows Java programs to be run on any platform with a JVM. For example, to run a Java program in the command line, you can use the following command:\n\n```\njava HelloWorld\n```\nThis command will start the JVM and run the bytecode in the file HelloWorld.class.\n\nThe virtual machine plays a crucial role in the process of compiling and running Java code. It provides a layer of abstraction between the Java program and the underlying hardware and operating system. The JVM interacts with the operating system and hardware by providing a standard interface that allows the Java program to access system resources, such as the file system, network, and memory. The JVM also manages the memory allocation and garbage collection for the Java program.\n\nFor example, suppose you have a Java program that reads a file from the file system and prints its contents to the console. The JVM provides a standard interface to access the file system, and the operating system handles the actual reading and writing of the file. The JVM also manages the memory allocation for the program, which ensures that the program does not exceed the available memory and cause the system to crash.\n\nIn conclusion, the process of compiling and running Java code involves writing the code, compiling it into bytecode, and running it on a virtual machine. The virtual machine provides a level of abstraction that allows the Java program to run on any platform with a JVM. The JVM interacts with the hardware and operating system by providing a standard interface and managing the memory allocation and garbage collection for the program.',

'Step 1: The volume of a cube is given by the formula V = s^3, where s is the length of the side of the cube.\nStep 2: Given that the volume of the cube is 1000 cc, we can find the side length using the formula s = V^(1/3).\nStep 3: Substitute the given volume of 1000 cc into the formula to find the side length: s = 1000^(1/3) = 10.\nStep 4: To find the surface area of the cube, we use the formula A = 6s^2, where A is the surface area and s is the length of the side.\nStep 5: Substitute the value of s into the formula to find the surface area: A = 6 * 10^2 = 600 square units.\n\nAnswer:\n\\boxed{600}',

'The effect of temperature on the surface tension of water is that as the temperature increases, the surface tension decreases. This happens because the increased temperature leads to an increase in the kinetic energy of water molecules. As a result, the molecules move more rapidly and the intermolecular forces holding them together at the surface become weaker. Consequently, the surface tension decreases.\n\nTo measure the effect of temperature on the surface tension of water accurately, you can use the following methods:\n\n1. Capillary rise method: In this method, a thin capillary tube is dipped into the water, and the height to which the water rises in the tube is measured. The surface tension can be calculated using the formula: γ = (hρgR)/2, where γ is the surface tension, h is the height of the liquid column, ρ is the density of the liquid, g is the acceleration due to gravity, and R is the radius of the capillary tube. This experiment can be performed at different temperatures to observe the effect on surface tension.\n\n2. Wilhelmy plate method: This method involves immersing a thin, flat plate (usually made of glass or platinum) vertically into the water and measuring the force required to detach it from the water surface. The surface tension can be calculated by dividing the measured force by the perimeter of the plate. This experiment can also be performed at different temperatures to study the effect on surface tension.\n\n3. Maximum bubble pressure method: In this method, a gas is bubbled through a small orifice submerged in the water, and the pressure of the gas is measured when the maximum size of the bubble is reached just before detachment. The surface tension can be calculated using the formula: γ = (Pd)/4, where γ is the surface tension, P is the pressure of the gas, and d is the diameter of the orifice. This experiment can be conducted at various temperatures to observe the effect on surface tension.\n\n4. Drop weight or drop volume method: This method involves measuring the weight or volume of individual droplets of water detaching from the end of a thin tube. The surface tension can be calculated using the formula: γ = (mgR)/2, where γ is the surface tension, m is the mass of the droplet, g is the acceleration due to gravity, and R is the radius of the tube. This experiment can be performed at different temperatures to study the effect on surface tension.\n\nBy using any of these methods and performing the experiments at different temperatures, you can accurately measure the effect of temperature on the surface tension of water.',

'Equilibrium is an important concept in economics, describing the state where the supply and demand for a particular product reach a balance. In economic terms, equilibrium refers to the point where the market forces are in balance, and there is no excess demand or supply for goods or services.\n\nThis concept is illustrated by the supply and demand curve, which shows the relationship between the price of a product and the quantity that consumers and producers are willing or able to buy or sell respectively. When there is an increase in demand, suppliers will raise their prices to take advantage of the fact that consumers are willing to pay more. As prices rise, the quantity supplied also increases until the market eventually reaches a new equilibrium point, where the quantity supplied equals the quantity demanded.\n\nOn the other hand, if there is an increase in supply, prices will decrease to attract more consumers. As prices fall, the quantity demanded increases until the market reaches a new equilibrium point, where the quantity supplied equals the quantity demanded.\n\nEquilibrium is important in economics because it represents the most efficient allocation of resources in a market, where prices reflect the true cost of production and the true value that consumers place on the goods or services. When markets are not in equilibrium, there can be either a surplus or a shortage of goods, leading to inefficiencies and market distortion. In the long term, market forces tend to bring the market back to equilibrium, ensuring that resources are allocated efficiently.',

"Overflow occurs in binary arithmetic when the result of an operation exceeds the representable range of a given data type, typically in fixed-width binary numerals like signed integers. To determine overflow systematically, consider the following:\n\n1. Ignore any carry out from the most significant bit, as well as carry in, as they are not relevant to detecting overflow.\n2. Examine the most significant bits (MSBs) of the two input numbers and the output:\n\n   Case 1: If the input numbers have different MSBs (one is positive, the other is negative):\n       - There will never be overflow, as the result will always fall within the representable range.\n\n   Case 2: If the input numbers have the same MSB:\n       - If the output's MSB is the same as the inputs' MSB, the result is correct, and there is no overflow.\n       - If the output's MSB is different from the inputs' MSB, there was an overflow.\n\nFor instance, consider the following three-bit additions:\n- $111 + 111 + 1 = 1111$ (overflow, since inputs are negative and output is positive)\n- $111 + 111 = 1110$ (no overflow, since inputs and output are negative)\n- $110 + 010 = 100$ (no overflow, despite different input signs, as the result is within the range)",'You can achieve this by writing a Java program that utilizes the `split()` method to split the input string into individual words, and then iterate through the words to find the longest one. Here\'s an example solution:\n\n```java\npublic class Main {\n    public static void main(String[] args) {\n        String input = "Hello World!";\n        String[] words = input.split(" ");\n        int maxLength = 0;\n        \n        for (String word : words) {\n            int length = word.length();\n            \n            if (length > maxLength) {\n                maxLength = length;\n            }\n        }\n        \n        System.out.println("The length of the longest word is " + maxLength);\n    }\n}\n```\n\nIn this solution, we first declare a variable `input` and assign it the value "Hello World!". Then, we utilize the `split()` method on the input string, passing a space as the delimiter, which splits the string into an array of words. \n\nWe initialize `maxLength` to 0, which will store the maximum length of a word. \n\nNext, we iterate through each word in the `words` array using a for-each loop. For each word, we get its length using the `length()` method and store it in the `length` variable.\n\nInside the loop, we compare the length of the current word with the `maxLength` value. If the length is greater, we update `maxLength` with the new length. \n\nFinally, we print out the result using the `println()` method, concatenating the string "The length of the longest word is " with the `maxLength` value.',

'Bioluminescence, the ability of an organism to produce and emit light, has evolved independently in various marine organisms, including bacteria, dinoflagellates, jellyfish, crustaceans, and fish. The selective pressures that drove the evolution of bioluminescence in marine organisms can be attributed to several factors, which contributed to their survival and adaptation in their environment.\n\n1. Predation and Defense: Bioluminescence can serve as a defense mechanism against predators. Some organisms, like the deep-sea shrimp, eject bioluminescent fluids to confuse or deter predators. Others, like the firefly squid, use bioluminescence to blend in with the light coming from the surface, making them less visible to predators from below (counter-illumination). This camouflage technique helps them avoid predation and increases their chances of survival.\n\n2. Prey Attraction: Bioluminescence can also be used to attract prey. Some deep-sea anglerfish have a bioluminescent lure that they use to attract smaller fish and other prey. The light-emitting lure entices the prey to come closer, making it easier for the anglerfish to capture them.\n\n3. Communication and Mating: Bioluminescence can serve as a means of communication between individuals of the same species. For example, some species of ostracods (small crustaceans) use bioluminescent signals to find and attract mates. This increases their chances of successful reproduction, which is essential for the survival of the species.\n\n4. Symbiosis: Some marine organisms form symbiotic relationships with bioluminescent bacteria. For example, the Hawaiian bobtail squid has a specialized light organ that houses bioluminescent bacteria. The squid provides the bacteria with nutrients, while the bacteria emit light that helps the squid camouflage itself from predators. This mutualistic relationship benefits both organisms and enhances their chances of survival.\n\n5. Adaptation to low-light environments: Many bioluminescent organisms live in deep-sea environments where sunlight is scarce or absent. The ability to produce light allows these organisms to navigate, find food, and communicate in the darkness. This adaptation is crucial for their survival in such extreme environments.\n\nIn conclusion, the evolution of bioluminescence in marine organisms has been driven by various selective pressures, including predation, defense, prey attraction, communication, mating, symbiosis, and adaptation to low-light environments. The ability to produce and emit light has contributed significantly to the survival and adaptation of these organisms in their respective environments, making bioluminescence a fascinating and essential evolutionary trait.',

"Step 1:  The goal is to find the number of minutes before midnight. This can be calculated as the difference between the current time and midnight in minutes.\nStep 2:  To calculate the number of minutes before midnight, we'll take the difference between 23:59 and the current time in minutes.\nStep 3:  We'll iterate through each test case, calculate the difference, and print the result for each case.\nStep 4:  The time input should be in the range of 0 ≤ h < 24 and 0 ≤ m < 60, and we'll account for this in our logic.\n```python\n# Step 5: \nimport sys\n\n# Step 6:  Function to read input from the standard input\ninput = sys.stdin.readline\n\n# Step 7:  Define a function to read an integer from the input\ndef inp():\n    return (int(input()))\n\n# Step 8:  Define a function to read two space-separated integers from the input and return them as a list\ndef invr():\n    return (map(int, input().split()))\n\n# Step 9:  Read the number of test cases\nt = inp()\n# Step 10:  Iterate through each test case\nfor i in range(t):\n    # Step 11:  Read the current time in hours and minutes\n    \n    h, m = invr()\n    # Step 12:  Calculate the number of minutes before midnight\n    \n    x = 23 - h\n    y = 60 - m\n    ans = (x * 60) + y\n    # Step 13:  Print the number of minutes before midnight for the current test case\n    \n    print(ans)\n\n```",

'While it is true that bullying has existed for thousands of years, it does not necessarily mean that bullying is an innate part of human nature or that it cannot be changed. The human experience is shaped by various socio-cultural, economic, environmental, and political factors. These factors influence human behavior, including bullying behavior. Additionally, what is considered acceptable behavior varies across cultures and times.\n\nIn modern societies, there has been a growing awareness of the negative impact of bullying, both on the bullied individuals and the broader society. As a result, many schools and communities have adopted programs and policies to prevent bullying. Through education, awareness-raising, and proactive intervention, it is possible to change social norms and attitudes towards bullying, and to create a culture that promotes respect, kindness, and empathy. \n\nWhile it may not be possible to completely eliminate bullying, we can strive to create a society where bullying is less accepted and less frequent, and where everyone can feel safe, respected, and valued. By working together towards this goal, we can help to ensure a better future for all.',

'Most galaxies, particularly spiral galaxies, have a disk-like structure due to the dynamics of their formation and gravitational forces. As a galaxy forms, the rotating cloud of gas and dust collapses under its own gravity. This collapse occurs more easily along the plane of the rotation, leading to the development of a flat, rotating disk. Stars and other celestial objects within the disk are primarily influenced by their angular momentum, causing them to orbit the galactic center in a two-dimensional plane.\n\nIn contrast, elliptical galaxies have a more spherical or ellipsoidal shape, which is a result of multiple smaller galaxies merging together, causing their stars to become randomly distributed in space. The analogy with electron orbits in an atom is not applicable here because the scales, forces, and dynamics involved in galactic structures are vastly different from those in atomic systems.']

In [25]:
references

['Java is a high-level programming language that was created in the 1990s by Sun Microsystems, which is now owned by Oracle Corporation. It is a popular programming language because of its portability, as it can run on multiple platforms such as Windows, Mac, and Linux. The process of compiling and running Java code involves three main steps: writing the code, compiling the code, and running the program on a virtual machine. \n\n1. Writing the code: Java code is written in a text editor, such as Notepad, Atom, or Eclipse. The code is saved with the .java extension, and the file name must match the name of the class containing the main method.\n\n2. Compiling the code: Once the code has been written, it needs to be compiled into bytecode using a Java compiler. The compiler checks for syntax errors and translates the Java code into bytecode, which is a low-level code that can be executed by the Java Virtual Machine (JVM). The bytecode is saved with the .class extension and can be run on 

In [26]:
len(references)

10

In [45]:
predictions = ["The process of compiling and running Java code involves several steps:\n\n1. Compilation: The Java compiler, javac, takes the source code written in Java and converts it into bytecode, which is a set of instructions that can be executed by the Java Virtual Machine (JVM). The bytecode is saved in a file with the extension .class.\n\n2. Class Loading: The JVM loads the bytecode into memory and creates an instance of the class. This process involves loading the class definition, initializing the class, and creating an instance of the class.\n\n3. Execution: The JVM executes the bytecode instructions, which are interpreted by the JVM. The JVM interacts with the hardware and operating system to perform the necessary tasks, such as memory allocation, file I/O, and thread management.\n\n4. Garbage Collection: The JVM automatically manages the memory used by the program, freeing up unused memory and preventing memory leaks.\n\nThe virtual machine plays a crucial role in this process by providing a platform-independent environment for executing Java code.",
               "Step 1: The volume of a cube is given as 1000 cc. We need to find the surface area of the cube.\nStep 2: The formula for the surface area of a cube is 6 × side².\nStep 3: We can solve for the side of the cube by using the formula for the volume of a cube, which is side³ = 1000.\nStep 4: Taking the cube root of both sides, we get side = 10.\nStep 5: Substituting side = 10 in the formula for surface area, we get 6 × 10² = 600.\nStep 6: The surface area of the cube is 600 cm².\nStep 7: Presenting the final answer as \\boxed{600 cm²}.",
               "The effect of temperature on the surface tension of water is that it decreases as the temperature increases. This can be measured accurately using a tensiometer, which measures the force required to break the surface tension of a liquid.",
               "Equilibrium in economics refers to a state of balance or stability in the economy, where the supply of goods and services matches the demand for them. It is a situation where the prices of goods and services are such that the quantity supplied is equal to the quantity demanded. In other words, equilibrium occurs when the market is in balance, and there is no tendency for prices or quantities to change.",
               "Overflow occurs when the result of a binary arithmetic operation exceeds the maximum representable value for the data type being used. It can be determined systematically by checking the carry and overflow flags in the processor's status register after each operation.",
               "You can use a loop to iterate through the words in the input string, and keep track of the length of the longest word. Here's an example code:\n\n```java\nimport java.util.*;\npublic class Main {\n    public static void main(String[] args) {\n        Scanner sc = new Scanner(System.in);\n        String input = sc.nextLine();\n        int maxLength = 0;\n        String longestWord = \"\";\n        String[] words = input.split(\" \");\n        for (String word : words) {\n            if (word.length() > maxLength) {\n                maxLength = word.length();\n                longestWord = word;\n            }\n        }\n        System.out.println(\"The longest word is: \" + longestWord);\n        System.out.println(\"Its length is: \" + maxLength);\n    }\n}\n```\n\nIn this code, we first split the input string into individual words using the `split()` method. Then, we loop through each word and check if its length is greater than the current maximum length. If",
               "Bioluminescence is a phenomenon where living organisms produce and emit light. It is a common trait among marine organisms, including fish, squid, and jellyfish. The selective pressures that drove the evolution of bioluminescence in marine organisms can be attributed to several factors:\n\n1. Predation: Bioluminescence can serve as a warning signal to potential predators, making the organism less attractive as prey. This can be particularly beneficial for small organisms that are easy targets for larger predators.\n2. Communication: Bioluminescence can be used for communication purposes, such as attracting mates or warning other members of the same species of danger.\n3. Navigation: Some marine organisms use bioluminescence to navigate in the dark, such as deep-sea fish that use their light to find food or avoid obstacles.\n4. Camouflage: Some marine organisms use bioluminescence to blend in with their surroundings, making them less visible to predators.\n5. Mimicry: Some marine organisms use",
               "Step 1: Read the input and store the time in variables h and m.\nStep 2: Calculate the number of minutes before the New Year by subtracting the current time from 24 hours and 60 minutes.\nStep 3: Print the result.\n```python\n# Step 1\nh, m = map(int, input().split())\n# Step 2\nminutes_before_new_year = (24 - h) * 60 + (60 - m)\n# Step 3\nprint(minutes_before_new_year)\n```\n",
               "Bullying can be prevented in schools by implementing various strategies and policies. Some effective measures include:\n1. Creating a positive school culture: Schools can promote a positive and inclusive environment by implementing anti-bullying programs, encouraging respect and empathy, and fostering a sense of community among students.\n2. Establishing clear policies and procedures: Schools should have clear policies and procedures in place to address bullying incidents, including reporting mechanisms, consequences for offenders, and support for victims.\n3. Training staff and students: Staff members should receive training on how to identify and respond to bullying, while students should be educated on the negative effects of bullying and how to stand up against it.\n4. Encouraging bystander intervention: Schools can promote a culture of intervention by encouraging students to report bullying incidents and supporting them in standing up for their peers.\n5. Providing support for victims: Schools should have support systems in place to help victims of bullying, such as counseling services, peer support groups, and individualized support.",
               "The formation of galaxies is a complex process that involves the gravitational collapse of gas and dust, as well as the influence of external factors such as collisions with other galaxies or the presence of a supermassive black hole at the center. The resulting shape of a galaxy depends on the balance between these forces and the initial conditions of the gas and dust.\n\nIn general, galaxies tend to have a disk-like structure because the gravitational collapse of gas and dust is more efficient in forming a flat, rotating disk than a spherical shape. This is because the gravitational force is stronger in the plane of the disk, where the gas and dust are more concentrated, than in the perpendicular direction. Additionally, the rotation of the disk helps to stabilize the structure against gravitational collapse, as the centrifugal force counteracts the gravitational force.\n\nThe formation of a spherical shape, on the other hand, requires a more delicate balance of forces. In the early stages of galaxy formation, the gas and dust may be distributed more uniformly, and the"
               ]

In [38]:
len(predictions)

10

In [1]:
from evaluate import load

**BertScore**

In [5]:
bertscore = load("bertscore")
results = bertscore.compute(predictions=predictions, references=references, lang="en")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
import pandas as pd
pd.DataFrame(results).drop('hashcode',axis=1)

Unnamed: 0,precision,recall,f1
0,0.890003,0.851815,0.87049
1,0.912006,0.886882,0.899268
2,0.916565,0.801991,0.855459
3,0.911202,0.856344,0.882922
4,0.894535,0.787629,0.837685
5,0.947115,0.915525,0.931052
6,0.917015,0.869889,0.89283
7,0.893431,0.823703,0.857151
8,0.861371,0.852911,0.85712
9,0.875136,0.86481,0.869942


**Rouge**

In [8]:
rouge = load('rouge')
results =  rouge.compute(
    predictions=predictions,
    references=references,
    use_aggregator=True,
    use_stemmer=True,
)

In [9]:
import pandas as pd
pd.Series(results)

Unnamed: 0,0
rouge1,0.404003
rouge2,0.185948
rougeL,0.251385
rougeLsum,0.355022


**Bleu Score**

In [13]:
bleu = load("bleu")
results = bleu.compute(predictions=predictions, references=references)
import pandas as pd
pd.Series(results)

Unnamed: 0,0
bleu,0.078037
precisions,"[0.6750902527075813, 0.36727272727272725, 0.22..."
brevity_penalty,0.253823
length_ratio,0.421742
translation_length,1385
reference_length,3284


In [15]:
results['precisions']

[0.6750902527075813,
 0.36727272727272725,
 0.2271062271062271,
 0.15867158671586715]

**Exact Match (EM)**

In [35]:
exact_match = load("exact_match")
results = exact_match.compute(references=references, predictions=predictions)
print(round(results["exact_match"], 2))

0.0
