# **OPT-125m**

In [1]:
!pip install transformers torch textstat language-tool-python

Collecting textstat
  Downloading textstat-0.7.4-py3-none-any.whl.metadata (14 kB)
Collecting language-tool-python
  Downloading language_tool_python-2.8.1-py3-none-any.whl.metadata (12 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.0-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.4-py3-none-any.whl (105 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 105.1/105.1 kB 8.1 MB/s eta 0:00:00
Downloading language_tool_python-2.8.1-py3-none-any.whl (35 kB)
Downloading pyphen-0.17.0-py3-none-any.whl (2.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 7.0 MB/s eta 0:00:00
Installing collected packages: pyphen, textstat, language-tool-python
Successfully installed language-tool-python-2.8.1 pyphen-0.17.0 textstat-0.7.4


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the pretrained OPT-125m causal language model and its matching tokenizer.
# Both are module-level objects shared by generate_article and calculate_perplexity.
model_name = "facebook/opt-125m"  # Hub identifier of the OPT-125m checkpoint (not a Falcon model)
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Function to generate articles with OPT-125m using sampled (temperature + top-p) decoding
def generate_article(prompt, max_length=500, temperature=0.7, top_p=0.9, repetition_penalty=1.2):
    """Sample one continuation of ``prompt`` from the module-level ``model``.

    Args:
        prompt: Text the model should continue.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed. The decoded sequence includes the prompt tokens.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(
        encoded["input_ids"],
        # Reuse the mask produced by the tokenizer rather than rebuilding an
        # all-ones tensor; the two agree for a single unpadded prompt, but the
        # tokenizer's mask stays correct if padding is ever introduced.
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        do_sample=True,  # sample instead of greedy decoding
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        num_return_sequences=1,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Example usage
# Generation samples from the model, so fix torch's RNG seed first; otherwise
# every re-run of the notebook produces a different article and different metrics.
torch.manual_seed(42)
prompt = "How does climate change affect agriculture in various regions around the world?"
article = generate_article(prompt)

print("Generated Article:\n")
print(article)

import language_tool_python
import textstat

# Initialize LanguageTool for grammar checking.
# Created once with the 'en-US' language code and reused by grammar_check for every call.
tool = language_tool_python.LanguageTool('en-US')

# Performance Metric 1: Grammar Check
def grammar_check(article):
    """Return how many issues the shared LanguageTool instance reports for ``article``."""
    return len(tool.check(article))

# Performance Metric 2: Readability Score
def readability_score(article):
    """Return the value of ``textstat.flesch_reading_ease`` for ``article``."""
    return textstat.flesch_reading_ease(article)

# Performance Metric 3: Word Count
def word_count(article):
    """Return the number of whitespace-separated tokens in ``article``."""
    return len(article.split())

# Performance Metric 4: Perplexity under OPT-125m
def calculate_perplexity(article):
    """Return the perplexity the module-level ``model`` assigns to ``article``.

    The text is tokenized with the module-level ``tokenizer`` and passed to the
    model with its own token ids as labels; the result is ``exp(loss)`` as a
    Python float.

    Args:
        article: Text to score.

    Returns:
        ``exp`` of the loss returned by the model for this text.
    """
    encoded = tokenizer(article, return_tensors="pt")
    # This is evaluation only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**encoded, labels=encoded["input_ids"])
    # NOTE(review): the loss is presumably the mean next-token cross-entropy,
    # which makes exp(loss) the perplexity; confirm against the model docs.
    return torch.exp(outputs.loss).item()

# Evaluate all metrics
def evaluate_article(article):
    """Compute every metric for ``article`` and print them as a short report.

    The metrics are evaluated in the order grammar, readability, word count,
    perplexity, and nothing is printed until all four have been computed.
    Returns ``None``.
    """
    grammar_errors = grammar_check(article)
    reading_ease = readability_score(article)
    n_words = word_count(article)
    ppl = calculate_perplexity(article)

    report_lines = [
        "Performance Metrics:\n",
        f"Grammar Errors: {grammar_errors}",
        f"Readability Score: {reading_ease}",
        f"Word Count: {n_words}",
        f"Perplexity: {ppl}",
    ]
    print("\n".join(report_lines))

# Example usage: print the metric report for the `article` generated earlier in this cell
evaluate_article(article)

Generated Article:

How does climate change affect agriculture in various regions around the world?
Climate change impacts agricultural production and farming in different regions of the globe. According to the UN's Food and Agriculture Organisation, more than 70% of all food crops are affected by global warming. The number of countries with the largest and most affected areas is China (31%), India (22%) and Russia (15%).
In 2018, about 10 per cent of the total greenhouse gas emissions from agriculture were due to climate change. However, the amount of CO2 emitted in 2020 was lower than 2017, when it was about 20 per cent. Therefore, the rate at which CO2 is released by agriculture can be expected to increase as well.
As a result, there has been a rise in water pollution and increased deforestation. There have also been increases in air pollution, land use, biodiversity loss, and degradation of ecosystems. The main drivers for this trend include:
Increasing the amount of carbon dioxide