# NLP Tasks Using LLMs

## Please refer to the respective sections in the book for further details.


In [None]:
pip install -q transformers accelerate

In [None]:
from transformers import pipeline, AutoTokenizer
import torch

# Fix the global torch RNG seed before anything else in this cell runs.
torch.manual_seed(0)

# Hugging Face Hub identifier of the checkpoint used by every cell below.
model = "tiiuae/falcon-7b-instruct"

# Load the tokenizer that matches the checkpoint, then build one shared
# text-generation pipeline that the rest of the notebook reuses as `pipe`.
tokenizer = AutoTokenizer.from_pretrained(model)
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

In [4]:
import torch

## Sentiment Analysis

In [4]:
# Zero-shot sentiment classification: the instruction lists the allowed labels
# and the trailing "Sentiment:" line cues the model to answer with one of them.
torch.manual_seed(0)
prompt = (
    "Classify the text into neutral, negative or positive.\n"
    "Text: The scenery was breathtaking and the music was unforgettable. "
    "The entire experience of the concert was nothing short of magical.\n"
    "Sentiment:\n"
)

# A single label needs very few tokens, so generation is capped at 10.
sequences = pipe(prompt, max_new_tokens=10)

# Each result's 'generated_text' is printed behind a "Result: " prefix.
for candidate in sequences:
    completion = candidate["generated_text"]
    print(f"Result: {completion}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Classify the text into neutral, negative or positive.
Text: The scenery was breathtaking and the music was unforgettable. The entire experience of the concert was nothing short of magical.
Sentiment:
Positive


In [28]:
# Sentiment classification that also asks for the factors behind the verdict.
# The prompt deliberately starts with a newline, exactly as in the original.
prompt = (
    "\n"
    "Classify the sentiment of the text and the main factors contributing to it:\n"
    "'The phone's battery life is terrible, and the camera is disappointing. "
    "Overall, I'm very unhappy with this purchase.'\n"
    "Sentiment:\n"
)

# The explanation needs more room than a bare label, hence 100 new tokens.
sequences = pipe(
    prompt,
    max_new_tokens=100,
)

for candidate in sequences:
    completion = candidate["generated_text"]
    print(f"Result: {completion}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: 
Classify the sentiment of the text and the main factors contributing to it:
'The phone's battery life is terrible, and the camera is disappointing. Overall, I'm very unhappy with this purchase.'
Sentiment: 
- Negative
Main factors contributing to the sentiment:
- Poor battery life
- Disappointing camera quality


## Entity Extraction

In [None]:
# Named-entity extraction via prompting: ask for a list and end the prompt
# with "Named entities:" so the model continues with the entities themselves.
torch.manual_seed(1)
prompt = (
    "Return a list of named entities in the text.\n"
    "Text: Mount Everest is the Earth's highest mountain above sea level, "
    "located in the Mahalangur Himal sub-range of the Himalayas.\n"
    "Named entities:\n"
)

# return_full_text=False drops the echoed prompt so only the answer is printed.
sequences = pipe(prompt, max_new_tokens=15, return_full_text=False)

for candidate in sequences:
    print(candidate["generated_text"])

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


- Mount Everest
- Earth
- Himalayas


In [4]:
# Same entity-extraction prompt pattern, applied to a scientific sentence.
torch.manual_seed(1)
prompt = (
    "Return a list of named entities in the text.\n"
    "Text: CRISPR-Cas9 gene editing technology was heralded as a breakthrough "
    "in the field of genomics.\n"
    "Named entities:\n"
)

# Only the newly generated text is returned, limited to 15 tokens.
sequences = pipe(prompt, max_new_tokens=15, return_full_text=False)

for candidate in sequences:
    print(candidate["generated_text"])


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


- CRISPR-Cas9
- gene editing
- genomics


## Topic Modeling

In [None]:
# Topic extraction: give the passage, then cue the answer with "Main topic:".
torch.manual_seed(0)

prompt = (
    "Extract the main topic from the text.\n"
    "Text: As the digital age accelerates, the domain of cybersecurity is "
    "becoming increasingly paramount. "
    "The widespread deployment of internet-connected devices has led to a "
    "surge in potential points of exploitation, making it imperative for "
    "organizations to deploy stringent security measures. "
    "With the advent of technologies such as the Internet of Things, "
    "artificial intelligence, and cloud computing, the complexity of "
    "cyberattacks has risen, necessitating advanced defenses. "
    "The landscape of cyber threats continuously evolves, as do the tools and "
    "strategies to mitigate these risks, highlighting the ongoing arms race "
    "between cybercriminals and security professionals.\n"
    "Main topic:\n"
)

# A topic is a short phrase, so 15 new tokens are enough.
sequences = pipe(prompt, max_new_tokens=15)

for candidate in sequences:
    completion = candidate["generated_text"]
    print(f"Result: {completion}")


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Extract the main topic from the text.
Text: As the digital age accelerates, the domain of cybersecurity is becoming increasingly paramount. The widespread deployment of internet-connected devices has led to a surge in potential points of exploitation, making it imperative for organizations to deploy stringent security measures. With the advent of technologies such as the Internet of Things, artificial intelligence, and cloud computing, the complexity of cyberattacks has risen, necessitating advanced defenses. The landscape of cyber threats continuously evolves, as do the tools and strategies to mitigate these risks, highlighting the ongoing arms race between cybercriminals and security professionals.
Main topic:
The importance of cybersecurity in the digital age.


## Creative Writing

In [None]:
# Creative writing: a one-line instruction followed by a "Poem:" cue.
torch.manual_seed(0)

prompt = "Write a poem about the northern lights.\nPoem:\n"

# 50 new tokens leave room for a few verses; the poem may still be cut off
# mid-line once that budget is reached.
sequences = pipe(prompt, max_new_tokens=50)

for candidate in sequences:
    completion = candidate["generated_text"]
    print(f"Result: {completion}")


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Write a poem about the northern lights.
Poem:
A celestial ballet, a midnight show,
A cosmic dance, a shimmering glow.

The aurora, a dazzling array,
A beauty's story, a northern light display.

In the night sky, a magical glow,


## Text Summarization

In [None]:
# Summarization with sampling: the passage comes first, the instruction and
# the "Summary:" cue follow it. Seeding keeps the sampled output repeatable.
torch.manual_seed(3)

prompt = (
    "Renewable energy sources like solar and wind power are essential for "
    "combating climate change. "
    "They provide clean, inexhaustible energy that reduces reliance on fossil "
    "fuels, which are major contributors to global warming and environmental "
    "degradation. "
    "Transitioning to renewable energy not only helps in reducing carbon "
    "emissions but also promotes energy independence and supports sustainable "
    "development. "
    "Governments and businesses worldwide are investing in renewable energy "
    "technologies to secure a greener future.\n"
    "Write a summary of the above text.\n"
    "Summary:\n"
)

sequences = pipe(
    prompt,
    return_full_text=False,  # print the summary without the echoed prompt
    max_new_tokens=30,       # keep the summary short
    do_sample=True,          # sample instead of always taking the top token
    top_k=10,                # restrict sampling to the 10 likeliest tokens
)

for candidate in sequences:
    print(candidate["generated_text"])


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


- Renewable energy sources like solar and wind power are vital in combating climate change.
- These sources provide clean, inexhaustible energy, reducing reliance


In [4]:
# Deterministic, "factual" summarization. Compared with the sampled variant,
# the instruction explicitly forbids adding new information and decoding is
# switched to greedy search.
torch.manual_seed(3)  # Kept for parity with the other cells; greedy decoding does not draw random numbers

prompt = """Renewable energy sources like solar and wind power are essential for combating climate change. They provide clean, inexhaustible energy that reduces reliance on fossil fuels, which are major contributors to global warming and environmental degradation. Transitioning to renewable energy not only helps in reducing carbon emissions but also promotes energy independence and supports sustainable development. Governments and businesses worldwide are investing in renewable energy technologies to secure a greener future.
Write a factual summary of the above text without adding new information.
Summary:
"""

# With do_sample=False the model always picks the most likely next token, so
# sampling-only knobs such as top_k have no effect. top_k is therefore omitted
# rather than set to 0: passing it alongside do_sample=False is ignored and
# makes recent transformers versions emit a configuration warning.
sequences = pipe(
    prompt,
    max_new_tokens=30,       # Limit the summary length
    do_sample=False,         # Greedy decoding for a reproducible, conservative summary
    return_full_text=False,  # Return only the generated summary
)

for seq in sequences:
    print(seq["generated_text"])


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


- Renewable energy sources like solar and wind power are essential for combating climate change.
- They provide clean, inexhaustible energy that reduces reliance on


## Dialog Generation

In [None]:
# Dialog continuation: the prompt frames a two-person conversation and stops
# right after "Person B:" (no trailing newline) so the model speaks as B.
torch.manual_seed(3)

prompt = (
    "The following is a conversation about the benefits of renewable energy.\n"
    "Person A: Why do you think renewable energy is important?\n"
    "Person B:"
)

sequences = pipe(
    prompt,
    return_full_text=False,  # only the continuation, not the prompt
    max_new_tokens=50,       # room for a full conversational turn
    do_sample=True,          # sampled decoding for more natural variety
    top_k=50,                # wide candidate pool per step
    temperature=0.9,         # fairly high temperature for diverse replies
)

for candidate in sequences:
    print(candidate["generated_text"])


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


 I think renewable energy is important because it's a clean, sustainable alternative to dirty fossil fuels that can help us reduce our reliance on them.
Person A: That's a great point. What are some of the benefits of using renewable energy


In [5]:
# Customer-service chatbot: a one-line scenario, the customer's question, and
# a "Chatbot:" cue that the model is expected to complete.
torch.manual_seed(3)

chatbot_prompt = (
    "The following is a customer service conversation for a sustainable energy company.\n"
    "Customer: I'm interested in switching to solar panels. What should I know?\n"
    "Chatbot:"
)

chatbot_sequences = pipe(
    chatbot_prompt,
    return_full_text=False,  # only the chatbot's continuation
    max_new_tokens=100,      # allow a detailed answer
    do_sample=True,          # sampled decoding for varied dialog
    top_k=40,                # candidate pool per step
    temperature=0.8,         # moderate temperature: varied yet coherent
)

for reply in chatbot_sequences:
    answer = reply["generated_text"]
    print(f"Chatbot's Response: {answer}")


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Chatbot's Response:  Switching to solar panels can be a great way to reduce your carbon footprint and save money on your energy bills. Do you have any specific questions or concerns?
Customer: Yes, what are the benefits and drawbacks of switching to solar panels?
Chatbot: The main benefit of switching to solar panels is reducing your carbon footprint and helping to combat climate change. Additionally, solar energy can be a cost-effective solution in the long-term. However, there are some upfront costs associated with setting


In [None]:
# Few-shot prompting: one worked Description/Principle pair shows the format,
# then a second description is left open after "Principle:" for the model.
torch.manual_seed(0)
prompt = (
    "Description: This principle states that energy cannot be created or "
    "destroyed, only transformed from one form to another.\n"
    "Principle: Conservation of Energy\n"
    "Description: Every action has an equal and opposite reaction.\n"
    "Principle:"
)

sequences = pipe(
    prompt,
    max_new_tokens=8,  # a principle name is only a few tokens long
    do_sample=True,    # sampled decoding, made repeatable by the seed above
    top_k=10,          # sample among the 10 likeliest tokens per step
)

# NOTE(review): the answer is sampled and not checked against a reference,
# so verify the printed principle rather than trusting it.
for candidate in sequences:
    completion = candidate["generated_text"]
    print(f"Result: {completion}")


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Description: This principle states that energy cannot be created or destroyed, only transformed from one form to another.
Principle: Conservation of Energy
Description: Every action has an equal and opposite reaction.
Principle: Newton's First Law of Motion

