In [1]:
# Upgrade transformers using only the package files found in the local
# --find-links directory; --no-index stops pip from contacting a package index.
!pip install -q -U transformers --no-index --find-links /kaggle/input/hf-libraries/transformers


[0m

# Import important libraries

In [2]:
import numpy  as np
import pandas as pd
import os
import torch
import logging
import random
import re

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModel

# Keep the transformers logger quiet: only ERROR and above are emitted.
hf_logger = logging.getLogger('transformers')
hf_logger.setLevel(logging.ERROR)

# Report whether torch can see a CUDA device before any model is loaded.
cuda_ready = torch.cuda.is_available()
print(f'using gpu is {cuda_ready}')

using gpu is True


In [3]:
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# Read the topics

In [4]:
# Load the test set; the displayed frame below shows an `id` and a `topic` column.
test_csv_path = "/kaggle/input/llms-you-cant-please-them-all/test.csv"
test_df = pd.read_csv(test_csv_path)

# Topics as a plain Python list, in the same row order as test_df.
all_topics = test_df.loc[:, "topic"].tolist()

test_df


Unnamed: 0,id,topic
0,1097671,Compare and contrast the importance of self-re...
1,1726150,Evaluate the effectiveness of management consu...
2,3211968,Discuss the role of self-reliance in achieving...


# Create LLM (Essay Generator)

In [5]:
# Drop objects left over from a previous run of this cell *before* emptying the
# CUDA cache: empty_cache() only releases cached blocks that are no longer
# occupied, so calling it while the old model is still referenced frees nothing.
for stale_name in ('model', 'pipe', 'tokenizer'):
    globals().pop(stale_name, None)
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Model configuration: local directory holding the checkpoint and tokenizer files.
model_name = '/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1'


# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model in bfloat16 and let device_map="auto" decide the device placement.
# NOTE(review): trust_remote_code=True executes Python code shipped with the
# checkpoint directory; only keep it for model sources you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Set Hyperparameters

In [6]:
# --- Generation hyperparameters -------------------------------------------
# Default cap on generated tokens; get_response() may pass its own value per call.
max_new_tokens = 180

# Word count meant to be requested in prompts.
# NOTE(review): not read by any visible cell; prompts hard-code "Around 150 words."
word_count_to_request = 60

# Higher temperature = more random/creative outputs.
temperature = 0.7
# Nucleus-sampling threshold (1.0 disables the filtering).
top_p = 0.7

# Sampling settings handed to the pipeline as its defaults.
sampling_defaults = dict(
    max_new_tokens=max_new_tokens,
    temperature=temperature,
    top_p=top_p,
    do_sample=True,
)

# Text-generation pipeline built on the already-loaded model and tokenizer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    **sampling_defaults,
)


def get_response(messages, trim_numbered_lists=True, max_tokens=150):
    """Run the chat `messages` through the global `pipe` and return the reply text.

    Args:
        messages: list of {"role": ..., "content": ...} dicts passed to `pipe`.
        trim_numbered_lists: accepted for compatibility but currently ignored;
            no trimming of numbered lists is performed.
        max_tokens: if truthy, forwarded to `pipe` as `max_new_tokens` for this
            call; if falsy (None or 0), the pipeline's own setting is used.

    Returns:
        The `content` of the last message in the first result's
        `generated_text`, with surrounding whitespace stripped.
    """
    # Only override the pipeline's token limit when a limit was actually given.
    overrides = {'max_new_tokens': max_tokens} if max_tokens else {}

    outputs = pipe(messages, **overrides)
    # The final entry of the returned conversation is the newly generated turn.
    reply = outputs[0]['generated_text'][-1]['content']
    return reply.strip()

### Example prompt

In [7]:
if 1:
    # Smoke test: send a single user turn through get_response and show the reply.
    user_turn = {"role": "user", "content": "Write an essay about AI and its impact on climate change."}
    messages = [user_turn]

    essay = get_response(messages)

    print(essay)

Title: Artificial Intelligence and its Impact on Climate Change

Introduction

Artificial Intelligence (AI) has rapidly evolved from a concept in science fiction to a tangible technology that is transforming various industries and sectors. Its applications range from healthcare, finance, and transportation to climate change. While AI has the potential to significantly contribute to mitigating the effects of climate change, it also poses challenges that need to be addressed. This essay explores the impact of AI on climate change, highlighting both its potential benefits and challenges.

Potential Benefits of AI in Climate Change Mitigation

1. Impro


# Create essays based on topics

## Rephrase and negate the topic, then create the essay

### Very simple essay generator

In [8]:
if 0:
    # Disabled variant (switch to `if 1:` to run).
    # For every topic: generate a short essay, then rebuild it sentence by
    # sentence, following each sentence with a model-written negation and a
    # model-written "neutral" restatement, until more than 80 words are collected.
    ROLE = "user"

    all_essays = []
    for topic in all_topics:
        print(topic)
        print("-"*80)

        prompt = topic + " Around 150 words."
        messages = [{"role": ROLE, "content": prompt}]
        essay = get_response(messages)
        # str.replace returns a new string; assign it back, otherwise the
        # asterisks and newlines are never actually removed.
        essay = essay.replace("*", "").replace("\n", "")

        essay_sentences = essay.split(".")
        essay = ""
        word_count = 0
        for sentence in essay_sentences:
            messages = [{"role": ROLE, "content": "Negate this sentence: " + sentence}]
            neg_sentence = get_response(messages)
            # [:-1] drops the reply's last character (presumably its closing
            # period); a period is re-added when the sentence is appended below.
            sentence += ". " + neg_sentence[:-1]

            # The "neutral" prompt receives the sentence together with its negation.
            messages = [{"role": ROLE, "content": "Neutral this sentence: " + sentence}]
            neutral_sentence = get_response(messages)
            sentence += ". " + neutral_sentence[:-1]

            word_count += len(sentence.split())
            essay += " " + sentence + "."
            # Stop once the rebuilt essay exceeds the 80-word budget.
            if word_count > 80:
                break

        print("Essay: ", essay)
        print("="*80)

        all_essays.append(essay)


### Concatenate each essay with the previous one

In [9]:
if 1:
    # Active variant.
    # For every topic: generate a short essay, rebuild it sentence by sentence
    # (sentence + negation + "neutral" restatement) until more than 80 words are
    # collected, then append the previous topic's finished essay. Because that
    # previous essay already contains its own predecessor, essays grow cumulatively.
    ROLE = "user"

    all_essays = []
    for i, topic in enumerate(all_topics):
        print(topic)
        print("-"*80)

        prompt = topic + " Around 150 words."
        print(prompt)
        print("-"*80)
        messages = [{"role": ROLE, "content": prompt}]
        essay = get_response(messages)
        # str.replace returns a new string; assign it back, otherwise the
        # asterisks and newlines are never actually removed.
        essay = essay.replace("*", "").replace("\n", "")

        # Refinement
        essay_sentences = essay.split(".")
        essay = ""
        word_count = 0
        for sentence in essay_sentences:
            messages = [{"role": ROLE, "content": "Negate this sentence: " + sentence}]
            neg_sentence = get_response(messages)
            # [:-1] drops the reply's last character (presumably its closing
            # period); a period is re-added when the sentence is appended below.
            sentence += ". " + neg_sentence[:-1]

            # The "neutral" prompt receives the sentence together with its negation.
            messages = [{"role": ROLE, "content": "Neutral this sentence: " + sentence}]
            neutral_sentence = get_response(messages)
            sentence += ". " + neutral_sentence[:-1]

            word_count += len(sentence.split())
            essay += " " + sentence + "."
            # Stop once the rebuilt essay exceeds the 80-word budget.
            if word_count > 80:
                break

        # Add the previous topic's essay (nothing to add for the first topic).
        if i > 0:
            essay += " " + all_essays[i-1]

        print("Essay: ", essay)
        print("="*80)

        all_essays.append(essay)


Compare and contrast the importance of self-reliance and adaptability in healthcare.
--------------------------------------------------------------------------------
Compare and contrast the importance of self-reliance and adaptability in healthcare. Around 150 words.
--------------------------------------------------------------------------------
Essay:   Self-reliance and adaptability are both crucial in healthcare, yet they serve distinct roles. Self-reliance and adaptability are not both crucial in healthcare, and they do not serve distinct roles. Neutral sentence: Self-reliance and adaptability both play important roles in healthcare, each contributing uniquely to its effectiveness.

In this neutral statement, it acknowledges that self-reliance and adaptability are significant in healthcare without implying that one is more crucial than the other. It also recognizes that they contribute uniquely, preserving the distinct roles they play in the healthcare sector.
Evaluate the effect

### Append the previous topic to the current essay

In [10]:
if 0:
    # Disabled variant (switch to `if 1:` to run).
    # Same sentence-by-sentence rebuild as the active cell, but the text appended
    # at the end is the previous *topic* string rather than the previous essay.
    ROLE = "user"

    all_essays = []
    for i, topic in enumerate(all_topics):
        print(topic)
        print("-"*80)

        prompt = topic + " Around 150 words."
        messages = [{"role": ROLE, "content": prompt}]
        essay = get_response(messages)
        # str.replace returns a new string; assign it back, otherwise the
        # asterisks and newlines are never actually removed.
        essay = essay.replace("*", "").replace("\n", "")

        # Refinement
        essay_sentences = essay.split(".")
        essay = ""
        word_count = 0
        for sentence in essay_sentences:
            messages = [{"role": ROLE, "content": "Negate this sentence: " + sentence}]
            neg_sentence = get_response(messages)
            # [:-1] drops the reply's last character (presumably its closing
            # period); a period is re-added when the sentence is appended below.
            sentence += ". " + neg_sentence[:-1]

            # The "neutral" prompt receives the sentence together with its negation.
            messages = [{"role": ROLE, "content": "Neutral this sentence: " + sentence}]
            neutral_sentence = get_response(messages)
            sentence += ". " + neutral_sentence[:-1]

            word_count += len(sentence.split())
            essay += " " + sentence + "."
            # Stop once the rebuilt essay exceeds the 80-word budget.
            if word_count > 80:
                break

        # Add previous topic (nothing to add for the first topic).
        if i > 0:
            essay += " " + all_topics[i-1]

        print("Essay: ", essay)
        print("="*80)

        all_essays.append(essay)


In [11]:
if 0:
    # Disabled variant (switch to `if 1:` to run).
    # First ask the model to rephrase and negate the topic, then generate the
    # essay from the negated topic and rebuild it sentence by sentence
    # (sentence + negation + "neutral" restatement) up to the 80-word budget.
    ROLE = "user"

    all_essays = []
    for topic in all_topics:
        print(topic)
        print("-"*80)

        prompt = "First, rephrase this sentence and then negate it: " + topic
        messages = [{"role": ROLE, "content": prompt}]
        neg_topic = get_response(messages)
        # Keep only the text after the "negated sentence" label (plus the one
        # character following it). If the label is missing, str.find returns -1
        # and slicing from that offset would chop an arbitrary prefix, so the
        # whole reply is kept instead.
        marker = "negated sentence"
        marker_pos = neg_topic.lower().find(marker)
        if marker_pos != -1:
            neg_topic = neg_topic[marker_pos + len(marker) + 1:]
        neg_topic = neg_topic.strip()
        print("neg_topic: ", neg_topic)
        print("-"*80)

        prompt = neg_topic + " Around 150 words."
        messages = [{"role": ROLE, "content": prompt}]
        essay = get_response(messages)
        # str.replace returns a new string; assign it back, otherwise the
        # asterisks and newlines are never actually removed.
        essay = essay.replace("*", "").replace("\n", "")

        essay_sentences = essay.split(".")
        essay = ""
        word_count = 0
        for sentence in essay_sentences:
            messages = [{"role": ROLE, "content": "Negate this sentence: " + sentence}]
            neg_sentence = get_response(messages)
            # [:-1] drops the reply's last character (presumably its closing
            # period); a period is re-added when the sentence is appended below.
            sentence += ". " + neg_sentence[:-1]

            # The "neutral" prompt receives the sentence together with its negation.
            messages = [{"role": ROLE, "content": "Neutral this sentence: " + sentence}]
            neutral_sentence = get_response(messages)
            sentence += ". " + neutral_sentence[:-1]

            word_count += len(sentence.split())
            essay += " " + sentence + "."
            # Stop once the rebuilt essay exceeds the 80-word budget.
            if word_count > 80:
                break

        print("Essay: ", essay)
        print("="*80)

        all_essays.append(essay)
        
    


In [12]:
if 0:
    # Disabled variant (switch to `if 1:` to run).
    # Same sentence-by-sentence rebuild as the other cells, but with a word
    # budget three times larger (80 * 3) and the final word count printed.
    ROLE = "user"

    all_essays = []
    for topic in all_topics:
        prompt = topic + " Around 150 words."
        messages = [{"role": ROLE, "content": prompt}]
        essay = get_response(messages)

        # str.replace returns a new string; assign it back, otherwise the
        # asterisks and newlines are never actually removed.
        essay = essay.replace("*", "").replace("\n", "")

        essay_sentences = essay.split(".")
        essay = ""
        word_count = 0
        for sentence in essay_sentences:
            messages = [{"role": ROLE, "content": "Negate this sentence: " + sentence}]
            neg_sentence = get_response(messages)
            # [:-1] drops the reply's last character (presumably its closing
            # period); a period is re-added when the sentence is appended below.
            sentence += ". " + neg_sentence[:-1]

            # The "neutral" prompt receives the sentence together with its negation.
            messages = [{"role": ROLE, "content": "Neutral this sentence: " + sentence}]
            neutral_sentence = get_response(messages)
            sentence += ". " + neutral_sentence[:-1]

            word_count += len(sentence.split())
            essay += " " + sentence + "."
            # Stop once the rebuilt essay exceeds the enlarged word budget.
            if word_count > 80 * 3:
                break

        print(word_count)
        all_essays.append(essay)
        print(essay)
        print("="*80)


# Save the final result for submission

In [13]:
# Pair each test id with the essay generated for the same row, then write the
# result to submission.csv without the index column.
submission_columns = {
    'id': test_df["id"].tolist(),
    'essay': all_essays,
}
submission = pd.DataFrame(submission_columns)
submission.to_csv("submission.csv", index=False)
submission

Unnamed: 0,id,essay
0,1097671,Self-reliance and adaptability are both cruci...
1,1726150,Management consulting plays a pivotal role in...
2,3211968,Self-reliance plays a pivotal role in the suc...
