In [11]:
!pip install -q einops
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, GenerationConfig, TextStreamer

import torch

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [13]:
MODEL_NAME_PHI2 = "microsoft/phi-2"

model_phi2 = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME_PHI2,
    torch_dtype="auto",
    flash_attn=True,
    flash_rotary=True,
    fused_dense=True,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer_phi2 = AutoTokenizer.from_pretrained(MODEL_NAME_PHI2, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:
generation_config = GenerationConfig.from_pretrained(MODEL_NAME_PHI2)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.do_sample = True

streamer = TextStreamer(tokenizer_phi2, skip_prompt=True, skip_special_tokens=True)

llm = pipeline(
    "text-generation",
    model=model_phi2,
    tokenizer=tokenizer_phi2,
    return_full_text=True,
    generation_config=generation_config,
    num_return_sequences=1,
    eos_token_id=tokenizer_phi2.eos_token_id,
    pad_token_id=tokenizer_phi2.eos_token_id,
    streamer=streamer,
)

In [16]:
from inspect import cleandoc


In [17]:
SYSTEM_PROMPT = """
You are a student working on the following assignment: write a text that talks about, phone and driving.
""".strip()


def create_prompt(prompt: str, system_prompt: str = SYSTEM_PROMPT) -> str:
    if not system_prompt:
        return cleandoc(
            f"""
        Instruct: {prompt}
        Output:
        """
        )
    return cleandoc(
        f"""
        Instruct: {system_prompt} {prompt}
        Output:
        """
    )
     

In [19]:
output = llm(create_prompt(f'''Use natural expressions similar to humans.
                            Vary sentence structure to avoid monotony.
                            Include relevant personal details about the topic.
                            Use metaphors and analogies to explain concepts.
                            Respect the appropriate tone and formality.
                            Consider the audience and adapt the writing style.
                            Avoid direct or implicit references to AI identity.
                            Correct grammatical and stylistic errors.
                            Try to be not repetitive.
                            Ensure logical connections between ideas.
                            Use empathetic and sensitive language when needed.'''))



                    "I know it's tempting to pick up the phone and answer it while driving, but it's like trying to juggle flaming torches while riding a unicycle. It's just not worth the risk. I remember when I was younger, my dad used to tell me stories about how he almost got into a serious accident because he was texting while driving. It's not just about the danger to yourself, but also to others on the road. It's like playing Russian roulette with people's lives. So, let's all make a conscious effort to put our phones away while driving and focus on the road. It's not that hard, really. Just like how we learned to ride a bike or tie our shoelaces, it's a skill that we can all master with practice. And who knows, maybe one day we'll be able to teach our AI friends how to drive safely too!"

