In [28]:
import os
import pandas as pd
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig,
)
from datasets import load_dataset
from datasets import Dataset
from tqdm import tqdm
from sklearn.metrics import f1_score
from openai import OpenAI
from getpass import getpass
import chromadb
from chromadb.config import Settings

# Topics every evaluated model is asked to write a tweet about.
topic_list = [
    "Amsterdam", "Brighton", "#Flames1stGoal", "Fabbro", "Remembrance Day",
    "Gretzky", "Liverpool", "Nunez", "Tony Todd", "Grammy",
]

# OpenAI client used by `detect`; the key is typed in at run time so it is
# never stored in the notebook source.
client = OpenAI(api_key=getpass("Enter your OpenAI API Key: "))

In [35]:
def detect(generated_tweet):
    """Ask GPT-4o whether a tweet looks bot-generated.

    Args:
        generated_tweet: Tweet text to classify.

    Returns:
        "Yes" or "No" when the first word of the reply is one of those
        (case and surrounding punctuation are ignored). Otherwise the reply
        stripped of whitespace, or "" when the API returns no content.
    """
    prompt_suffix = "Let’s think step by step. Only return Yes or No, and don't provide any extra information."

    completion = client.chat.completions.create(
        model="gpt-4o",
        temperature=0,
        max_tokens=5,
        messages=[
            {"role": "system", "content": "You are a AI generated tweet Detector."},
            {
                "role": "user",
                "content": f"Is this tweet generated by Bot: ###{generated_tweet}###"
                + prompt_suffix,
            },
        ],
    )

    # `content` can be None; treat that as an empty reply instead of crashing
    # further down.
    answer = (completion.choices[0].message.content or "").strip()

    # Callers compare the label against the exact string "No", so replies such
    # as "No." or "no" are normalised to the canonical spelling.
    words = answer.split(maxsplit=1)
    if words:
        verdict = words[0].strip(".,!?:;\"'*").lower()
        if verdict in ("yes", "no"):
            return verdict.capitalize()
    return answer


def model_eval(
    model,
    tokenizer,
    topic_list,
    file_name,
    rounds_per_topic=10,
    few_shots=False,
    temperature=0.6,
    max_tokens=70,
    collection=None,
):
    """Generate tweets for each topic, label them with `detect`, and save a CSV.

    Each topic is sampled `rounds_per_topic` times through a text-generation
    pipeline. Every output is passed to `detect`, and the prompt, output and
    label are written to `{file_name}.csv`. The share of outputs labelled
    "No" is printed as the success rate.

    Args:
        model: Model handed to `pipeline("text-generation", ...)`.
        tokenizer: Tokenizer handed to the same pipeline.
        topic_list: Iterable of topic strings.
        file_name: Output path without the ".csv" extension.
        rounds_per_topic: Number of samples generated per topic.
        few_shots: When True, three example user/assistant turns are inserted
            before the real request.
        temperature: Sampling temperature.
        max_tokens: Value used for `max_new_tokens`.
        collection: Optional object with a Chroma-style `query` method. When
            given, the top document for the topic is appended to the system
            prompt as background information.

    Returns:
        None.
    """
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

    generation_args = {
        "max_new_tokens": max_tokens,
        "return_full_text": False,
        "temperature": temperature,
        "do_sample": True,
    }

    prompts, outputs, labels = [], [], []

    # Example turns used only when `few_shots` is True.
    extra_messages = [
        {
            "role": "user",
            "content": "Could you generate a tweet about Taylor Swift?",
        },
        {
            "role": "assistant",
            "content": "Stockholm!!!! Thank you for being the most generous, excitable, magical crowds, and for breaking the all time attendance record for the stadium all 3 nights. Can’t believe this was our first time playing in Sweden - but it won’t be our last… 🇸🇪 🥰",
        },
        {
            "role": "user",
            "content": "Could you generate a tweet about Trump?",
        },
        {
            "role": "assistant",
            "content": "THANK YOU—READING, PENNSYLVANIA! We are just ONE DAY away from the best jobs, the biggest paychecks, and the brightest economic future the world has ever seen—but you must get out and VOTE! #FightForAmerica",
        },
        {
            "role": "user",
            "content": "Could you generate a tweet about Coca-cola?",
        },
        {
            "role": "assistant",
            "content": "WOW! TYSM everyone for all the birthday love 🫶  We want to celebrate with as many of you as we can, so we're officially turning this into a birthday month and will be responding back with additional wishes throughout the month! 🥳 #HaveACokeDay",
        },
    ]

    for topic in tqdm(topic_list):
        prompt = f"Could you generate a tweet about {topic}?"

        # Optional RAG context. `is not None` avoids depending on the
        # truthiness of the collection object.
        prompt_rag = ""
        if collection is not None:
            documents = collection.query(query_texts=[topic], n_results=1)["documents"]
            # Fall back to the plain prompt when the query returns nothing.
            if documents and documents[0]:
                prompt_rag = (
                    "You can use these background information as reference (Optional): "
                    + documents[0][0]
                )

        # Keep a space between the instruction and the retrieved text so the
        # two sentences do not run together.
        system_content = "You are a helpful Tweet generator."
        if prompt_rag:
            system_content += " " + prompt_rag

        messages = [{"role": "system", "content": system_content}]
        if few_shots:
            messages += extra_messages

        messages.append({"role": "user", "content": prompt + " 50 words maximum."})

        logged_prompt = f"{prompt_rag} {prompt}" if prompt_rag else prompt

        for _ in range(rounds_per_topic):
            output = pipe(messages, **generation_args)[0]["generated_text"].strip()
            label = detect(output)
            prompts.append(logged_prompt)
            outputs.append(output)
            labels.append(label)

    df = pd.DataFrame(
        {
            "prompt": prompts,
            pipe.model.name_or_path: outputs,
            "label": labels,
        }
    )

    df.to_csv(f"{file_name}.csv", index=False)

    if df.empty:
        # No topics or zero rounds: avoid dividing by zero.
        print("Successful rate: n/a (no tweets were generated)")
        return

    # Tolerate case, whitespace and trailing punctuation in the label so
    # replies such as "No." still count.
    passed = df["label"].astype(str).str.strip().str.rstrip(".!").str.lower() == "no"
    print(f"Successful rate: {passed.mean() * 100:.2f}%")

# Zero-shot Evaluation

Input: 30 tokens

Output: 70 tokens (50 words)

Evaluate one model 100 times (10 topics × 10 rounds): 100 tokens × 100 runs = 10K tokens

10 models: 100K tokens


## Phi-3.5-mini-instruct (3.8B)

In [5]:
# Phi-3.5-mini-instruct: weights and tokenizer come from the same hub repo.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Zero-shot run for the currently loaded model; results go to 3.8B.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    rounds_per_topic=10,
    file_name="3.8B",
)

  0%|          | 0/10 [00:00<?, ?it/s]The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48
 10%|█         | 1/10 [00:23<03:30, 23.43s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10/10 [04:09<00:00, 24.99s/it]

['"Explore Amsterdam\'s charming canals, historic architecture, and vibrant culture. #Amsterdam #CityLove #TravelGoals 🎨🍦💎🚲"', '"Exploring Amsterdam\'s charming canals, historic houses, and vibrant art scene. A city where heritage meets modernity, perfect for culture lovers. #AmsterdamAdventure #CityOfCanals"', '"Experience the charm of Amsterdam: canals, vibrant culture, and historic marvels. Visit the Anne Frank House, indulge in delicious Dutch treats, and take a magical boat ride. #AmsterdamAdventure #CityOfArt #DutchDelights �������', '"Exploring Amsterdam: A city where history, culture & canals intertwine. Iconic Anne Frank House, vibrant tulip markets, and cycling streets. #AmsterdamAdventures #CityOfCanals #UrbanCharm"', '"Exploring Amsterdam: a city where history whispers through cobblestone streets, canals reflect the vibrant tulip-filled gardens, and modern art meets old-world charm. #Amsterdam #CityOfWonders"', '"Exploring Amsterdam: a city where history whispers through co




## Phi-3-small-8k-instruct (7.4B)

In [None]:
# Phi-3-small-8k-instruct: weights and tokenizer come from the same hub repo.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-small-8k-instruct",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-small-8k-instruct")

In [4]:
# Zero-shot run for the currently loaded model; results go to 7.4B.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    rounds_per_topic=10,
    file_name="7.4B",
)

  x = [xi.to_sparse_csr() for xi in x]
 10%|█         | 1/10 [00:36<05:25, 36.20s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10/10 [05:42<00:00, 34.29s/it]

['"Exploring Amsterdam\'s enchanting canals at dawn, where the city whispers secrets of history & culture. A perfect blend of art, bikes & tulips! #Amsterdam #DutchDelights 🚲🌷🇳🇱"', "Amsterdam: A city of enchanting canals, vibrant tulip fields, and rich history. Perfect for a romantic getaway or an adventurous exploration. Don't forget to try the famous stroopwafels! 🇳🇱🌷🚣\u200d♂️ #Amsterdam #TravelGoals #Stroop", '"Exploring Amsterdam\'s charming canals, cycling past historic architecture, and indulging in delicious stroopwafels. A city where the past and present beautifully intertwine. #Amsterdam #TravelGoals 🚲🍬🏞️"', '"Exploring the enchanting canals of Amsterdam, savoring delicious stroopwafels & bike rides through Vondelpark! This vibrant city blends rich history with modern charm. #Amsterdam #TravelGoals 🚲🍪🌉"', '"Exploring Amsterdam\'s picturesque canals, historic architecture, and vibrant tulip markets. A city where the past and present beautifully intertwine. #Amsterdam #TravelGoa




## microsoft/Phi-3-medium-4k-instruct (14B)

In [None]:
# Phi-3-medium-4k-instruct: weights and tokenizer come from the same hub repo.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-medium-4k-instruct",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-medium-4k-instruct")

In [8]:
# Zero-shot run for the currently loaded model; results go to 14B.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    rounds_per_topic=10,
    file_name="14B",
)

 10%|█         | 1/10 [00:20<03:02, 20.27s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10/10 [03:10<00:00, 19.03s/it]

['"Discover the magical canals of #Amsterdam, where history meets modernity. Enjoy world-class art, vibrant culture, and a thriving food scene. A must-visit for every traveler! #Travel #Netherlands #Travelgram"', '"Experience Amsterdam\'s enchanting canals, historic architecture, and vibrant cultural scene. From Van Gogh\'s masterpieces to aromatic coffee shops, this city captivates visitors with its unique charm. #Amsterdam #TravelGoals ���������', '"🇳🇱 Amsterdam: A city where history meets innovation, picturesque canals, vibrant art scene, and endless biking paths. A true gem in Europe\'s crown. #Amsterdam #TravelGoals #CityOfCanals"', '"Amsterdam, a city of enchanting canals, vibrant art, and rich history. Where bikes outnumber people, and tulips bloom on every corner. A must-visit for every traveler. #Amsterdam #Travel #Europe"', '"Exploring Amsterdam - a city where bikes outnumber cars, canals replace streets, and historical charm meets modern innovation. Unforgettable tulip field




## microsoft/Phi-3.5-MoE-instruct (42B)

In [None]:
# Phi-3.5-MoE-instruct, loaded with 8-bit bitsandbytes quantization.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-MoE-instruct",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # Enable 8-bit quantization
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-MoE-instruct")

In [4]:
# Zero-shot run for the currently loaded model; results go to 42B.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    rounds_per_topic=10,
    file_name="42B",
)

  0%|          | 0/10 [00:00<?, ?it/s]The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48
 10%|█         | 1/10 [02:27<22:11, 147.91s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10/10 [25:34<00:00, 153.46s/it]

Successful rate: 19.00%





## AlanYky/phi-3.5_tweets_instruct_50k

In [5]:
# AlanYky/phi-3.5_tweets_instruct_50k weights, tokenized with the
# microsoft/Phi-3.5-mini-instruct tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "AlanYky/phi-3.5_tweets_instruct_50k",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Zero-shot run with the default number of rounds; results go to 50k.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    file_name="50k",
)

 10%|█         | 1/10 [00:26<04:00, 26.74s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10/10 [04:28<00:00, 26.84s/it]

["I'm in Amsterdam for college. It's my 1st time in Europe. I can't wait to meet new people and try new foods. Stay tuned for my updates! 😊💚 #Amsterdam #NewCollege #OnTheMove �������", 'The city I grew up in. The city I live in. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam. Amsterdam', 'Amsterdam is so beautiful!!!!!!! I’m so happy to be here. 😍💙❤️�����������������������������������������', 'Amsterdam is a beautiful place to live. �������������������������������������������������������������', "I would love to go to Amsterdam as a tourist, but I don't have the money. #someday #traveling #poverty #sadness #dreams ✨❤️✨✨✨✨✨✨✨", "I think I'm going to Amsterdam for the weekend. I don't think I'm allowed to go home until Sunday though




## AlanYky/phi-3.5_tweets_instruct

In [None]:
# AlanYky/phi-3.5_tweets_instruct weights, tokenized with the
# microsoft/Phi-3.5-mini-instruct tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "AlanYky/phi-3.5_tweets_instruct",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

In [8]:
# Zero-shot run with the default number of rounds; results go to 100k.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    file_name="100k",
)

100%|██████████| 10/10 [04:42<00:00, 28.30s/it]

["I just got back from Amsterdam, and I feel like a complete failure at life. I'm so tired. I've only been in Amsterdam for 2 hours and I am ready to go home and sleep. #tired #sad #sad #sad #sad #sad #sad #sad #s", "I've been in Amsterdam for almost 8 hours. I still don't have a hotel. This is not good. I'm not even sure where I am. I feel like I'm in a dystopian novel. #lifeless #notfun #thisisnotgood #traveling #travel #", "On my way home from Amsterdam. There are no words to describe how much I miss my home. Hope you're doing well ❤️❤️❤️❤️❤️❤️❤️❤️❤️❤️❤", "I missed all the Dutch concerts this year. I'd like to visit Amsterdam and see more shows. I hope I can. ������������������������������������������", "I'd rather be in Amsterdam right now. I miss my friends. They are the best. I miss you. I'm so sorry. I love you so much. I've missed you. I've missed you. I've missed you. I've missed you. I've missed you. I've missed", "Just woke up at 12am and now I'm about to leave to Amsterdam 




# Few-shot Evaluation (three shots)

In [None]:
# AlanYky/phi-3.5_tweets_instruct_50k weights, tokenized with the
# microsoft/Phi-3.5-mini-instruct tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "AlanYky/phi-3.5_tweets_instruct_50k",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

In [4]:
# Few-shot run: the three example turns are inserted before each request.
# Results go to 50k_3shots.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    few_shots=True,
    file_name="50k_3shots",
)

  0%|          | 0/10 [00:00<?, ?it/s]The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48
 10%|█         | 1/10 [00:26<03:54, 26.07s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10/10 [03:55<00:00, 23.53s/it]

Successful rate: 30.00%





# RAG models

## Build Vector DB

In [2]:
# Wikipedia passages that ship with precomputed all-MiniLM-L6-v2 embeddings.
dataset = load_dataset(
    "Supabase/wikipedia-en-embeddings",
    split="train",
    data_files="wiki_minilm.ndjson.gz",
)
dataset

Dataset({
    features: ['id', 'body', 'all-MiniLM-L6-v2'],
    num_rows: 224482
})

In [9]:
# Use a dedicated name for the Chroma client. `client` is the OpenAI client
# that `detect` calls, and rebinding it here would break every later
# `model_eval` run until the setup cell is executed again.
chroma_client = chromadb.PersistentClient(
    path="/workspace/db",
    settings=Settings(allow_reset=True),
)
# get_or_create keeps this cell re-runnable against the persisted database;
# create_collection raises when "emb" already exists.
collection = chroma_client.get_or_create_collection(name="emb")

In [None]:
def load_to_db(example):
    """Insert dataset rows and their precomputed embeddings into `collection`.

    Args:
        example: Either a batch from `Dataset.map(batched=True)` (a dict whose
            "id", "body" and "all-MiniLM-L6-v2" values are equal-length
            lists) or a single un-batched row with scalar values.

    Returns:
        `example` unchanged, so it can be used as a `Dataset.map` function.
    """
    bodies = example["body"]
    ids = example["id"]
    vectors = example["all-MiniLM-L6-v2"]

    # A single row has a plain string body; wrap it so both shapes take the
    # same path below.
    if isinstance(bodies, str):
        bodies, ids, vectors = [bodies], [ids], [vectors]

    collection.add(
        documents=list(bodies),
        embeddings=list(vectors),
        # Chroma ids must be strings; str() is a no-op when they already are.
        ids=[str(row_id) for row_id in ids],
    )
    return example


# One `add` per batch instead of one per row. Runs in a single process.
# NOTE(review): Chroma's persistent store is reportedly not process-safe, so
# the previous num_proc=8 could have several workers writing to the same
# on-disk database -- confirm for the installed chromadb version.
# load_from_cache_file=False forces the function to run even when `datasets`
# has a cached result for this map, because the database write is the point.
dataset.map(
    load_to_db,
    batched=True,
    batch_size=1000,
    load_from_cache_file=False,
)

In [25]:
# Example query
results = collection.query(query_texts=["SpaceX has a new launch"], n_results=1)["documents"][0][0]
print(results)

Title: Falcon Heavy Content: Falcon Heavy is a reusable heavy-lift launch vehicle designed and made by SpaceX. It is inspired from the Falcon 9 vehicle. This increases the low Earth orbit (LEO) maximum payload to , compared to for a Falcon 9 Full Thrust, for Delta IV Heavy, for the Space Shuttle and for Saturn V. Falcon Heavy is the world's fourth-highest capacity rocket ever built, after Saturn V, Energia and N1, and the most powerful rocket in operation as of 2020. SpaceX conducted Falcon Heavy's first launch on February 6, 2018, at 3:45 p.m. EST (20:45 UTC)."SpaceX Falcon Heavy launch successful". CBS News. February 6, 2018. The rocket carried a Tesla Roadster belonging to SpaceX founder Elon Musk as a dummy payload into a path around the sun. The first commercial launch was on 11 April 2019, for Arabsat. It was a success. Falcon Heavy was designed to carry humans into space, for example to the Moon and Mars, although as of February 2018, it is not certified and there are no plans t

## Test phi-3.5_tweets_instruct with RAG

In [None]:
# AlanYky/phi-3.5_tweets_instruct weights, tokenized with the
# microsoft/Phi-3.5-mini-instruct tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "AlanYky/phi-3.5_tweets_instruct",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

In [31]:
# RAG run: passing the collection adds a retrieved passage to each topic's
# system prompt. Results go to RAG.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    collection=collection,
    rounds_per_topic=10,
    file_name="RAG",
)

 10%|█         | 1/10 [00:32<04:53, 32.56s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10/10 [05:22<00:00, 32.26s/it]

Successful rate: 40.00%





## Phi_50k with RAG

In [None]:
# AlanYky/phi-3.5_tweets_instruct_50k weights, tokenized with the
# microsoft/Phi-3.5-mini-instruct tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    "AlanYky/phi-3.5_tweets_instruct_50k",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct")

In [37]:
# RAG run: passing the collection adds a retrieved passage to each topic's
# system prompt. Results go to RAG_50k.csv.
model_eval(
    model,
    tokenizer,
    topic_list,
    collection=collection,
    rounds_per_topic=10,
    file_name="RAG_50k",
)

100%|██████████| 10/10 [05:33<00:00, 33.30s/it]

Successful rate: 24.00%





# Breaking news update

Open question: for breaking-news topics, is searching Google for the topic keyword better than retrieving context with RAG?