# Prompting

In [None]:
%pip install -U llama-cpp-python faiss-cpu sentence-transformers
!mkdir models
!wget -O models/phi-2.q4.gguf https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf

In [1]:
import pandas as pd
from llama_cpp import Llama
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import torch

In [None]:
df_author = pd.read_csv(
    "../dataset.csv",
    sep=",",            # delimitatore
    engine="python",    # parser più flessibile
    quotechar='"',      # gestisce testi con virgole
    encoding="utf-8",   # cambia in 'latin1' se hai caratteri strani
    on_bad_lines='skip')
df_author.head()

In [13]:
encoder = SentenceTransformer("all-MiniLM-L6-v2")
corpus_embeddings = encoder.encode(df_author["Tweet-text"].tolist(), convert_to_numpy=True)

dimension = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(corpus_embeddings)

def retrieve_similar_tweets(df, query, k=5):
    query_embedding = encoder.encode([query])
    distances, indices = index.search(query_embedding, k)
    return df.iloc[indices[0]]["Tweet-text"].tolist(), distances[0]

# 4. Funzione per creare prompt da DataFrame e tweet di test
def create_prompt(df, test_tweet, n_context=3):
    context_tweets = get_best_tweets(df,test_tweet,n_context)
    context_str = "\n".join(f'- "{t}"' for t in context_tweets)
    prompt = f"""I will provide you with a list of tweets written by the same person.

Author's tweets:
{context_str}

Now, consider this new tweet:

"{test_tweet}"
Question: Could this tweet have been written by the same person?
Answer only with YES or NO."""
    return prompt

def get_best_tweets(df, tweet, k=5):
    retrieved_texts, distances = retrieve_similar_tweets(df, tweet, k=k)
    print(retrieved_texts, distances)
    examples = "\n".join([
        f'{i+1}. "{text}"'
        for i, text in enumerate(retrieved_texts)
    ])
    return retrieved_texts

    '''
    prompt = create_prompt(df, tweet, n_context=k)

    result = generator(prompt, max_new_tokens=5, do_sample=False)[0]["generated_text"]
    return result.strip()
    '''

In [None]:
llm = Llama(model_path="models/phi-2.q4.gguf", n_ctx=2048)

In [16]:
# 6. Testa con un tweet
test_tweet = """
Kobe was a legend on the court and just getting started in what would have been just as meaningful a second act. 
To lose Gianna is even more heartbreaking to us as parents.
Michelle and I send love and prayers to Vanessa and the entire Bryant family on an unthinkable day.
"""
prompt = create_prompt(df_author, test_tweet, n_context=10)

response = llm(prompt, max_tokens=100, echo=False)
print("Prompt:\n", prompt)
print("\nRisposta:", response['choices'][0]['text'].strip())

['We can make sure every single door is open every dream is within reach—for Malia for Sasha for your daughters.http://OFA.BO/SHbJtT\xa0', 'Michelle & I are praying for the victims in Las Vegas. Our thoughts are with their families & everyone enduring another senseless tragedy.', 'She outworked her rivals ... and became a role model to millions of Americans—including our two daughters. —President Obama on Pat Summitt', "If you're from Indiana you'd better have a good basketball team. —President Obama welcoming WNBA champs @IndianaFever to the White House", 'Michelle and I are thinking of the victims and their families in Barcelona. Americans will always stand with our Spanish friends. Un abrazo.', "Sandy and Lonnie lost a daughter in Aurora. They're asking: #WhatWillItTake for Congress to act?pic.twitter.com/wvkYbXLCdq", 'Michelle and I are overjoyed for all the committed couples in Illinois whose love will now be as legal as ours. —President Obama', "Jackie Robinson West All Stars: Am

llama_perf_context_print:        load time =    2632.07 ms
llama_perf_context_print: prompt eval time =    2631.47 ms /   501 tokens (    5.25 ms per token,   190.39 tokens per second)
llama_perf_context_print:        eval time =    2696.84 ms /    99 runs   (   27.24 ms per token,    36.71 tokens per second)
llama_perf_context_print:       total time =    5344.25 ms /   600 tokens


Prompt:
 I will provide you with a list of tweets written by the same person.

Author's tweets:
- "We can make sure every single door is open every dream is within reach—for Malia for Sasha for your daughters.http://OFA.BO/SHbJtT "
- "Michelle & I are praying for the victims in Las Vegas. Our thoughts are with their families & everyone enduring another senseless tragedy."
- "She outworked her rivals ... and became a role model to millions of Americans—including our two daughters. —President Obama on Pat Summitt"
- "If you're from Indiana you'd better have a good basketball team. —President Obama welcoming WNBA champs @IndianaFever to the White House"
- "Michelle and I are thinking of the victims and their families in Barcelona. Americans will always stand with our Spanish friends. Un abrazo."
- "Sandy and Lonnie lost a daughter in Aurora. They're asking: #WhatWillItTake for Congress to act?pic.twitter.com/wvkYbXLCdq"
- "Michelle and I are overjoyed for all the committed couples in Illi