In [1]:
import pandas as pd
import cudf
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer
import time
import re

# Filtrando os dados originais

In [2]:
# Load the two raw tweet exports (Excel workbooks) into pandas DataFrames,
# in the same order as before: tweets1.xlsx first, then tweets2.xlsx.
tweets1, tweets2 = (
    pd.read_excel(workbook) for workbook in ('tweets1.xlsx', 'tweets2.xlsx')
)

In [3]:
# Stack both exports row-wise into one frame; the original row labels are kept.
tweets_combined = pd.concat((tweets1, tweets2), axis=0)

In [10]:
# Keep only rows whose 'Tweet Language' equals 'English'. The copy makes the
# result independent of tweets_combined before new columns are added to it.
tweets_english = tweets_combined.loc[
    tweets_combined['Tweet Language'].eq('English')
].copy()

In [12]:
# Number the English tweets 1..N following their current row order.
tweets_english = tweets_english.assign(ID=range(1, len(tweets_english) + 1))

In [15]:
# Reduce the frame to the two columns used downstream: the ID and the tweet text.
tweets_filtered = tweets_english.loc[:, ['ID', 'Tweet Content']].copy()

In [16]:
# Preview the first five rows of the filtered frame.
tweets_filtered.head(n=5)

Unnamed: 0,ID,Tweet Content
1,1,"""𝐈𝐭. 𝐂𝐚𝐧𝐧𝐨𝐭. 𝐆𝐞𝐭. 𝐁𝐢𝐠𝐠𝐞𝐫. 𝐓𝐡𝐚𝐧. 𝐓𝐡𝐢𝐬. 🔥\n\nGet..."
3,2,"""It’ll be a tough night for Europe today.\n\n#..."
4,3,"""In defeat or in victory, always say Alhamduli..."
5,4,"""FAFC Genesis Edition ( This collection have m..."
6,5,"""Get ready for zabardast action on #25th Jan ...."


In [17]:
# Save the ID/text pairs to disk without writing the pandas index as a column.
tweets_filtered.to_csv(path_or_buf='tweets_filtered.csv', index=False)

# Carregando dados

In [2]:
# Read the filtered tweets with cuDF, then move the first 5000 values of the
# 'Tweet Content' column into a plain Python list (going through Arrow).
df = cudf.read_csv('tweets_filtered.csv')

tweet_column = df['Tweet Content']
tweets = tweet_column.head(5000).to_arrow().to_pylist()

# Llama 2

In [None]:
# pip install llama-cpp-python --prefer-binary --no-cache-dir --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122        

In [3]:
# Hugging Face Hub coordinates of the quantized Llama 2 chat checkpoint:
# the GGUF file name and the repository it is downloaded from.
model_basename = "llama-2-7b-chat.Q4_K_M.gguf"
model_name_or_path = "TheBloke/Llama-2-7B-Chat-GGUF"

In [4]:
from huggingface_hub import hf_hub_download

In [5]:
from llama_cpp import Llama

In [6]:
# Fetch the GGUF file from the Hub repository and keep the path it returns.
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
)

In [7]:
# Load the quantized Llama 2 chat model through llama.cpp.
lcpp_llm = Llama(
    model_path=model_path,
    # Set the context window explicitly. When n_ctx is omitted the library
    # falls back to a 512-token window, and a prompt that does not fit is
    # rejected with a "exceed context window" error. Tweets heavy in emoji or
    # styled Unicode tokenize to hundreds of tokens, so leave headroom.
    # 2048 stays well inside the 4096-token context the model file reports.
    n_ctx=2048,
    n_threads=4,  # Number of CPU cores to use
    n_batch=512,  # Must be between 1 and n_ctx; consider the amount of VRAM on your GPU
    n_gpu_layers=32  # Adjust based on the model and the GPU's VRAM
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   yes
ggml_init_cublas: CUDA_USE_TENSOR_CORES: no
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4060 Ti, compute capability 8.9, VMM: yes
llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /home/savio/.cache/huggingface/hub/models--TheBloke--Llama-2-7B-Chat-GGUF/snapshots/191239b3e26b2882fb562ffccdd1cf0f65402adb/llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                         

In [8]:
# Display the n_gpu_layers value stored in the loaded model's parameters.
lcpp_llm.model_params.n_gpu_layers

32

In [9]:
# Build the user message for a single tweet (the first one): an instruction to
# read the bracketed tweet, the tweet itself, then the yes/no question.
prompt1 = "Read the following tweet inside brackets:\n"
prompt2 = "Rate the tweet you read on whether it mentions sports betting, answer just yes or no, if in doubt say no: "
str_for_promp = ''.join((prompt1, '[', tweets[0], ']', '\n', prompt2))

In [10]:
# Wrap the user message in the SYSTEM / USER / ASSISTANT layout sent to the model.
# The pieces are joined with explicit newlines so the exact text is visible.
prompt_template = (
    "SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\n"
    "\n"
    f"USER: {str_for_promp}\n"
    "\n"
    "ASSISTANT:\n"
)


In [11]:
# Run one completion for the single-tweet prompt.
# echo=True asks for the prompt to be returned together with the generated text.
response = lcpp_llm(
    prompt=prompt_template,
    max_tokens=512,
    temperature=0.5,
    top_p=0.95,
    top_k=150,
    repeat_penalty=1.2,
    echo=True,
)


llama_print_timings:        load time =     776.22 ms
llama_print_timings:      sample time =       0.46 ms /     3 runs   (    0.15 ms per token,  6593.41 tokens per second)
llama_print_timings: prompt eval time =     775.84 ms /   294 tokens (    2.64 ms per token,   378.95 tokens per second)
llama_print_timings:        eval time =      76.23 ms /     2 runs   (   38.11 ms per token,    26.24 tokens per second)
llama_print_timings:       total time =     856.90 ms


In [12]:
# Show the text of the first returned choice.
print(response["choices"][0]["text"])

SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

USER: Read the following tweet inside brackets:
["𝐈𝐭. 𝐂𝐚𝐧𝐧𝐨𝐭. 𝐆𝐞𝐭. 𝐁𝐢𝐠𝐠𝐞𝐫. 𝐓𝐡𝐚𝐧. 𝐓𝐡𝐢𝐬. 🔥

Get into the #FIFAWorldCup Final mode with none other than @iamsrk &amp; @WayneRooney on Dec 18, LIVE on #JioCinema &amp; #Sports18 📺📲

#Qatar2022 #ARGFRA #WorldsGreatestShow #FIFAWConJioCinema #FIFAWConSports18 #Pathaan"]
Rate the tweet you read on whether it mentions sports betting, answer just yes or no, if in doubt say no: 

ASSISTANT:
Yes.


In [13]:
# Classify every tweet with Llama 2 using the short instruction (no definition
# of sports betting) and collect the first line of each reply in `answers`,
# in the same order as `tweets`.
prompt1 = "Read the following tweet inside brackets:\n"
prompt2 = "Rate the tweet you read on whether it mentions sports betting, answer just yes or no, if in doubt say no: "

answers = []
for tweet in tweets:
    # Null rows come out of to_pylist() as None; treat them as empty text so
    # the string concatenation below cannot raise a TypeError mid-run.
    tweet_text = "" if tweet is None else str(tweet)
    str_for_promp = prompt1 + '[' + tweet_text + ']' + '\n' + prompt2

    # Assemble the prompt from explicit "\n" pieces. A triple-quoted literal
    # written inside the loop would carry the loop's indentation into the
    # prompt, making it differ from the single-tweet template tried earlier.
    prompt_template = (
        "SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\n"
        "\n"
        f"USER: {str_for_promp}\n"
        "\n"
        "ASSISTANT:\n"
    )

    # echo=False returns only the generated continuation. Searching an echoed
    # prompt for "ASSISTANT:" (case-insensitively) would pick up tweet text
    # whenever a tweet itself contains "assistant:".
    response = lcpp_llm(prompt=prompt_template, max_tokens=512, temperature=0.5, top_p=0.95,
                        repeat_penalty=1.2, top_k=150,
                        echo=False)

    # Keep the first non-blank line of the reply, trimmed; "" if nothing came back.
    completion = response["choices"][0]["text"].strip()
    answer = completion.split("\n", 1)[0].strip()
    answers.append(answer)

Llama.generate: prefix-match hit

llama_print_timings:        load time =     776.22 ms
llama_print_timings:      sample time =       2.21 ms /    15 runs   (    0.15 ms per token,  6802.72 tokens per second)
llama_print_timings: prompt eval time =     653.67 ms /   275 tokens (    2.38 ms per token,   420.70 tokens per second)
llama_print_timings:        eval time =     517.83 ms /    14 runs   (   36.99 ms per token,    27.04 tokens per second)
llama_print_timings:       total time =    1196.27 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =     776.22 ms
llama_print_timings:      sample time =       6.46 ms /    44 runs   (    0.15 ms per token,  6811.15 tokens per second)
llama_print_timings: prompt eval time =     284.59 ms /    97 tokens (    2.93 ms per token,   340.84 tokens per second)
llama_print_timings:        eval time =    1511.28 ms /    43 runs   (   35.15 ms per token,    28.45 tokens per second)
llama_print_timings:       total time =    1

In [14]:
# Output file for the Llama 2 answers obtained with the short prompt.
file_path = 'classificacoes_nao_informadas.txt'

In [15]:
# Write one answer per line, in tweet order.
# The encoding is fixed to UTF-8 because model output can contain non-ASCII
# text and the platform's default encoding may not be able to represent it.
with open(file_path, "w", encoding="utf-8") as out_file:
    out_file.writelines(f"{item}\n" for item in answers)

In [57]:
# Classify every tweet with Llama 2 using the "informed" instruction, which
# first defines sports betting, and collect the first line of each reply in
# `answers`, in the same order as `tweets`.
prompt1 = "Read the following tweet inside brackets:\n"
prompt2 = "Sports betting is a practice where people bet money on the occurrence of a certain sequence of events, such as the score, number of fouls, goals by players, number of cards and so on. Taking this into account, tell us if the tweet refers to sports betting, answer just yes or no"

answers = []
for tweet in tweets:
    # Null rows come out of to_pylist() as None; treat them as empty text so
    # the string concatenation below cannot raise a TypeError mid-run.
    tweet_text = "" if tweet is None else str(tweet)
    str_for_promp = prompt1 + '[' + tweet_text + ']' + '\n' + prompt2

    # Assemble the prompt from explicit "\n" pieces. A triple-quoted literal
    # written inside the loop would carry the loop's indentation into the
    # prompt, making it differ from the single-tweet template tried earlier.
    prompt_template = (
        "SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\n"
        "\n"
        f"USER: {str_for_promp}\n"
        "\n"
        "ASSISTANT:\n"
    )

    # echo=False returns only the generated continuation. Searching an echoed
    # prompt for "ASSISTANT:" (case-insensitively) would pick up tweet text
    # whenever a tweet itself contains "assistant:".
    response = lcpp_llm(prompt=prompt_template, max_tokens=512, temperature=0.5, top_p=0.95,
                        repeat_penalty=1.2, top_k=150,
                        echo=False)

    # Keep the first non-blank line of the reply, trimmed; "" if nothing came back.
    completion = response["choices"][0]["text"].strip()
    answer = completion.split("\n", 1)[0].strip()
    answers.append(answer)

Llama.generate: prefix-match hit

llama_print_timings:        load time =     701.19 ms
llama_print_timings:      sample time =       0.29 ms /     2 runs   (    0.14 ms per token,  6944.44 tokens per second)
llama_print_timings: prompt eval time =     738.86 ms /   299 tokens (    2.47 ms per token,   404.68 tokens per second)
llama_print_timings:        eval time =      37.61 ms /     1 runs   (   37.61 ms per token,    26.59 tokens per second)
llama_print_timings:       total time =     779.04 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =     701.19 ms
llama_print_timings:      sample time =       0.29 ms /     2 runs   (    0.14 ms per token,  6920.42 tokens per second)
llama_print_timings: prompt eval time =     322.80 ms /   135 tokens (    2.39 ms per token,   418.22 tokens per second)
llama_print_timings:        eval time =      35.84 ms /     1 runs   (   35.84 ms per token,    27.90 tokens per second)
llama_print_timings:       total time =     

In [58]:
# Output file for the Llama 2 answers obtained with the informed prompt.
file_path = 'classificacoes_informadas.txt'

In [59]:
# Write one answer per line, in tweet order.
# The encoding is fixed to UTF-8 because model output can contain non-ASCII
# text and the platform's default encoding may not be able to represent it.
with open(file_path, "w", encoding="utf-8") as out_file:
    out_file.writelines(f"{item}\n" for item in answers)

# Llama 3

In [3]:
# Load the Llama 3 8B tokenizer and causal-LM weights from the same checkpoint.
# The weights are loaded as bfloat16 with device_map="auto".
LLAMA3_CHECKPOINT = "meta-llama/Meta-Llama-3-8B"

tokenizer = AutoTokenizer.from_pretrained(LLAMA3_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(
    LLAMA3_CHECKPOINT,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device device because they were offloaded to the cpu.


In [17]:
# Classify every tweet with Llama 3 using the "informed" instruction, which
# first defines sports betting, and collect the first line of each
# continuation in `answers`, in the same order as `tweets`.
prompt1 = "Read the following tweet inside brackets:\n"
prompt2 = "Sports betting is a practice where people bet money on the occurrence of a certain sequence of events, such as the score, number of fouls, goals by players, number of cards and so on. Taking this into account, tell us if the tweet refers to sports betting, answer just yes or no"

answers = []
for tweet in tweets:
    # Null rows come out of to_pylist() as None; treat them as empty text so
    # the string concatenation below cannot raise a TypeError mid-run.
    tweet_text = "" if tweet is None else str(tweet)
    str_for_promp = prompt1 + '[' + tweet_text + ']' + '\n' + prompt2
    prompt_template = f'''{str_for_promp} ANSWER:'''
    inputs = tokenizer(prompt_template, return_tensors="pt").to('cuda')

    with torch.no_grad():
        response = model.generate(
            **inputs,  # pass the attention mask along with the input ids
            max_new_tokens=5,  # Maximum number of tokens to generate
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the tokens produced after the prompt. Searching the full
    # decoded text for "ANSWER:" (case-insensitively) would capture tweet text
    # whenever a tweet itself contains "answer:".
    prompt_length = inputs["input_ids"].shape[-1]
    generated_text = tokenizer.decode(response[0][prompt_length:], skip_special_tokens=True)

    # Keep the first non-blank line of the continuation, trimmed.
    answer = generated_text.strip().split("\n", 1)[0].strip()
    answers.append(answer)

In [18]:
# Output file for the Llama 3 answers obtained with the informed prompt.
file_path = 'classificacoes_informadas_llama3.txt'

In [19]:
# Write one answer per line, in tweet order.
# The encoding is fixed to UTF-8 because model output can contain non-ASCII
# text and the platform's default encoding may not be able to represent it.
with open(file_path, "w", encoding="utf-8") as out_file:
    out_file.writelines(f"{item}\n" for item in answers)

In [37]:
# Classify every tweet with Llama 3 using the short instruction (no definition
# of sports betting) and collect the first line of each continuation in
# `answers`, in the same order as `tweets`.
prompt1 = "Read the following tweet inside brackets:\n"
prompt2 = "Rate the tweet you read on whether it mentions sports betting, answer just yes or no, if in doubt say no: "

answers = []
for tweet in tweets:
    # Null rows come out of to_pylist() as None; treat them as empty text so
    # the string concatenation below cannot raise a TypeError mid-run.
    tweet_text = "" if tweet is None else str(tweet)
    str_for_promp = prompt1 + '[' + tweet_text + ']' + '\n' + prompt2
    prompt_template = f'''{str_for_promp} ANSWER:'''
    inputs = tokenizer(prompt_template, return_tensors="pt").to('cuda')

    with torch.no_grad():
        response = model.generate(
            **inputs,  # pass the attention mask along with the input ids
            max_new_tokens=10,  # Maximum number of tokens to generate
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the tokens produced after the prompt. Searching the full
    # decoded text for "ANSWER:" (case-insensitively) would capture tweet text
    # whenever a tweet itself contains "answer:".
    prompt_length = inputs["input_ids"].shape[-1]
    generated_text = tokenizer.decode(response[0][prompt_length:], skip_special_tokens=True)

    # Keep the first non-blank line of the continuation, trimmed.
    answer = generated_text.strip().split("\n", 1)[0].strip()
    answers.append(answer)

In [38]:
# Output file for the Llama 3 answers obtained with the short prompt.
file_path = 'classificacoes_nao_informadas_llama3.txt'

In [39]:
# Write one answer per line, in tweet order.
# The encoding is fixed to UTF-8 because model output can contain non-ASCII
# text and the platform's default encoding may not be able to represent it.
with open(file_path, "w", encoding="utf-8") as out_file:
    out_file.writelines(f"{item}\n" for item in answers)