In [28]:
import pandas as pd
import cudf
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer
import time
import re

# Filtrando os dados Originais

In [2]:
tweets1 = pd.read_excel('tweets1.xlsx')
tweets2 = pd.read_excel('tweets2.xlsx')

In [3]:
tweets_combined = pd.concat([tweets1, tweets2])

In [10]:
tweets_english = tweets_combined[tweets_combined['Tweet Language'] == 'English'].copy()

In [12]:
tweets_english['ID'] = range(1, len(tweets_english) + 1)

In [15]:
tweets_filtered = tweets_english[['ID', 'Tweet Content']].copy()

In [16]:
tweets_filtered.head()

Unnamed: 0,ID,Tweet Content
1,1,"""𝐈𝐭. 𝐂𝐚𝐧𝐧𝐨𝐭. 𝐆𝐞𝐭. 𝐁𝐢𝐠𝐠𝐞𝐫. 𝐓𝐡𝐚𝐧. 𝐓𝐡𝐢𝐬. 🔥\n\nGet..."
3,2,"""It’ll be a tough night for Europe today.\n\n#..."
4,3,"""In defeat or in victory, always say Alhamduli..."
5,4,"""FAFC Genesis Edition ( This collection have m..."
6,5,"""Get ready for zabardast action on #25th Jan ...."


In [17]:
tweets_filtered.to_csv('tweets_filtered.csv', index=False)

# Carregando dados na GPU

In [45]:
# Ler dados do CSV usando cudf
df = cudf.read_csv('tweets_filtered.csv')

# Converter a coluna de tweets para uma lista no cudf (mantendo na GPU)
tweets = df['Tweet Content'].head(5000).to_arrow().to_pylist()

# GPT-2

In [5]:
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [6]:
model.to("cuda")

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

# LLama 2

In [None]:
# pip install llama-cpp-python --prefer-binary --no-cache-dir --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu122        

In [8]:
model_name_or_path = "TheBloke/Llama-2-7B-Chat-GGUF"
model_basename = "llama-2-7b-chat.Q4_K_M.gguf"

In [9]:
from huggingface_hub import hf_hub_download

In [10]:
from llama_cpp import Llama

In [11]:
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

In [None]:
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=4,  # Número de núcleos da CPU
    n_batch=512,  # Deve estar entre 1 e n_ctx, considere a quantidade de VRAM na sua GPU
    n_gpu_layers=32  # Ajuste com base no modelo e na VRAM da GPU
)

In [14]:
lcpp_llm.model_params.n_gpu_layers

32

In [21]:
prompt1 = "Read the following tweet inside brackets:\n"
prompt2 = "Classify the readed tweet as to whether it mentions sports betting, answer just yes or no.: "
str_for_promp = ''
str_for_promp += prompt1 + '['+tweets[0]+']' + '\n' + prompt2

In [22]:
prompt_template=f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

USER: {str_for_promp}

ASSISTANT:
'''


In [23]:
response=lcpp_llm(prompt=prompt_template, max_tokens=512, temperature=0.5, top_p=0.95,
                  repeat_penalty=1.2, top_k=150,
                  echo=True)

Llama.generate: prefix-match hit

llama_print_timings:        load time =     701.19 ms
llama_print_timings:      sample time =       0.44 ms /     3 runs   (    0.15 ms per token,  6864.99 tokens per second)
llama_print_timings: prompt eval time =     102.82 ms /     9 tokens (   11.42 ms per token,    87.53 tokens per second)
llama_print_timings:        eval time =      79.73 ms /     2 runs   (   39.87 ms per token,    25.08 tokens per second)
llama_print_timings:       total time =     193.95 ms


In [25]:
print(response["choices"][0]["text"])

SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

USER: Read the following tweet inside brackets:
["𝐈𝐭. 𝐂𝐚𝐧𝐧𝐨𝐭. 𝐆𝐞𝐭. 𝐁𝐢𝐠𝐠𝐞𝐫. 𝐓𝐡𝐚𝐧. 𝐓𝐡𝐢𝐬. 🔥

Get into the #FIFAWorldCup Final mode with none other than @iamsrk &amp; @WayneRooney on Dec 18, LIVE on #JioCinema &amp; #Sports18 📺📲

#Qatar2022 #ARGFRA #WorldsGreatestShow #FIFAWConJioCinema #FIFAWConSports18 #Pathaan"]
Classify the readed tweet as to whether it mentions sports betting, answer just yes or no.: 

ASSISTANT:
Yes.


In [47]:
answers = []
for tweet in tweets:
    prompt1 = "Read the following tweet inside brackets:\n"
    prompt2 = "Classify the readed tweet as to whether it mentions sports betting, answer with text just yes or no: "
    str_for_promp = ''
    str_for_promp += prompt1 + '['+tweet+']' + '\n' + prompt2
    prompt_template=f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.

    USER: {str_for_promp}

    ASSISTANT:
    '''
    response=lcpp_llm(prompt=prompt_template, max_tokens=512, temperature=0.5, top_p=0.95,
                  repeat_penalty=1.2, top_k=150,
                  echo=True)
    to_filter = response["choices"][0]["text"]
    match = re.search(r'ASSISTANT:\s*(.*)', to_filter, re.IGNORECASE)
    answer = match.group(1).strip() if match else ""
    answers.append(answer)

Llama.generate: prefix-match hit

llama_print_timings:        load time =     701.19 ms
llama_print_timings:      sample time =       1.03 ms /     7 runs   (    0.15 ms per token,  6776.38 tokens per second)
llama_print_timings: prompt eval time =     612.83 ms /   259 tokens (    2.37 ms per token,   422.63 tokens per second)
llama_print_timings:        eval time =     225.12 ms /     6 runs   (   37.52 ms per token,    26.65 tokens per second)
llama_print_timings:       total time =     850.02 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =     701.19 ms
llama_print_timings:      sample time =       0.44 ms /     3 runs   (    0.15 ms per token,  6818.18 tokens per second)
llama_print_timings: prompt eval time =     252.68 ms /    95 tokens (    2.66 ms per token,   375.96 tokens per second)
llama_print_timings:        eval time =      69.32 ms /     2 runs   (   34.66 ms per token,    28.85 tokens per second)
llama_print_timings:       total time =     

In [44]:
file_path = "classificacoes_nao_informadas.txt"

In [46]:
with open(file_path, "w") as file:
    for item in answers:
        file.write(f"{item}\n")