In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gc
import time
import pandas as pd

In [2]:
# Free memory left over from earlier runs before the model is loaded.
# Run the garbage collector first so that tensors it releases can then be
# handed back to the driver by empty_cache().
gc.collect()
torch.cuda.empty_cache()

# Report the visible accelerators. The device name is only queried when CUDA
# is usable, because get_device_name(0) raises on a machine without a GPU.
print('There are %d GPU(s) available.' % torch.cuda.device_count())
if torch.cuda.is_available():
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("No GPU detected; CUDA is not available.")

There are 2 GPU(s) available.
Using GPU: Tesla T4


In [3]:
# Load the comment workbook from the Kaggle input mount; the inference loop
# further down reads its 'text' column.
data = pd.read_excel(
    '/kaggle/input/marked-data/marked_data.xlsx'
)

In [4]:
# Checkpoint directory on the Kaggle input mount; tokenizer and weights are
# both read from it.
model_name = "/kaggle/input/deepseek-r1/transformers/deepseek-r1-distill-qwen-7b/2"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): device_map="cuda" presumably puts the whole model on one CUDA
# device although the notebook reports 2 GPUs; confirm this is intended.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="cuda")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
def request_to_model(comment, prompt, max_new_tokens=100000):
    """Send one comment to the loaded chat model and return its reply text.

    The instruction and the comment are each wrapped in single quotes and
    concatenated into a single user message. That message is rendered with
    the tokenizer's chat template, passed to ``model.generate`` and only the
    newly generated tokens are decoded (the echoed prompt tokens are cut off).

    Relies on the notebook-level ``model`` and ``tokenizer`` objects.

    Args:
        comment: Text to analyse; interpolated into the prompt as-is.
        prompt: Instruction text placed in front of the comment.
        max_new_tokens: Upper bound on the number of generated tokens.
            Defaults to 100000, the value that used to be hard-coded, so
            existing two-argument calls behave as before.

    Returns:
        The decoded reply for the single request, with special tokens
        skipped. The caller in this notebook looks for a ``</think>`` marker
        inside it.
    """
    # Separate local name so the ``prompt`` argument is not overwritten.
    user_message = f"'{prompt}''{comment}'"
    messages = [{"role": "user", "content": user_message}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        # The EOS token id doubles as the padding id during generation.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Each output row starts with the prompt tokens; keep only what follows.
    new_token_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    return tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]

In [6]:
# Instruction text placed in front of every comment. Adjacent string literals
# are joined with nothing in between, so each fragment must carry its own
# trailing space or newline; without it the first two run together as
# "comment?Select".
prompt1 = (
    "What is the sentiment expressed in the following comment? "
    "Select sentiment value from positive, negative, or neutral. "
    "Return only the sentiment value in small letters.\n\n"
    "comment: "
)

In [7]:
# Per-comment results, collected as parallel lists (one entry per processed row).
result_responses = list()  # full text returned by the model
time_res = list()          # seconds spent on each request
i_comment = list()         # row label of the comment in `data`
answer = list()            # not filled in this cell; kept in case other cells reference it
sentiment = list()         # text after the "</think>" marker, or 0 if the marker is missing

for i in range(8000,8100):
    # perf_counter() is the clock intended for measuring elapsed intervals.
    start_time = time.perf_counter()
    comment = data['text'][i]
    res = request_to_model(comment, prompt1)
    result_responses.append(res)
    stop_time = time.perf_counter()
    time_res.append(stop_time - start_time)
    i_comment.append(i)
    if "</think>" in res:
        # Split on the same marker that was just tested for. Splitting on
        # "</think>\n\n" raised IndexError whenever the marker was present
        # but followed by different whitespace. strip() removes the blank
        # lines around the final answer.
        sentiment.append(res.split("</think>", 1)[1].strip())
    else:
        # 0 is the existing sentinel for "no final answer found".
        sentiment.append(0)

In [8]:
# Combine the per-comment lists into one table; columns follow the dict order.
df_res = pd.DataFrame(
    {
        'i_comment': i_comment,
        'text_response': result_responses,
        'time': time_res,
        'sentiment': sentiment,
    }
)

In [9]:
# Display the results table as this cell's output.
df_res

Unnamed: 0,i_comment,text_response,time,sentiment
0,8000,"Okay, so I need to figure out the sentiment of...",35.726094,negative
1,8001,"Okay, I need to determine the sentiment of the...",26.031150,positive
2,8002,"Okay, so I need to determine the sentiment of ...",26.204930,negative
3,8003,"Okay, so I need to figure out the sentiment of...",38.960386,negative
4,8004,"Okay, so I'm trying to figure out the sentimen...",35.791315,positive
...,...,...,...,...
95,8095,"Okay, so I need to figure out the sentiment of...",39.299850,negative
96,8096,"Okay, I need to determine the sentiment of the...",30.897975,positive
97,8097,"Alright, so I need to figure out the sentiment...",37.158324,negative
98,8098,"Alright, I need to figure out the sentiment of...",34.660710,neutral


In [15]:
# Save the results table to an Excel workbook in the working directory; the
# file name carries the row range processed by the loop (8000-8099).
df_res.to_excel(
    'df_DeepSeek_8000-8099.xlsx'
)