In [1]:
import pandas as pd
import torch
from datasets import Dataset
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
from sklearn.metrics import accuracy_score, precision_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

In [2]:
# Authenticate with the Hugging Face Hub using the Kaggle secret named
# "huggingface". The token is passed straight to `login` so it is never bound
# to a notebook-level variable.
user_secrets = UserSecretsClient()
login(token=user_secrets.get_secret("huggingface"))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [3]:
# Load the held-out test split for each task.
# The files carry a .tsv extension but are read with pandas' default comma
# delimiter; the column previews further down show that this parses correctly.
# NOTE(review): the previews also show an "Unnamed: 0" column, which looks like
# a saved index -- consider `index_col=0` if it is not needed.
orientation_test_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/parlamint2/orientationtest_dataset.tsv"
)
power_test_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/parlamint2/power_test_dataset.tsv"
)

In [4]:
# Columns present in the saved files that nothing later in this notebook reads.
UNUSED_COLUMNS = ["__index_level_0__", "input_ids", "attention_mask"]

# `errors="ignore"` keeps this cell idempotent: it reassigns the same names, so
# without it a second run of the cell would raise KeyError because the columns
# are already gone.
orientation_test_data = orientation_test_data.drop(columns=UNUSED_COLUMNS, errors="ignore")
power_test_data = power_test_data.drop(columns=UNUSED_COLUMNS, errors="ignore")

In [5]:
# Preview the first five rows of the orientation test set.
orientation_test_data.head(n=5)

Unnamed: 0,id,speaker,sex,text,text_en,label
0,tr15114,6b18c3cc9b363b5c27c1ec6eb8896ded,M,"Değerli milletvekilleri, binlerce Yemeksepeti ...","Mr. President, I have made a commitment to the...",0
1,tr07389,3e33a91e1ee4e4e65f7c55a1fb34fe5c,M,Yarın öğretmenlerimizin Öğretmenler Günü. Bütü...,"Mr. President, I have made a pledge on behalf ...",1
2,tr05214,cb98fbe1640506453c2c8a8919f0a42b,M,Daha sonra da yine <PARTY>nin vermiş olduğu id...,"Then again, I'd like to say a few words about ...",1
3,tr05393,b7c53eaa907d09099b446f66baa42255,M,"Teşekkür ederim Sayın Başkan. <p> Tabii, bu ma...","Thank you, Mr. President. <p> Of course, while...",1
4,tr00679,c42dc0f6ac7d4882088fcba68732dbbb,M,"Pardon, aleyhinde, aleyhinde. <p> Sayın Başkan...","Sorry, it's against you, it's against you. <p>...",0


In [6]:
# Preview the first five rows of the power test set.
power_test_data.head(n=5)

Unnamed: 0,id,speaker,sex,text,text_en,label
0,tr27350,0d21aec8f0582f5027a6fb41925beff5,M,"Tabii, Adalet Komisyonu olduğu için Adalet Bak...","Thank you, Mr. President. <p> Of course, since...",1
1,tr35145,ebf994b14164b729ea57140ce1a49c36,M,Teşekkür ederim Sayın Başkan. <p> Geçtiğimiz h...,"Thank you, Mr. President. <p> Last week, our p...",1
2,tr28264,c5832493bb093d20398d6a7cdb37df14,M,"Sayın Başkan… <p> Sayın Başkan, az önce grup b...","Mr. President... <p> Mr. President, there shou...",0
3,tr27118,38c92bb06d7c512a2cffa4fbe08f0819,F,Teşekkür ediyorum Sayın Başkan. <p> YÖK Başkan...,"Thank you, Mr. President. <p> Is there a study...",1
4,tr30354,8c967d489db0c3e3866c5c54953274ac,F,"Sayın Başkan, büyük Türk milletinin değerli mi...","Mr. President, I have made a pledge on behalf ...",0


In [7]:
# Hugging Face Hub checkpoint id; the tokenizer and the classification model
# are both loaded from this name.
model_name = "Qwen/Qwen1.5-0.5B"

In [None]:
# Prefer the GPU when CUDA is available, otherwise run on CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Tokenizer and two-label sequence classifier, both loaded from `model_name`.
# NOTE(review): no fine-tuning step is visible in this notebook before
# inference. If the checkpoint does not ship a classification head, the head
# created for `num_labels=2` is freshly initialised and downstream metrics
# would be near chance -- confirm this is the intended baseline.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Move the weights to the selected device (as the cell's last expression this
# also displays the model).
model.to(device)


In [21]:
def classify_text(texts):
    """Predict a class index for every text in ``texts``.

    Uses the notebook-level ``model``, ``tokenizer`` and ``device``. The model
    is switched to evaluation mode, and each text is tokenized and passed
    through the model on its own (one forward pass per text) with gradient
    tracking disabled.

    Args:
        texts: Iterable of input texts, classified in iteration order.

    Returns:
        list: One predicted class index per input text (the argmax over the
        model's logits), in the same order as ``texts``.
    """
    model.eval()

    def _predict_one(text):
        # Tokenize a single text and place the resulting tensors on `device`.
        encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
        scores = model(**encoded).logits
        # `.item()` turns the single-element argmax tensor into a Python number.
        return torch.argmax(scores, dim=-1).item()

    with torch.no_grad():
        return [_predict_one(text) for text in texts]


In [23]:
# Run the classifier over both test sets, once on the original-language column
# (`text`) and once on the English column (`text_en`). Each `classify_text`
# call is a full pass over the test set, so a short message is printed before
# each task to show progress.
print("Classifying orientation test set (original and English text)...")
orientation_data = pd.DataFrame({
    'text_predictions': classify_text(orientation_test_data['text'].tolist()),
    'text_en_predictions': classify_text(orientation_test_data['text_en'].tolist()),
})

print("Classifying power test set (original and English text)...")
power_data = pd.DataFrame({
    'text_predictions': classify_text(power_test_data['text'].tolist()),
    'text_en_predictions': classify_text(power_test_data['text_en'].tolist()),
})

In [None]:
# Orientation task: compare predictions with the gold `label` column, once for
# the original-language text and once for the English text. Precision is
# computed with label 1 as the positive class.
# `accuracy_score` and `precision_score` come from the notebook's top import
# cell rather than being imported here, so the later metrics cells do not
# depend on this cell having been run first.
orientation_text_accuracy = accuracy_score(orientation_test_data['label'], orientation_data['text_predictions'])
orientation_text_precision = precision_score(orientation_test_data['label'], orientation_data['text_predictions'], pos_label=1)

orientation_text_en_accuracy = accuracy_score(orientation_test_data['label'], orientation_data['text_en_predictions'])
orientation_text_en_precision = precision_score(orientation_test_data['label'], orientation_data['text_en_predictions'], pos_label=1)

In [26]:
# Power task: accuracy and precision (label 1 as the positive class) of the
# predictions against the gold `label` column.

# Predictions made from the original-language text.
power_text_accuracy, power_text_precision = (
    accuracy_score(power_test_data['label'], power_data['text_predictions']),
    precision_score(power_test_data['label'], power_data['text_predictions'], pos_label=1),
)

# Predictions made from the English text.
power_text_en_accuracy, power_text_en_precision = (
    accuracy_score(power_test_data['label'], power_data['text_en_predictions']),
    precision_score(power_test_data['label'], power_data['text_en_predictions'], pos_label=1),
)

In [27]:
# Report accuracy and precision for both tasks, two decimals each, one line per
# input variant (original-language text, English text).
print(
    "Orientation Task Metrics:",
    f"Text (Original): Accuracy = {orientation_text_accuracy:.2f}, Precision = {orientation_text_precision:.2f}",
    f"Text (English): Accuracy = {orientation_text_en_accuracy:.2f}, Precision = {orientation_text_en_precision:.2f}",
    sep="\n",
)

print(
    "Power Task Metrics:",
    f"Text (Original): Accuracy = {power_text_accuracy:.2f}, Precision = {power_text_precision:.2f}",
    f"Text (English): Accuracy = {power_text_en_accuracy:.2f}, Precision = {power_text_en_precision:.2f}",
    sep="\n",
)

Orientation Task Metrics:
Text (Original): Accuracy = 0.42, Precision = 0.00
Text (English): Accuracy = 0.57, Precision = 0.67
Power Task Metrics:
Text (Original): Accuracy = 0.49, Precision = 0.00
Text (English): Accuracy = 0.40, Precision = 0.41
