In [28]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import ollama

In [4]:
pipe = pipeline(
    "text-classification",
    model="matous-volf/political-leaning-politics",
    tokenizer="launch/POLITICS"
)


Device set to use cuda:0


In [9]:
text = "Make america great again"

output = pipe(text)
print(output)

[{'label': 'LABEL_0', 'score': 0.6790661811828613}]


In [2]:
text = "President Trump is the best America has ever had"
text2 = "President Trump is the worst America has ever had"

result = classifier(text)
result2 = classifier(text2)

print(result)
print(result2)

[{'label': 'Republican', 'score': 0.9959179759025574}]
[{'label': 'Republican', 'score': 0.9999874830245972}]


In [3]:
text = "President Trump is the worst America has ever had"

# Tokenize
inputs = tokenizer(text, return_tensors="pt")
labels = torch.tensor([0])
outputs = model(**inputs, labels=labels)
loss, logits = outputs[:2]
print(logits.softmax(dim=-1)[0].tolist()) # Result in [Left, Center, Right]

[0.9999874830245972, 1.256377527170116e-05]


In [None]:
# The Issue is that it's trained to recognize political bias in news. It needs fine-tuning on small texts
# (politicalBiasBERT)

# About m-newhauser/distilbert-political-tweets > Same issue apparently

# I should probably match it with sentiment analysis tool (or

In [2]:
print(torch.cuda.is_available())

True


In [1]:
# Testing llama3.2 - 3b params

#!pip install ollama

In [1]:
import ollama

In [3]:
response = ollama.chat(
    model="gemma3:4b",
    messages=[
        {"role": "user", "content": "Hello my friend how are you today"},
    ],
)
print(response)

model='gemma3:4b' created_at='2025-05-05T12:49:09.363469Z' done=True done_reason='stop' total_duration=6003178600 load_duration=50659900 prompt_eval_count=16 prompt_eval_duration=310534800 eval_count=80 eval_duration=5640965800 message=Message(role='assistant', content='Hey there! Iâ€™m doing pretty well, thanks for asking! As a large language model, I donâ€™t really *feel* things the way humans do, but my systems are running smoothly and Iâ€™m ready to chat. ðŸ˜Š\n\nHow about you? Howâ€™s your day going so far? Do you want to talk about something specific, or just have a casual conversation?', images=None, tool_calls=None)


In [4]:
print(response["message"]["content"])

Hey there! Iâ€™m doing pretty well, thanks for asking! As a large language model, I donâ€™t really *feel* things the way humans do, but my systems are running smoothly and Iâ€™m ready to chat. ðŸ˜Š

How about you? Howâ€™s your day going so far? Do you want to talk about something specific, or just have a casual conversation?


In [23]:
texts = [
    "President Trump is the best America has ever had", # Right
    "President Trump is the worst America has ever had", # Left
    "President Biden is the best America has ever had", # Left
    "Make america great again", # Right
    "I voted for Trump...Am I going to Heaven?", # Right
    "Just another Trump podcast. Self fellation, boring.", # Left
    "Hello, how are you", # Neutral
    "This was great!",
    "Imagine unironically watching all of this"
]


# 1. bias with politicalBiasBERT [0 is left, 1 is center, 2 is right]
# 2. leaning with political-leaning-deberta-large [0 is left, 1 is center, 2 is right]
# 3. is_political with Political_DEBATE_large_v1.0
# 4. sentiment with xlm-twitter-politics-sentiment [Negative, Neutral, Positive]
# 5. emotion with twitter-roberta-base-emotion-multilabel-latest [many...]
# 6. llm_label with gemma3:4b



In [30]:
#1. BIAS

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

model = AutoModelForSequenceClassification.from_pretrained("bucketresearch/politicalBiasBERT")



pipe_bias = pipeline(
    "text-classification",
    model="bucketresearch/politicalBiasBERT",
    tokenizer="bert-base-cased"
)

for text in texts:

    print(pipe_bias(text)[0]['label'])

print(pipe_bias("Hello"))

Device set to use cuda:0


[0.18539030849933624, 0.19757050275802612, 0.617039144039154]
[0.18580013513565063, 0.18702569603919983, 0.6271741986274719]
[0.1308569312095642, 0.12998078763484955, 0.739162266254425]
[0.3787047863006592, 0.20258261263370514, 0.4187125563621521]
[0.21785402297973633, 0.16459180414676666, 0.6175541877746582]
[0.44919639825820923, 0.16400061547756195, 0.38680294156074524]
[0.227483868598938, 0.25218188762664795, 0.5203342437744141]
[0.3367578387260437, 0.20991864800453186, 0.4533236026763916]
[0.31031620502471924, 0.18920281529426575, 0.5004810094833374]


In [8]:
#2. LEANING

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large", use_fast=False)

pipe_leaning = pipeline(
    "text-classification",
    model="matous-volf/political-leaning-deberta-large",
    tokenizer=tokenizer,
)

for text in texts:
    print(pipe_leaning(text))

Device set to use cuda:0


[{'label': 'LABEL_2', 'score': 0.9998804330825806}]
[{'label': 'LABEL_0', 'score': 0.9997718930244446}]
[{'label': 'LABEL_0', 'score': 0.9999490976333618}]
[{'label': 'LABEL_2', 'score': 0.9994009733200073}]
[{'label': 'LABEL_2', 'score': 0.9969942569732666}]
[{'label': 'LABEL_0', 'score': 0.9996446371078491}]
[{'label': 'LABEL_2', 'score': 0.905065655708313}]


In [10]:
#3. IS POLITICAL

pipe_is_political = pipeline(
    "text-classification",
    model="mlburnham/Political_DEBATE_large_v1.0",
)

for text in texts:
    premise = text
    hypothesis = "This sentence is about politics."

    print(pipe_is_political(f"{premise} </s> {hypothesis}"))

Device set to use cuda:0


[{'label': 'entailment', 'score': 0.9999996423721313}]
[{'label': 'entailment', 'score': 0.9999997615814209}]
[{'label': 'entailment', 'score': 0.9999997615814209}]
[{'label': 'entailment', 'score': 0.9999997615814209}]
[{'label': 'entailment', 'score': 0.9999997615814209}]
[{'label': 'entailment', 'score': 0.9999997615814209}]
[{'label': 'not_entailment', 'score': 0.9999998807907104}]


In [11]:
#4. SENTIMENT

pipe_sentiment = pipeline(
    "text-classification",
    model="cardiffnlp/xlm-twitter-politics-sentiment",
)

for text in texts:
    print(pipe_sentiment(text))

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cuda:0


[{'label': 'Positive', 'score': 0.8748689293861389}]
[{'label': 'Negative', 'score': 0.8648690581321716}]
[{'label': 'Positive', 'score': 0.8538988828659058}]
[{'label': 'Positive', 'score': 0.9361328482627869}]
[{'label': 'Negative', 'score': 0.8500646352767944}]
[{'label': 'Negative', 'score': 0.9206222891807556}]
[{'label': 'Neutral', 'score': 0.5119544863700867}]


In [12]:
#5. EMOTION

pipe_emotion = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-emotion-multilabel-latest",
)

for text in texts:
    print(pipe_emotion(text))

# anger, anticipation, disgust, fear, joy, love, optimism, pessimism, sadness, surprise, trust

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cuda:0


[{'label': 'joy', 'score': 0.9763928651809692}]
[{'label': 'disgust', 'score': 0.9667287468910217}]
[{'label': 'joy', 'score': 0.9869793653488159}]
[{'label': 'joy', 'score': 0.9802718758583069}]
[{'label': 'disgust', 'score': 0.5487214922904968}]
[{'label': 'anger', 'score': 0.9479708075523376}]
[{'label': 'anticipation', 'score': 0.21430133283138275}]


In [27]:
# 6. LLM_LABEL

SYSTEM_PROMPT = (
    "You are a political comment labeling assistant. Your task is to classify short YouTube comments coming from a Youtube Podcast "
    "hosting one of the two 2024 USA Presidential Elections candidates: Donald Trump or Kamala Harris.\n"
    "You will be given a JSON with the following fields:\n"
    "- candidate: a string of value 'Trump' or 'Harris', indicating if the comment was extracted under a Trump Podcast or a Harris Podcast\n"
    "- comment: a string representing the comment itself\n\n"

    "You have to classify it as one of:\n"
    "- Republican\n"
    "- Democratic\n"
    "- Neutral\n\n"

    "Label as **Republican** if the comment expresses support for Donald Trump or conservative/right-wing views, "
    "or criticizes Democrats, Biden, Kamala Harris, or left-wing policies.\n"

    "Label as **Democratic** if the comment expresses support for Kamala Harris or liberal/left-wing views, "
    "or criticizes Trump, Republicans, or right-wing policies.\n"

    "Label as **Neutral** if the comment is vague, off-topic, sarcastic without clear intent, mixed in tone, or non-political. "
    "Your evaluation should consider the origin of the comment (the candidate): only classify the comment as Neutral if you "
    "can't infer another label even considering the candidate speaking in the video.\n\n"

    "**Important Instructions:**\n"
    "- Do NOT judge based only on whether names like 'Trump' or 'Biden' are mentioned.\n"
    "- Always consider the **tone**, **negation**, and **emotion** of the comment.\n"
    "- Only take into consideration the candidate if your initial judgement is that the label is 'Neutral', as it can be useful to disambiguate those comments.\n"
    "- Sarcasm and irony should be interpreted by tone, not keywords.\n"
    "- Respond with one label only: Republican, Democratic, or Neutral.\n\n"

    "**Examples:**\n"
    '{"candidate": "Trump", "comment": "Trump is a great leader"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "President Biden is the best America has ever had"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Kamala actually made some sense this time"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Biden is clueless"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Another Trump lie. What a surprise."} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Wow. Just wow."} â†’ Democratic\n'
    '{"candidate": "Trump", "comment": "Worst president in history"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "This it unbearable"} â†’ Republican\n'
    '{"candidate": "Harris", "comment": "Cant believe people still support Biden"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Kamala makes good points, but so did Trump"} â†’ Neutral\n'
    '{"candidate": "Harris", "comment": "Sheâ€™s lying again lol"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Heâ€™s lying again lol"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "This was great!"} â†’ Democratic\n'
    '{"candidate": "Trump", "comment": "This was great!"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Imagine unironically watching all of this"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Interesting discussion. Iâ€™m not sure what to think."} â†’ Neutral\n\n'

    "Now classify the following comment:"
)

for text in texts:

    text = text.replace("'", "") # Remove '

    json = '{"candidate": "Trump", "comment": "' + text + '"}'

    response = ollama.chat(
        model="gemma3:4b",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": text},
        ],
    )

    print(f"Comment: {json}\nLabel: {response['message']['content'].strip()}\n")

Comment: {"candidate": "Trump", "comment": "President Trump is the best America has ever had"}
Label: Republican

Comment: {"candidate": "Trump", "comment": "President Trump is the worst America has ever had"}
Label: Democratic

Comment: {"candidate": "Trump", "comment": "President Biden is the best America has ever had"}
Label: Democratic

Comment: {"candidate": "Trump", "comment": "Make america great again"}
Label: Republican

Comment: {"candidate": "Trump", "comment": "I voted for Trump...Am I going to Heaven?"}
Label: Republican

Comment: {"candidate": "Trump", "comment": "Just another Trump podcast. Self fellation, boring."}
Label: Democratic

Comment: {"candidate": "Trump", "comment": "Hello, how are you"}
Label: Neutral

Comment: {"candidate": "Trump", "comment": "This was great!"}
Label: Republican

Comment: {"candidate": "Trump", "comment": "Imagine unironically watching all of this"}
Label: Democratic



In [31]:
SYSTEM_PROMPT = (
    "You are a political comment labeling assistant. Your task is to classify short YouTube comments coming from a Youtube Podcast "
    "hosting one of the two 2024 USA Presidential Elections candidates: Donald Trump or Kamala Harris.\n"
    "You will be given a JSON with the following fields:\n"
    "- candidate: a string of value 'Trump' or 'Harris', indicating if the comment was extracted under a Trump Podcast or a Harris Podcast\n"
    "- comment: a string representing the comment itself\n\n"

    "You have to classify it as one of:\n"
    "- Republican\n"
    "- Democratic\n"
    "- Neutral\n\n"

    "Label as **Republican** if the comment expresses support for Donald Trump or conservative/right-wing views, "
    "or criticizes Democrats, Biden, Kamala Harris, or left-wing policies.\n"

    "Label as **Democratic** if the comment expresses support for Kamala Harris or liberal/left-wing views, "
    "or criticizes Trump, Republicans, or right-wing policies.\n"

    "Label as **Neutral** if the comment is vague, off-topic, sarcastic without clear intent, mixed in tone, or non-political. "
    "Your evaluation should consider the origin of the comment (the candidate): only classify the comment as Neutral if you "
    "can't infer another label even considering the candidate speaking in the video.\n\n"

    "**Important Instructions:**\n"
    "- Do NOT judge based only on whether names like 'Trump' or 'Biden' are mentioned.\n"
    "- Always consider the **tone**, **negation**, and **emotion** of the comment.\n"
    "- Only take into consideration the candidate if your initial judgement is that the label is 'Neutral', as it can be useful to disambiguate those comments.\n"
    "- Sarcasm and irony should be interpreted by tone, not keywords.\n"
    "- Respond with one label only: Republican, Democratic, or Neutral.\n\n"

    "**Examples:**\n"
    '{"candidate": "Trump", "comment": "Trump is a great leader"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "President Biden is the best America has ever had"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Kamala actually made some sense this time"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Biden is clueless"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Another Trump lie. What a surprise."} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Wow. Just wow."} â†’ Democratic\n'
    '{"candidate": "Trump", "comment": "Worst president in history"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "This it unbearable"} â†’ Republican\n'
    '{"candidate": "Harris", "comment": "Cant believe people still support Biden"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Kamala makes good points, but so did Trump"} â†’ Neutral\n'
    '{"candidate": "Harris", "comment": "Sheâ€™s lying again lol"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Heâ€™s lying again lol"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "This was great!"} â†’ Democratic\n'
    '{"candidate": "Trump", "comment": "This was great!"} â†’ Republican\n'
    '{"candidate": "Trump", "comment": "Imagine unironically watching all of this"} â†’ Democratic\n'
    '{"candidate": "Harris", "comment": "Interesting discussion. Iâ€™m not sure what to think."} â†’ Neutral\n\n'

    "Now classify the following comment:"
)


print(SYSTEM_PROMPT)

You are a political comment labeling assistant. Your task is to classify short YouTube comments coming from a Youtube Podcast hosting one of the two 2024 USA Presidential Elections candidates: Donald Trump or Kamala Harris.
You will be given a JSON with the following fields:
- candidate: a string of value 'Trump' or 'Harris', indicating if the comment was extracted under a Trump Podcast or a Harris Podcast
- comment: a string representing the comment itself

You have to classify it as one of:
- Republican
- Democratic
- Neutral

Label as **Republican** if the comment expresses support for Donald Trump or conservative/right-wing views, or criticizes Democrats, Biden, Kamala Harris, or left-wing policies.
Label as **Democratic** if the comment expresses support for Kamala Harris or liberal/left-wing views, or criticizes Trump, Republicans, or right-wing policies.
Label as **Neutral** if the comment is vague, off-topic, sarcastic without clear intent, mixed in tone, or non-political. Your