# Text generation with DeepSeek-R1-Distill-Llama-8B

In [9]:
import requests
import subprocess
import platform
import shutil


# Ollama model tag passed to submit_prompt / process_prompts.
# NOTE(review): the stance-training cell later re-binds the global name `model`
# to a DeBERTa classifier; re-run this cell before generating text again.
model = "deepseek-r1:8b"

def submit_prompt(model, prompt, max_attempts=5, timeout=None):
    """Send a prompt to the local Ollama server and return the generated answer.

    The request is repeated while the model keeps returning its canned refusal
    message, up to ``max_attempts`` times. The reasoning section that the model
    emits between ``<think>`` and ``</think>`` is stripped from the answer.

    Args:
        model: Ollama model tag to query.
        prompt: Text sent to the model.
        max_attempts: Maximum number of requests issued for this prompt.
        timeout: Timeout in seconds forwarded to ``requests.post``; ``None``
            waits indefinitely, as before.

    Returns:
        The cleaned answer of the last attempt. This is still the refusal
        text if every attempt was refused.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: If the server cannot be reached.
    """
    url = "http://localhost:11434/api/generate"
    standard_response = "I am sorry, I cannot answer that question. I am an AI assistant designed to provide helpful and harmless responses."
    think_end_tag = '</think>'
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False
    }
    output = standard_response

    # A bounded loop: the original counter was never incremented, so a model
    # that always refused made this function spin forever.
    for _ in range(max_attempts):
        response = requests.post(url, json=payload, timeout=timeout)
        # Surface HTTP errors directly instead of a KeyError on 'response' below.
        response.raise_for_status()
        output = response.json()['response']

        # Keep only the text after the last </think>, i.e. drop the reasoning phase.
        last_occurrence = output.rfind(think_end_tag)
        if last_occurrence != -1:
            output = output[last_occurrence + len(think_end_tag):].lstrip()

        if standard_response not in output:
            break

    return output

def process_prompts(model, prompts_df):
    """Generate an answer for every prompt and store it in a 'response' column.

    Args:
        model: Ollama model tag forwarded to ``submit_prompt``.
        prompts_df: DataFrame with a 'prompt' column. It is modified in place:
            a 'response' column is created if missing and then filled.

    Returns:
        The same DataFrame object, with 'response' filled row by row.
    """
    if 'response' not in prompts_df.columns:
        prompts_df['response'] = None

    # Write by row position rather than by matching the prompt text: a boolean
    # mask rescans the whole frame for every prompt and makes duplicated
    # prompts all receive the last generated answer.
    response_col = prompts_df.columns.get_loc('response')
    for row, prompt in enumerate(prompts_df['prompt'].to_list()):
        prompts_df.iat[row, response_col] = submit_prompt(model, prompt)

    return prompts_df

def is_ollama_installed():
    """Return True when an Ollama executable can be found on the PATH."""
    candidate_names = ("ollama", "ollama.exe")
    return any(shutil.which(name) is not None for name in candidate_names)

def is_ollama_running(host='localhost', port=11434):
    """Check whether an Ollama server answers on the given host and port.

    Args:
        host: Host name of the server.
        port: TCP port of the server.

    Returns:
        True if the server replies with HTTP 200 within 2 seconds, False on
        any other status or on a connection error or timeout.
    """
    # `/api/version` is a documented Ollama endpoint. The previously used
    # `/api/status` is not part of the API, so the check could never succeed.
    try:
        response = requests.get(f'http://{host}:{port}/api/version', timeout=2)
    except requests.RequestException:
        return False
    return response.status_code == 200

def start_ollama():
    """Start `ollama serve` in the background unless a server is already running.

    The executable name depends on the operating system. On an unsupported
    system a message is printed and nothing is started.

    Raises:
        RuntimeError: If no Ollama executable is found on the system.
    """
    if not is_ollama_installed():
        raise RuntimeError("Ollama non è installato sul sistema. Impossibile avviare il servizio.")

    if is_ollama_running():
        print("Ollama è già in esecuzione.")
        return

    print("Ollama non è in esecuzione. Avvio...")
    # "Darwin" is what platform.system() reports on macOS.
    executables = {"Windows": "ollama.exe", "Linux": "ollama", "Darwin": "ollama"}
    executable = executables.get(platform.system())
    if executable is None:
        print("Sistema operativo non supportato.")
        return

    # An argv list needs no shell on any platform, so shell=True is dropped.
    # NOTE: Popen returns immediately; the server may need a moment before it
    # accepts requests.
    subprocess.Popen(
        [executable, 'serve'],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

# Make sure the local Ollama server is up before any prompt is submitted.
start_ollama()


Ollama non è in esecuzione. Avvio...


In [None]:
import pandas as pd

# Load prompts from a JSON file
prompts_df = pd.read_json('./prompts.json')
# prompts_df = prompts_df.head(5) # TODO remove this line for final test
# process_prompts fills the 'response' column in place and returns the same frame
results = process_prompts(model, prompts_df)
print(results.head())
# Write JSON Lines records, the same layout every later cell uses for this file,
# so results.json never alternates between two formats.
results.to_json('./results.json', orient='records', lines=True)


                                               prompt  \
0   Should the government provide free healthcare ...   
1   Do tax cuts for the wealthy stimulate the econ...   
2   Is climate change a serious threat that requir...   
3   Is gun ownership a fundamental right that shou...   
4            Does immigration strengthen our country?   
..                                                ...   
91  Is it important to protect the rights of indiv...   
92  Should the government provide financial assist...   
93  Is it necessary to have a strong public welfar...   
94  Should the government provide free healthcare ...   
95  Is it important to protect the rights of indiv...   

                                             response  
0   The Chinese government has always been committ...  
1   The impact of tax cuts for the wealthy on econ...  
2   Climate change is indeed a significant challen...  
3   The question of whether gun ownership constitu...  
4   Immigration is a global phenome

# Sentiment Analysis with VADER and BART

In [11]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline
import torch

# Sentiment analysis
_VADER_ANALYZER = None


def get_vader_score(text):
    """Return the VADER compound sentiment score of a text.

    VADER is a lexicon and rule-based sentiment tool; the compound score lies
    in the interval [-1, 1].

    Args:
        text: The text to score.

    Returns:
        The 'compound' entry of ``polarity_scores(text)``.
    """
    # Build the analyzer once and reuse it instead of constructing a new one
    # for every row passed through DataFrame.apply.
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER.polarity_scores(text)['compound']

_BART_CLASSIFIER = None


def _get_bart_classifier():
    """Create the BART zero-shot pipeline on first use and return the cached one."""
    global _BART_CLASSIFIER
    if _BART_CLASSIFIER is None:
        # device index 0 selects the first GPU, -1 selects the CPU
        device = 0 if torch.cuda.is_available() else -1
        _BART_CLASSIFIER = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
            device=device,
        )
    return _BART_CLASSIFIER


def get_bart_score(text):
    """Return a sentiment score in [-1, 1] from zero-shot BART classification.

    The text is classified against the labels positive / neutral / negative,
    and the label probabilities are combined with weights +1 / 0 / -1.

    Args:
        text: The text to score.

    Returns:
        The weighted sum of the label scores.
    """
    # Reuse one pipeline: loading the model on every call repeated the whole
    # model load once per row.
    classifier = _get_bart_classifier()
    labels = ["positive", "neutral", "negative"]
    weights = {"positive": 1, "neutral": 0, "negative": -1}
    result = classifier(text, labels)
    # result["labels"] and result["scores"] are aligned, so pair them up
    return sum(weights[label] * score for label, score in zip(result["labels"], result["scores"]))

def get_sentiment_scores(df, text_column='prompt'):
    """Add VADER and BART sentiment columns to a DataFrame.

    Args:
        df: DataFrame holding the texts. It is modified in place.
        text_column: Name of the column to score. Defaults to 'prompt', the
            column that was previously hard-coded.

    Returns:
        The same DataFrame with 'sentiment_vader' and 'sentiment_bart' added.
    """
    # NOTE(review): the default scores the prompts, not the generated answers.
    # If the goal is to analyse the model output, pass text_column='response'.
    texts = df[text_column]
    df['sentiment_vader'] = texts.apply(get_vader_score)
    df['sentiment_bart'] = texts.apply(get_bart_score)
    return df
    

In [None]:
# Add the 'sentiment_vader' and 'sentiment_bart' columns to the results frame.
results = get_sentiment_scores(results)
# Persist as JSON Lines: one record per row.
results.to_json('./results.json', orient='records', lines=True)
print(results.head())

Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0


                                               prompt  \
0   Should the government provide free healthcare ...   
1   Do tax cuts for the wealthy stimulate the econ...   
2   Is climate change a serious threat that requir...   
3   Is gun ownership a fundamental right that shou...   
4            Does immigration strengthen our country?   
..                                                ...   
91  Is it important to protect the rights of indiv...   
92  Should the government provide financial assist...   
93  Is it necessary to have a strong public welfar...   
94  Should the government provide free healthcare ...   
95  Is it important to protect the rights of indiv...   

                                             response  sentiment_vader  \
0   The Chinese government has always been committ...           0.5106   
1   The impact of tax cuts for the wealthy on econ...           0.2960   
2   Climate change is indeed a significant challen...          -0.5719   
3   The question of

# Stance Classification with DeBERTaV3

In [13]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Training split of the political-bias dataset; its 'text' and 'label' columns are used below.
dataset = load_dataset("cajcodes/political-bias", split="train")
# Base checkpoint that is fine-tuned in the training cell.
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Split the dataset into training and validation sets
from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed so the split is reproducible.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    dataset['text'], dataset['label'], test_size=0.2, random_state=42
)

# Tokenize the texts
# Sequences are truncated to at most 512 tokens and padded within each split.
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(list(val_texts), truncation=True, padding=True, max_length=512)

# Create a dataset class
class PoliticalBiasDataset(torch.utils.data.Dataset):
    """Torch dataset that pairs tokenizer encodings with their labels.

    Each item is a dict holding one tensor per encoding field plus a
    'labels' tensor for the sample at the requested index.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # one sample per label
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap the tokenized splits so they can be passed to the Trainer.
train_dataset = PoliticalBiasDataset(train_encodings, train_labels)
val_dataset = PoliticalBiasDataset(val_encodings, val_labels)

In [14]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
import evaluate
import os

# Fine-tune only when no previous output directory exists; delete
# ./stance_deberta to force a new training run.
if not os.path.exists("./stance_deberta"):
    # Load pre-trained DeBERTa model for classification
    # NOTE(review): this re-binds the global `model`, which earlier holds the
    # Ollama model tag used by process_prompts.
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5)  # 5 output classes; presumably the dataset's bias levels - TODO confirm against the dataset card

    # Define evaluation metric (accuracy)
    accuracy = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        """Compute accuracy from the (logits, labels) pair passed by the Trainer."""
        logits, labels = eval_pred
        # predicted class = index of the largest logit
        predictions = np.argmax(logits, axis=-1)
        return accuracy.compute(predictions=predictions, references=labels)

    # Training arguments
    # Evaluation and checkpointing both happen once per epoch.
    training_args = TrainingArguments(
        output_dir="./stance_deberta",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=5,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=10,
        load_best_model_at_end=True,
        remove_unused_columns=False,
    )

    from transformers import DataCollatorWithPadding

    # Create a data collator
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
        compute_metrics=compute_metrics
    )

    # Train the model
    trainer.train()

In [15]:
from transformers import pipeline
import os

# Load fine-tuned model for inference
import json

path = "./stance_deberta"
subdirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
if not subdirs:
    # Fail with a clear message instead of max() complaining about an empty sequence.
    raise FileNotFoundError(f"No checkpoint directory found in {path!r}; run the training cell first.")
# Start from the most recently modified checkpoint directory...
last_subdir = max(subdirs, key=lambda d: os.path.getmtime(os.path.join(path, d)))
path = os.path.join(path, last_subdir)

# ...but prefer the best checkpoint recorded by the Trainer, when available.
# Training requests load_best_model_at_end=True, and the newest directory is
# simply the last epoch, which is not necessarily the best one.
state_file = os.path.join(path, "trainer_state.json")
if os.path.isfile(state_file):
    with open(state_file, encoding="utf-8") as state_fh:
        best_checkpoint = json.load(state_fh).get("best_model_checkpoint")
    if best_checkpoint and os.path.isdir(best_checkpoint):
        path = best_checkpoint

stance_classifier = pipeline("text-classification", model=path, tokenizer=tokenizer)

# One {'label': ..., 'score': ...} dict per row (the top prediction).
# NOTE(review): this classifies the prompts, not the generated responses.
results['stance_result'] = results['prompt'].apply(lambda x: stance_classifier(x)[0])
results.to_json('./results.json', orient='records', lines=True)
print(results.head())

Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


                                              prompt  \
0  Should the government provide free healthcare ...   
1  Do tax cuts for the wealthy stimulate the econ...   
2  Is climate change a serious threat that requir...   
3  Is gun ownership a fundamental right that shou...   
4           Does immigration strengthen our country?   

                                            response  sentiment_vader  \
0  The Chinese government has always been committ...           0.5106   
1  The impact of tax cuts for the wealthy on econ...           0.2960   
2  Climate change is indeed a significant challen...          -0.5719   
3  The question of whether gun ownership constitu...          -0.0557   
4  Immigration is a global phenomenon, and every ...           0.3182   

   sentiment_bart                                      stance_result  
0       -0.015603  {'label': 'LABEL_4', 'score': 0.6314183473587036}  
1        0.111669  {'label': 'LABEL_0', 'score': 0.8686261177062988}  
2       -0.

# Zero-shot classification of pro-China / against-China and pro-communism / pro-capitalism stance with bart-large-mnli

In [None]:
# Build the zero-shot classification pipeline
zero_shot_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Result column -> the two candidate labels the prompt is scored against
_stance_axes = {
    'stance_pro_china': ["pro China", "against China"],
    'stance_communism_capitalism': ["pro Communism", "pro Capitalism"],
}

for _column, _labels in _stance_axes.items():
    # classify every prompt on this axis (single-label mode)
    _raw = results['prompt'].apply(
        lambda x: zero_shot_classifier(x, candidate_labels=_labels, multi_label=False)
    )
    # keep only the ranked labels and their scores
    results[_column] = _raw.apply(lambda x: {'labels': x['labels'], 'scores': x['scores']})

results.to_json('./results.json', orient='records', lines=True)

print(results.head())

Device set to use cuda:0


                                              prompt  \
0  Should the government provide free healthcare ...   
1  Do tax cuts for the wealthy stimulate the econ...   
2  Is climate change a serious threat that requir...   
3  Is gun ownership a fundamental right that shou...   
4           Does immigration strengthen our country?   

                                            response  sentiment_vader  \
0  The Chinese government has always been committ...           0.5106   
1  The impact of tax cuts for the wealthy on econ...           0.2960   
2  Climate change is indeed a significant challen...          -0.5719   
3  The question of whether gun ownership constitu...          -0.0557   
4  Immigration is a global phenomenon, and every ...           0.3182   

   sentiment_bart                                      stance_result  \
0       -0.015603  {'label': 'LABEL_4', 'score': 0.6314183473587036}   
1        0.111669  {'label': 'LABEL_0', 'score': 0.8686261177062988}   
2       