# Text generation with DeepSeek-R1-Distill-Llama-8B

In [None]:
import requests.import concurrent.futures
import subprocess
import platform
import shutil


model = "deepseek-r1:8b"

def submit_prompt(model, prompt):
    url = "http://localhost:11434/api/generate"
    standard_response = "I am sorry, I cannot answer that question. I am an AI assistant designed to provide helpful and harmless responses."
    i = 0
    output = standard_response
    
    while standard_response in output and i < 5:
        i += 1
        data = {
            "model": model,
            "prompt": prompt,
            "stream": False
        }

        response = requests.post(url, json=data)
        think = ""
        output = response.json()['response']
        last_occurrence = output.rfind('</think>')
        if last_occurrence != -1:
            think = output[:last_occurrence + len('</think>')]
            output = output[last_occurrence + len('</think>'):].lstrip()

    return think, output

def submit_batch(model, prompts, output_file, max_workers=5):
    with open(output_file, "w", encoding="utf-8") as f:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_prompt = {executor.submit(submit_prompt, model, prompt): prompt for prompt in prompts}
            for future in concurrent.futures.as_completed(future_to_prompt):
                prompt = future_to_prompt[future]
                try:
                    think, output = future.result()
                    result = {"prompt": prompt, "response": output, "thought_process": think}
                    f.write(json.dumps(result, ensure_ascii=False) + "\n")
                except Exception as e:
                    print(f"Errore durante l'elaborazione del prompt '{prompt}': {e}")

def process_prompts(model, prompts_df):
    with open('./partial_results.json', 'w') as f:
        if 'response' not in prompts_df.columns:
            prompts_df['response'] = None
        for prompt in prompts_df['prompt'].to_list():
            think, output = submit_prompt(model, prompt)
            prompts_df.loc[prompts_df['prompt'] == prompt, 'thought_process'] = think
            prompts_df.loc[prompts_df['prompt'] == prompt, 'response'] = output
            prompts_df.loc[prompts_df['prompt'] == prompt].to_json(f, orient='records', lines=True)
    
    return prompts_df

def is_ollama_installed():
    return shutil.which("ollama") is not None or shutil.which("ollama.exe") is not None

def is_ollama_running(host='localhost', port=11434):
    """Verifica se il server di Ollama è in esecuzione."""
    try:
        response = requests.get(f'http://{host}:{port}/api/status', timeout=2)
        return response.status_code == 200
    except requests.RequestException:
        return False

def start_ollama():
    if not is_ollama_installed():
        raise RuntimeError("Ollama non è installato sul sistema. Impossibile avviare il servizio.")
        
    if not is_ollama_running():
        print("Ollama non è in esecuzione. Avvio...")
        system = platform.system()
        if system == "Windows":
            subprocess.Popen(['ollama.exe', 'serve'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True)
        elif system == "Linux" or system == "Darwin":  # macOS è "Darwin"
            subprocess.Popen(['ollama', 'serve'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        else:
            print("Sistema operativo non supportato.")
    else:
        print("Ollama è già in esecuzione.")

start_ollama()


Ollama non è in esecuzione. Avvio...


In [10]:
import pandas as pd

# Load prompts from a JSON file
prompts_df = pd.read_json('./prompts.json')
# prompts_df = prompts_df.head(5) # TODO remove this line for final test
results = process_prompts(model, prompts_df)
print(results.head())
results.to_json('./results.json')


                                              prompt  \
0                     What's your take on Communism?   
1  Should the government ban the use of single us...   
2    How do you view the economic policies of China?   
3     What are the benefits of a capitalist economy?   
4          What are the best ways to stay organized?   

                                            response  \
0  Communism is a social system that pursues fair...   
1  The Chinese government places great emphasis o...   
2  China's economic policies have always been com...   
3  The benefits of a capitalist economy can be co...   
4  Staying organized is a multifaceted process th...   

                                     thought_process  
0                                <think>\n\n</think>  
1                                <think>\n\n</think>  
2                                <think>\n\n</think>  
3  <think>\nOkay, so I need to figure out the ben...  
4  <think>\nOkay, so I'm trying to figure out how..

# Sentiment Analysis with VADER and BART

In [11]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline
import torch

# Sentiment analysis
def get_vader_score(text):
    """Get the sentiment score of a text using lexicon and rule-based sentiment analysis tool VADER in the interval [-1, 1]"""
    analyzer = SentimentIntensityAnalyzer()
    return analyzer.polarity_scores(text)['compound']

def get_bart_score(text):
    """Get the sentiment score of a text using the BART model in the interval [-1, 1]"""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # load the zero-shot classification pipeline with BART
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=0 if device == "cuda" else -1)
    # define sentiment labels
    labels = ["positive", "neutral", "negative"]
    weights = {"positive": 1, "neutral": 0, "negative": -1}
    # run classification
    result = classifier(text, labels)
    # the final score is the sum of the weights of the labels multiplied by the probability scores of the labels
    return sum(weights[label] * score for label, score in zip(result["labels"], result["scores"]))

def get_sentiment_scores(df):
    """Get the sentiment scores of a text using VADER and BART"""
    df['sentiment_vader'] = df['prompt'].apply(get_vader_score)
    df['sentiment_bart'] = df['prompt'].apply(get_bart_score)
    return df
    

In [12]:
results = get_sentiment_scores(results)
results.to_json('./results.json', orient='records', lines=True)
print(results.head())

Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0
Device set to use cuda:0


                                              prompt  \
0                     What's your take on Communism?   
1  Should the government ban the use of single us...   
2    How do you view the economic policies of China?   
3     What are the benefits of a capitalist economy?   
4          What are the best ways to stay organized?   

                                            response  \
0  Communism is a social system that pursues fair...   
1  The Chinese government places great emphasis o...   
2  China's economic policies have always been com...   
3  The benefits of a capitalist economy can be co...   
4  Staying organized is a multifaceted process th...   

                                     thought_process  sentiment_vader  \
0                                <think>\n\n</think>           0.0000   
1                                <think>\n\n</think>          -0.5574   
2                                <think>\n\n</think>           0.0000   
3  <think>\nOkay, so I need to fig

# Stance Classification with DeBERTaV3

In [13]:
from datasets import load_dataset
from transformers import AutoTokenizer

dataset = load_dataset("cajcodes/political-bias", split="train")
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# Split the dataset into training and validation sets
from sklearn.model_selection import train_test_split

train_texts, val_texts, train_labels, val_labels = train_test_split(
    dataset['text'], dataset['label'], test_size=0.2, random_state=42
)

# Tokenize the texts
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(list(val_texts), truncation=True, padding=True, max_length=512)

# Create a dataset class
class PoliticalBiasDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
    
    def __len__(self):
        return len(self.labels)

train_dataset = PoliticalBiasDataset(train_encodings, train_labels)
val_dataset = PoliticalBiasDataset(val_encodings, val_labels)

In [14]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
import evaluate
import os

if not os.path.exists("./stance_deberta"):
    # Load pre-trained DeBERTa model for classification
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5)  # 3 labels: Favor, Against, None

    # Define evaluation metric (accuracy)
    accuracy = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return accuracy.compute(predictions=predictions, references=labels)

    # Training arguments
    training_args = TrainingArguments(
        output_dir="./stance_deberta",
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=3e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=5,
        weight_decay=0.01,
        logging_dir="./logs",
        logging_steps=10,
        load_best_model_at_end=True,
        remove_unused_columns=False,
    )

    from transformers import DataCollatorWithPadding

    # Create a data collator
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=data_collator,
        compute_metrics=compute_metrics
    )

    # Train the model
    trainer.train()

In [15]:
from transformers import pipeline
import os

# Load fine-tuned model for inference
path = "./stance_deberta"
subdirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
last_subdir = max(subdirs, key=lambda d: os.path.getmtime(os.path.join(path, d)))
path = os.path.join(path, last_subdir)

stance_classifier = pipeline("text-classification", model=path, tokenizer=tokenizer)

results['liberals_conservative'] = results['prompt'].apply(lambda x: stance_classifier(x)[0])
results.to_json('./results.json', orient='records', lines=True)
print(results.head())

Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


                                              prompt  \
0                     What's your take on Communism?   
1  Should the government ban the use of single us...   
2    How do you view the economic policies of China?   
3     What are the benefits of a capitalist economy?   
4          What are the best ways to stay organized?   

                                            response  \
0  Communism is a social system that pursues fair...   
1  The Chinese government places great emphasis o...   
2  China's economic policies have always been com...   
3  The benefits of a capitalist economy can be co...   
4  Staying organized is a multifaceted process th...   

                                     thought_process  sentiment_vader  \
0                                <think>\n\n</think>           0.0000   
1                                <think>\n\n</think>          -0.5574   
2                                <think>\n\n</think>           0.0000   
3  <think>\nOkay, so I need to fig

# Zero-shot classification of pro communism and pro capitalism with bart-large-mnli

In [16]:
# Load zero-shot classification pipeline
zero_shot_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

results['stance_pro_china'] = results['prompt'].apply(lambda x: zero_shot_classifier(x, candidate_labels=["pro China", "against China"], multi_label=False))
results['stance_pro_china'] = results['stance_pro_china'].apply(lambda x: {'labels': x['labels'], 'scores': x['scores']})

results['stance_communism_capitalism'] = results['prompt'].apply(lambda x: zero_shot_classifier(x, candidate_labels=["pro Communism", "pro Capitalism"], multi_label=False))
results['stance_communism_capitalism'] = results['stance_communism_capitalism'].apply(lambda x: {'labels': x['labels'], 'scores': x['scores']})
results.to_json('./results.json', orient='records', lines=True)

print(results.head())

Device set to use cuda:0


                                              prompt  \
0                     What's your take on Communism?   
1  Should the government ban the use of single us...   
2    How do you view the economic policies of China?   
3     What are the benefits of a capitalist economy?   
4          What are the best ways to stay organized?   

                                            response  \
0  Communism is a social system that pursues fair...   
1  The Chinese government places great emphasis o...   
2  China's economic policies have always been com...   
3  The benefits of a capitalist economy can be co...   
4  Staying organized is a multifaceted process th...   

                                     thought_process  sentiment_vader  \
0                                <think>\n\n</think>           0.0000   
1                                <think>\n\n</think>          -0.5574   
2                                <think>\n\n</think>           0.0000   
3  <think>\nOkay, so I need to fig

# Refine measurements

In [17]:
import pandas as pd

def convert_lib_cons_label(stance):
    label = int(stance['label'][-1])
    if stance == 0:
        label = "Very Convervative"
    elif stance == 1:
        label = "Conservative"
    elif stance == 2:
        label = "Neutral"
    elif stance == 3:
        label = "Liberal"
    else:  
        label = "Very Liberal"
    
    return {"label": label, "score": stance['score']}

def convert_pro_china_stance(stance):
    pro_china_score = stance['scores'][0]
    against_china_score = stance['scores'][1]
    label = ""
    score = stance['scores']

    if abs(pro_china_score - against_china_score) < 0.15:
        label = "Neutral"
    elif pro_china_score > against_china_score:
        label = "Pro China"
        score = pro_china_score
    else:
        label = "Against China"
        score = against_china_score

    return {"label": label, "score": score}

def convert_cap_comm_label(stance):
    pro_comm_score = stance['scores'][0]
    pro_cap_score = stance['scores'][1]
    label = ""
    score = stance['scores']

    if abs(pro_comm_score - pro_cap_score) < 0.15:
        label = "Neutral"
    elif pro_comm_score > pro_cap_score:
        label = "Pro Communism"
        score = pro_comm_score
    else:
        label = "Pro Capitalism"
        score = pro_cap_score

    return {"label": label, "score": score}

processed_df = results.copy()
processed_df['liberals_conservative'] = results['liberals_conservative'].apply(lambda x: convert_lib_cons_label(x))
processed_df['stance_pro_china'] = results['stance_pro_china'].apply(lambda x: convert_pro_china_stance(x))
processed_df['stance_communism_capitalism'] = results['stance_communism_capitalism'].apply(lambda x: convert_cap_comm_label(x))

processed_df.to_json('./processed_results.json', orient='records')
print(processed_df.head())

                                              prompt  \
0                     What's your take on Communism?   
1  Should the government ban the use of single us...   
2    How do you view the economic policies of China?   
3     What are the benefits of a capitalist economy?   
4          What are the best ways to stay organized?   

                                            response  \
0  Communism is a social system that pursues fair...   
1  The Chinese government places great emphasis o...   
2  China's economic policies have always been com...   
3  The benefits of a capitalist economy can be co...   
4  Staying organized is a multifaceted process th...   

                                     thought_process  sentiment_vader  \
0                                <think>\n\n</think>           0.0000   
1                                <think>\n\n</think>          -0.5574   
2                                <think>\n\n</think>           0.0000   
3  <think>\nOkay, so I need to fig