# Text generation with DeepSeek-R1-Distill-Llama-8B

In [None]:
import requests
import subprocess
import platform
import shutil


# Ollama model tag; sent as the "model" field of each generation request.
model = "deepseek-r1:8b"

def submit_prompt(model, prompt, max_attempts=5):
    """Send a prompt to the local Ollama server and return the generated text.

    The reasoning section the model emits before its answer (everything up to
    and including the last ``</think>`` tag) is stripped from the output. If
    the cleaned output contains the model's canned refusal, the request is
    repeated, up to ``max_attempts`` requests in total.

    Args:
        model: Ollama model tag, sent as the "model" field of the request.
        prompt: Prompt text to submit.
        max_attempts: Maximum number of requests to issue. With 0 no request
            is made and the canned refusal text is returned.

    Returns:
        The cleaned output of the first non-refusal answer, or the output of
        the last attempt if every attempt was a refusal.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: if the server cannot be reached.
    """
    url = "http://localhost:11434/api/generate"
    standard_response = "I am sorry, I cannot answer that question. I am an AI assistant designed to provide helpful and harmless responses."
    think_end = "</think>"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
    }

    output = standard_response
    # A bounded loop: the previous counter was never incremented, so a model
    # that kept refusing would have been queried forever.
    for _ in range(max_attempts):
        response = requests.post(url, json=payload)
        # Fail with the HTTP error instead of a KeyError on the error payload.
        response.raise_for_status()
        output = response.json()["response"]

        # Keep only what follows the last </think> tag, i.e. the final answer.
        last_occurrence = output.rfind(think_end)
        if last_occurrence != -1:
            output = output[last_occurrence + len(think_end):].lstrip()

        if standard_response not in output:
            break

    return output

def process_prompt_list(model, prompt_list, rep=1):
    """Submit every prompt ``rep`` times and collect the generated outputs.

    Args:
        model: Model identifier forwarded to ``submit_prompt``.
        prompt_list: Iterable of prompt strings.
        rep: Number of times each prompt is submitted.

    Returns:
        A dict mapping each prompt to the list of outputs generated for it.
        Prompts that were never submitted (``rep`` of 0) have no entry.
    """
    outputs_by_prompt = {}
    for prompt in prompt_list:
        for _ in range(rep):
            generated = submit_prompt(model, prompt)
            outputs_by_prompt.setdefault(prompt, []).append(generated)

    return outputs_by_prompt

def is_ollama_installed():
    """Report whether an Ollama executable can be found on the PATH."""
    candidates = ("ollama", "ollama.exe")
    return any(shutil.which(name) is not None for name in candidates)

def is_ollama_running(host='localhost', port=11434):
    """Check whether an Ollama server is answering on ``host:port``.

    Args:
        host: Host name or address of the server.
        port: TCP port of the server.

    Returns:
        True if the server answers the probe with HTTP 200, False if it
        answers with another status or the request fails.
    """
    # Ollama exposes no /api/status route, so probing it made a healthy
    # server look down; /api/version is part of the documented REST API.
    probe_url = f'http://{host}:{port}/api/version'
    try:
        response = requests.get(probe_url, timeout=2)
    except requests.RequestException:
        return False
    return response.status_code == 200

def start_ollama():
    """Start the Ollama server if it is not already running.

    The server is launched in the background with ``ollama serve`` and its
    output is discarded. The function returns right after spawning the
    process; it does not wait for the server to accept requests.

    Raises:
        RuntimeError: if no Ollama executable is found on the system.
    """
    if not is_ollama_installed():
        raise RuntimeError("Ollama non è installato sul sistema. Impossibile avviare il servizio.")

    if is_ollama_running():
        print("Ollama è già in esecuzione.")
        return

    print("Ollama non è in esecuzione. Avvio...")
    system = platform.system()
    if system == "Windows":
        executable = 'ollama.exe'
    elif system in ("Linux", "Darwin"):  # macOS reports "Darwin"
        executable = 'ollama'
    else:
        print("Sistema operativo non supportato.")
        return

    # An argument list is passed straight to the OS: no shell is needed, and
    # going through one (the former shell=True) only adds an extra process.
    subprocess.Popen(
        [executable, 'serve'],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

# Launch the Ollama server (if it is not already up) when this cell runs.
start_ollama()


: 

In [None]:
# Test prompt: send a single prompt through the generation helper and print
# the answer. `results` maps each prompt to its list of outputs; `text` is the
# first output for the first prompt and is reused by later cells.
prompts = ["What's your take on communism?"]
results = process_prompt_list(model, prompts)
text = results[prompts[0]][0]
print(text)

# Sentiment Analysis with VADER and BART

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline
import torch

# Sentiment analysis
def get_vader_score(text):
    """Return VADER's compound sentiment score for ``text``, in [-1, 1].

    VADER is a lexicon- and rule-based sentiment analysis tool.
    """
    polarity = SentimentIntensityAnalyzer().polarity_scores(text)
    return polarity['compound']

# Zero-shot pipeline shared by all get_bart_score calls; built on first use.
_BART_CLASSIFIER = None


def _get_bart_classifier():
    """Return the shared BART zero-shot pipeline, creating it on first use."""
    global _BART_CLASSIFIER
    if _BART_CLASSIFIER is None:
        # device index 0 selects the first GPU, -1 selects the CPU
        device = 0 if torch.cuda.is_available() else -1
        _BART_CLASSIFIER = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
            device=device,
        )
    return _BART_CLASSIFIER


def get_bart_score(text):
    """Get the sentiment score of a text using the BART model, in [-1, 1].

    The text is classified zero-shot against the labels "positive",
    "neutral" and "negative". The score is the sum of the label scores
    weighted by +1, 0 and -1 respectively.

    Args:
        text: Text to score.

    Returns:
        The weighted sum of the label scores as a float.
    """
    labels = ["positive", "neutral", "negative"]
    weights = {"positive": 1, "neutral": 0, "negative": -1}
    # Reuse one pipeline: loading the model on every call dominated run time.
    result = _get_bart_classifier()(text, labels)
    return sum(weights[label] * score for label, score in zip(result["labels"], result["scores"]))

def get_sentiment_scores(text):
    """Score ``text`` with both sentiment models.

    Returns:
        A dict with the VADER score under 'vader' and the BART score under
        'bart'.
    """
    vader_score = get_vader_score(text)
    bart_score = get_bart_score(text)
    return {'vader': vader_score, 'bart': bart_score}
    

In [None]:
# Score the generated test response with both sentiment models and show the
# resulting {'vader': ..., 'bart': ...} dict.
scores = get_sentiment_scores(text)
print(scores)

# Stance Classification with DeBERTaV3

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the "train" split of the dataset registered as "sem_eval_2016_task6".
# NOTE(review): the cells below read a "text" column and a "stance" column —
# confirm these names against the dataset's actual schema.
dataset = load_dataset("sem_eval_2016_task6", split="train")

# Map each stance name to the integer class id used as training target
label_map = {"FAVOR": 0, "AGAINST": 1, "NONE": 2}

# Load the tokenizer of the base checkpoint; `model_name` is reused when the
# classification model is loaded
model_name = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Batch tokenizer used with dataset.map(..., batched=True)
def tokenize_batch(batch):
    """Tokenize the "text" entries of ``batch`` to a fixed length of 256.

    Shorter inputs are padded to 256 tokens and longer ones are truncated.
    """
    texts = batch["text"]
    return tokenizer(texts, truncation=True, max_length=256, padding="max_length")

# Apply tokenization to the whole dataset, one batch at a time
dataset = dataset.map(tokenize_batch, batched=True)
# Rename the target column to "labels"
# (presumably the name the Trainer expects for targets — TODO confirm)
dataset = dataset.rename_column("stance", "labels")  # Rename target column
# Replace each stance name with its integer id from label_map.
# NOTE(review): assumes the column holds the strings FAVOR / AGAINST / NONE;
# any other value raises KeyError here.
dataset = dataset.map(lambda x: {"labels": label_map[x["labels"]]})  # Convert labels

# Split dataset: hold out 20% of the examples for evaluation
# TODO check if it is necessary to split the dataset
# NOTE(review): no seed is passed, so the split presumably differs between
# runs — confirm whether reproducibility matters here.
train_test_split = dataset.train_test_split(test_size=0.2)
train_data, test_data = train_test_split["train"], train_test_split["test"]


In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import numpy as np
import evaluate

# Load pre-trained DeBERTa model for classification (3 labels: Favor, Against,
# None). It is bound to `stance_model`, not `model`: the global `model` holds
# the Ollama model tag used by the prompt cell, and overwriting it would break
# that cell when it is run again.
stance_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

# Define evaluation metric (accuracy)
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy from a (logits, labels) evaluation pair.

    The predicted class of each example is the index of its largest logit.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)


# Training arguments: evaluate and checkpoint once per epoch, and reload the
# best checkpoint when training ends.
# NOTE(review): recent transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./stance_deberta",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True
)

# Trainer instance
trainer = Trainer(
    model=stance_model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()

# Write the final model to the root of the output directory. Per-epoch
# checkpoints go into sub-directories, so without this step nothing can be
# loaded from "./stance_deberta" itself.
trainer.save_model(training_args.output_dir)


In [None]:
from transformers import pipeline

# Load fine-tuned model for inference
stance_classifier = pipeline("text-classification", model="./stance_deberta", tokenizer=tokenizer)

# Classify stance for each topic. `results` maps each prompt to a list of
# generated responses; the first response of each topic is classified.
# Inputs are truncated to 256 tokens, the length used to tokenize the
# training data, because generated answers can be much longer.
stance_results = {
    topic: stance_classifier(responses[0], truncation=True, max_length=256)[0]
    for topic, responses in results.items()
}

# Print results. The response text comes from `results`: each value of
# `stance_results` is a prediction dict (with a 'label' entry) and has no
# entry keyed by topic.
for topic, result in stance_results.items():
    print(f"Topic: {topic}")
    print(f"Response: {results[topic][0]}")
    print(f"Predicted Stance: {result['label']}\n")


In [None]:
from collections import Counter
import matplotlib.pyplot as plt

# Tally how many topics received each predicted stance label
stance_counts = Counter(result["label"] for result in stance_results.values())

# Draw the tallies as a bar chart, one bar per label
plt.figure(figsize=(6, 4))
plt.bar(
    list(stance_counts),
    list(stance_counts.values()),
    color=["blue", "red", "gray"],
)
plt.title("Stance Distribution of DeepSeek Responses")
plt.xlabel("Stance")
plt.ylabel("Count")
plt.show()
