In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from dotenv import load_dotenv
from sklearn.metrics import normalized_mutual_info_score
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import asyncio
import tensorflow as tf
import torch


load_dotenv()
LLAMA = os.getenv("LLAMA")
login(token=LLAMA)


# text is regarded as key
seven_newsgroups_data = pd.read_csv('fetch_7newsgroups.csv')

def split_train_test(data):
    train, test = train_test_split(data, test_size=0.2)

    train = train.sort_values(by='label')
    train_first_doc = train.groupby('label').first().reset_index()

    train_first_doc['div'] = 'train'
    test['div'] = 'test'

    return test, train_first_doc

test_df, instruction_df = split_train_test(seven_newsgroups_data)
test_df["name"] = range(1, len(test_df) + 1)
print(test_df.iloc[5])
print("test Length: ",len(test_df))
print("instruction Length: ", len(instruction_df))
print("original length: ", len(seven_newsgroups_data))


  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0                                                   12178
text             From: uabdpo.dpo.uab.edu!gila005 (Stephen Holl...
label20                                                         13
labels_name20                                              sci.med
label                                                      Science
div                                                           test
name                                                             6
Name: 12178, dtype: object
test Length:  3770
instruction Length:  7
original length:  18846


In [2]:
def make_prompts(bbc_instructions, bbc_data):
    prompts = []
    
    # General instructions and fixed texts
    general_instruction = (
        "You are a perfect topic modeling machine. Given a text and the different topics, "
        "you will classify the texts to the correct topic. First you will receive the topics, "
        "afterwards an example and finally the text you have to assign one of the before mentioned topics to."
    )
    topics = "The topics are sport, technology, politics, vehicles, religion, miscellaneous and science. Please make sure, you know the topics and their meaning."
    transition_to_examples = "Now an example for each of the categories will follow."
    transition_to_text_to_classify = (
        "Now the text, you have to classify will follow. Please assess its topic and answer only the topic of it."
    )

    # Iterate through the test DataFrame rows
    for _, test_row in bbc_data.iterrows():
        prompt = general_instruction + "\n" + topics + "\n" + transition_to_examples + "\n"

        # Iterate through instruction DataFrame to add examples
        for _, instruction_row in bbc_instructions.iterrows():
            category = instruction_row['label']
            example_text = instruction_row['text']
            prompt += f"For the following text: \n{example_text}\nThe correct answer would be: {category}\n"

        # Add the actual text to classify from the test set
        text_to_classify = test_row['text']
        prompt += transition_to_text_to_classify + "\n" + text_to_classify + "\n"
        name = f"{test_row['name']}"
        #print(name)
        prompt_dict = {}
        prompt_dict[name] = prompt
        prompts.append(prompt_dict)
    return prompts            

prompts = make_prompts(instruction_df, test_df)
prompts = prompts[:1]
print(prompts[0])
print(len(prompts))

1


In [9]:
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    framework="torch",
)

async def prompt_llama(prompt):
    semaphore = asyncio.Semaphore(5)
    async with semaphore:

        name = list(prompt.keys())[0]
        promptAI = prompt[name]

        response = await asyncio.to_thread(pipe, promptAI, max_length=100)

        text_response = response[0]["generated_text"]

        print(f"Response:", text_response)
        return {name: text_response}

ValueError: Pipeline cannot infer suitable model classes from meta-llama/Llama-3.2-1B

In [11]:
from transformers import AutoTokenizer, LlamaForCausalLM

model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
print("initialized ")
prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")
print("tokenized ")
# Generate
generate_ids = model.generate(inputs, max_length=30)
print("generated ")
tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

KeyboardInterrupt: 

In [None]:
def add_correct_anser(results, test_df):
    result_with_correct_answer = []
    for result in results:
        for key, value in result.items():
            value = value.replace("\n", "")
            value = value.replace(" \n", "")
            value = value.replace(" ","")
            value = value.replace("*","")
            value = value.lower()
            matching_rows = test_df.loc[test_df['name'] == int(key), 'label']
            if not matching_rows.empty:
                if type(matching_rows.values[0]) == float: 
                    print(f"Nan value", matching_rows.values[0])
                else:
                    category_value = matching_rows.values[0]
                    category_value = category_value.lower()
                    result_with_correct_answer.append({key: (value, category_value)})
            else:
                print(f"No matching category found for key: {key}")
                result_with_correct_answer.append({key: (value, None)})
    
    return result_with_correct_answer
results_with_correct_answer = add_correct_anser(results, test_df)
print(len(results_with_correct_answer))
print(results_with_correct_answer)

In [None]:
def extract_ground_truth_and_predictions(results_with_correct_answer):
    ground_truth = []
    predictions = []
    for result in results_with_correct_answer:
        for key, value in result.items():
            ground_truth.append(value[1])
            predictions.append(value[0])
    return ground_truth, predictions
ground_truth, predictions = extract_ground_truth_and_predictions(results_with_correct_answer)


In [None]:
def calculate_NMI(ground_truth, predictions):
    
    nmi_score = normalized_mutual_info_score(ground_truth, predictions)
    print(f"Normalized Mutual Information Score: {nmi_score}")
    return nmi_score


In [None]:
import numpy as np
from collections import Counter

def calculate_purity(predicted_labels, true_labels):
    # Convert lists to numpy arrays for easier indexing
    predicted_labels = np.array(predicted_labels)
    true_labels = np.array(true_labels)
    
    # Get unique clusters
    unique_clusters = np.unique(predicted_labels)
    
    # Total number of instances
    total_instances = len(true_labels)
    
    # Calculate the number of correctly classified instances in each cluster
    correctly_classified = 0
    for cluster in unique_clusters:
        # Get the true labels of instances in the current cluster
        indices_in_cluster = np.where(predicted_labels == cluster)[0]
        labels_in_cluster = true_labels[indices_in_cluster]
        
        # Determine the most common true label in this cluster
        majority_label_count = Counter(labels_in_cluster).most_common(1)[0][1]
        
        # Add the number of correctly classified instances in this cluster
        correctly_classified += majority_label_count
    
    # Calculate purity
    purity = correctly_classified / total_instances
    print(f"Purity: {purity}")
    return purity





In [None]:
def calculate_accuracy(predicted_labels, true_labels):
    # Ensure that the predicted_labels and true_labels have the same length
    if len(predicted_labels) != len(true_labels):
        raise ValueError("The length of predicted and true labels must be the same.")
    
    # Count the number of correct predictions
    correct_predictions = sum(1 for pred, true in zip(predicted_labels, true_labels) if pred == true)
    
    # Calculate accuracy
    accuracy = correct_predictions / len(true_labels)
    print(f"Accuracy: {accuracy}")
    return accuracy





In [None]:
from sklearn.metrics import f1_score
def calculate_f1_score(ground_truth, predictions):

    f1 = f1_score(ground_truth, predictions, average='micro') # Because there might be over/ under representation of some classes
    print(f"F1 Score: {f1}")
    return f1



In [None]:
calculate_NMI(ground_truth, predictions)
calculate_purity(predictions, ground_truth)
calculate_accuracy(predictions, ground_truth)
calculate_f1_score(ground_truth, predictions)