# Llama 3.1 8B Instruct — MFTC

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="fireworks-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFTCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with meta-llama/Llama-3.1-8B-Instruct
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="meta-llama/Llama-3.1-8B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every tweet (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['tweet_text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [00:36<00:00,  2.76it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.23      0.27      0.25        11
   Fairness/Cheating       0.57      0.25      0.35        16
    Loyalty/Betrayal       0.08      0.17      0.11         6
Authority/Subversion       0.22      0.15      0.18        13
  Purity/Degradation       1.00      0.17      0.29         6
           Non-Moral       0.60      0.06      0.10        53

           micro avg       0.30      0.13      0.18       105
           macro avg       0.45      0.18      0.21       105
        weighted avg       0.50      0.13      0.18       105
         samples avg       0.07      0.13      0.09       105






# deepseek v3

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="fireworks-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFTCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with deepseek-ai/DeepSeek-V3
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="deepseek-ai/DeepSeek-V3",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every tweet (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['tweet_text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [00:41<00:00,  2.39it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.00      0.00      0.00        11
   Fairness/Cheating       0.50      0.12      0.20        16
    Loyalty/Betrayal       0.33      0.17      0.22         6
Authority/Subversion       0.00      0.00      0.00        13
  Purity/Degradation       0.00      0.00      0.00         6
           Non-Moral       0.62      0.09      0.16        53

           micro avg       0.42      0.08      0.13       105
           macro avg       0.24      0.06      0.10       105
        weighted avg       0.41      0.08      0.13       105
         samples avg       0.07      0.08      0.07       105






# Google Gemma 3 12B-it

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFTCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with google/gemma-3-12b-it
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="google/gemma-3-12b-it",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every tweet (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['tweet_text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [01:33<00:00,  1.07it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.33      0.27      0.30        11
   Fairness/Cheating       0.36      0.25      0.30        16
    Loyalty/Betrayal       0.10      0.17      0.12         6
Authority/Subversion       0.10      0.08      0.09        13
  Purity/Degradation       0.17      0.17      0.17         6
           Non-Moral       0.50      0.04      0.07        53

           micro avg       0.24      0.11      0.15       105
           macro avg       0.26      0.16      0.17       105
        weighted avg       0.37      0.11      0.14       105
         samples avg       0.06      0.11      0.07       105






# Mistral 7B Instruct v0.3

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="novita",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFTCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with mistralai/Mistral-7B-Instruct-v0.3
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="mistralai/Mistral-7B-Instruct-v0.3",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every tweet (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['tweet_text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [00:55<00:00,  1.79it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.13      0.27      0.18        11
   Fairness/Cheating       0.80      0.25      0.38        16
    Loyalty/Betrayal       0.00      0.00      0.00         6
Authority/Subversion       0.25      0.15      0.19        13
  Purity/Degradation       0.33      0.33      0.33         6
           Non-Moral       0.88      0.13      0.23        53

           micro avg       0.36      0.17      0.23       105
           macro avg       0.40      0.19      0.22       105
        weighted avg       0.63      0.17      0.24       105
         samples avg       0.13      0.17      0.14       105






# Qwen2.5-7B-Instruct

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFTCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with Qwen/Qwen2.5-7B-Instruct
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-7B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every tweet (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['tweet_text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [01:39<00:00,  1.01it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.00      0.00      0.00        11
   Fairness/Cheating       1.00      0.12      0.22        16
    Loyalty/Betrayal       0.00      0.00      0.00         6
Authority/Subversion       0.25      0.15      0.19        13
  Purity/Degradation       0.17      0.17      0.17         6
           Non-Moral       0.50      0.04      0.07        53

           micro avg       0.22      0.07      0.10       105
           macro avg       0.32      0.08      0.11       105
        weighted avg       0.45      0.07      0.10       105
         samples avg       0.06      0.07      0.06       105






# MFRC Zero shot

## llama

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="fireworks-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFRCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with meta-llama/Llama-3.1-8B-Instruct
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="meta-llama/Llama-3.1-8B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every text (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [00:38<00:00,  2.58it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.00      0.00      0.00         4
   Fairness/Cheating       0.08      0.14      0.11         7
    Loyalty/Betrayal       0.00      0.00      0.00         3
Authority/Subversion       0.00      0.00      0.00         0
  Purity/Degradation       0.33      1.00      0.50         1
           Non-Moral       1.00      0.06      0.12        63

           micro avg       0.13      0.08      0.10        78
           macro avg       0.24      0.20      0.12        78
        weighted avg       0.82      0.08      0.11        78
         samples avg       0.05      0.06      0.05        78






## deepseek v3

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="fireworks-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFRCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with deepseek-ai/DeepSeek-V3
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="deepseek-ai/DeepSeek-V3",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every text (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [00:46<00:00,  2.17it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.00      0.00      0.00         4
   Fairness/Cheating       0.00      0.00      0.00         7
    Loyalty/Betrayal       0.00      0.00      0.00         3
Authority/Subversion       0.00      0.00      0.00         0
  Purity/Degradation       0.00      0.00      0.00         1
           Non-Moral       0.89      0.13      0.22        63

           micro avg       0.47      0.10      0.17        78
           macro avg       0.15      0.02      0.04        78
        weighted avg       0.72      0.10      0.18        78
         samples avg       0.08      0.08      0.08        78






## google gemma 3

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFRCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with google/gemma-3-12b-it
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="google/gemma-3-12b-it",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every text (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [02:17<00:00,  1.38s/it]

                      precision    recall  f1-score   support

           Care/Harm       0.17      0.25      0.20         4
   Fairness/Cheating       0.18      0.43      0.25         7
    Loyalty/Betrayal       0.00      0.00      0.00         3
Authority/Subversion       0.00      0.00      0.00         0
  Purity/Degradation       0.00      0.00      0.00         1
           Non-Moral       1.00      0.11      0.20        63

           micro avg       0.20      0.14      0.16        78
           macro avg       0.22      0.13      0.11        78
        weighted avg       0.83      0.14      0.19        78
         samples avg       0.09      0.11      0.10        78






## mistral

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="novita",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFRCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with mistralai/Mistral-7B-Instruct-v0.3
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="mistralai/Mistral-7B-Instruct-v0.3",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every text (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [00:42<00:00,  2.34it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.00      0.00      0.00         4
   Fairness/Cheating       0.00      0.00      0.00         7
    Loyalty/Betrayal       0.00      0.00      0.00         3
Authority/Subversion       0.00      0.00      0.00         0
  Purity/Degradation       0.00      0.00      0.00         1
           Non-Moral       1.00      0.02      0.03        63

           micro avg       0.04      0.01      0.02        78
           macro avg       0.17      0.00      0.01        78
        weighted avg       0.81      0.01      0.03        78
         samples avg       0.01      0.01      0.01        78






## Qwen2.5-7B-Instruct

In [None]:
import pandas as pd
from sklearn.metrics import classification_report
from huggingface_hub import InferenceClient
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

import os

# Initialize Hugging Face Inference Client.
# SECURITY: the access token is read from the HF_TOKEN environment variable so
# it is never stored in the notebook. A token that was ever saved here in
# plain text should be revoked and regenerated.
client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],  # raises KeyError if the variable is unset
)

# Define the 6 moral classes used in the dataset
moral_classes = [
    "Care/Harm",
    "Fairness/Cheating",
    "Loyalty/Betrayal",
    "Authority/Subversion",
    "Purity/Degradation",
    "Non-Moral"
]

# Load the CSV file
df = pd.read_csv("/content/llm MFRCfoundations.csv")

# Build each row's gold label list from the binary class columns
def extract_labels(row):
    """Return the sorted moral classes whose column equals 1 in `row`."""
    active = []
    for name in moral_classes:
        if row[name] == 1:
            active.append(name)
    active.sort()
    return active

df['labels'] = df.apply(extract_labels, axis=1)

# Zero-shot classification with Qwen/Qwen2.5-7B-Instruct
def parse_label_response(response, allowed):
    """Extract known moral labels from a raw model reply.

    Args:
        response: Text returned by the model; expected to contain a bracketed
            list such as ``['Care/Harm', 'Non-Moral']``, possibly inside a
            Markdown code fence or surrounded by extra words.
        allowed: Iterable of valid label names; anything else is dropped.

    Returns:
        Sorted list of unique labels from `allowed`, or ``[]`` when the reply
        contains no bracketed list.
    """
    import re

    # Use the first [...] span so fenced or chatty replies are still parsed
    # (a plain startswith("[") check discards them).
    found = re.search(r"\[([^\[\]]*)\]", response)
    if found is None:
        return []
    valid = set(allowed)
    # SECURITY: the reply is untrusted text, so it is split and matched against
    # the known labels instead of being passed to eval(), which would execute
    # whatever code the model produced.
    items = (part.strip(" \t\r\n'\"`") for part in found.group(1).split(","))
    return sorted({item for item in items if item in valid})


def classify_text(text):
    """Ask the chat model which moral foundations `text` expresses.

    Args:
        text: Document to classify.

    Returns:
        Sorted list of labels from `moral_classes`. An empty list is returned
        when `text` is not a string (e.g. NaN from an empty CSV cell), when
        the request fails, or when the reply holds no recognisable list.
    """
    import logging

    if not isinstance(text, str):
        return []

    prompt = f"""
Classify the following text into moral foundation categories from this list:
{', '.join(moral_classes)}.Your response must
 contain just the list of the detected moral traits, do
 not add any additional word or introductions.
Return the result strictly in this Python list format: [<list of detected moral traits>].
If none apply, return ['Non-Moral'] only.

Text:
\"\"\"{text.strip()}\"\"\"
"""
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-7B-Instruct",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        response = completion.choices[0].message.content.strip()
    except Exception as exc:
        # Best effort: keep the run going, but report the failure instead of
        # silently scoring it as "no labels". A bare `except:` would also trap
        # KeyboardInterrupt and make the loop impossible to stop.
        logging.getLogger(__name__).warning("classify_text request failed: %s", exc)
        return []
    return parse_label_response(response, moral_classes)

# Run the classifier over every text (tqdm shows progress)
df['predicted'] = list(map(classify_text, tqdm(df['text'])))

# Binarize gold and predicted label lists into indicator matrices
mlb = MultiLabelBinarizer(classes=moral_classes)
y_true = mlb.fit_transform(df['labels'])
y_pred = mlb.transform(df['predicted'])

# Per-class precision/recall/F1; undefined ratios are reported as 0
report = classification_report(
    y_true,
    y_pred,
    target_names=moral_classes,
    zero_division=0,
)
print(report)


100%|██████████| 100/100 [01:37<00:00,  1.03it/s]

                      precision    recall  f1-score   support

           Care/Harm       0.00      0.00      0.00         4
   Fairness/Cheating       0.00      0.00      0.00         7
    Loyalty/Betrayal       0.00      0.00      0.00         3
Authority/Subversion       0.00      0.00      0.00         0
  Purity/Degradation       0.17      1.00      0.29         1
           Non-Moral       1.00      0.06      0.12        63

           micro avg       0.20      0.06      0.10        78
           macro avg       0.19      0.18      0.07        78
        weighted avg       0.81      0.06      0.10        78
         samples avg       0.05      0.05      0.05        78




