In [None]:
# Install the notebook's dependencies quietly (-q) via shell escapes.
# NOTE(review): `evaluate` is installed but not imported in the cells visible here — confirm it is still needed.
!pip install -q evaluate

# NOTE(review): transformers is upgraded to whatever release is current at run time,
# so behaviour may differ between runs; wandb below is pinned to an exact version.
!pip install -q --upgrade transformers
!pip install -q wandb==0.16.6

In [None]:
# Initialize Weights & Biases (W&B) in disabled mode.
# Presumably this keeps the Hugging Face Trainer used in later cells from
# trying to log to W&B — confirm.

import wandb
wandb.init(mode="disabled")

# Recursively removes the Kaggle working directory itself (not just its contents).
# NOTE(review): later cells write their output files with relative paths — confirm
# the current directory is still usable after this command.
!rm -r /kaggle/working/
# NOTE(review): the banner says "Training", but the cells visible below only run prediction.
print("Training started----------------------------------------------")

In [None]:
import os
import logging
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, DataCollatorWithPadding
import numpy as np
from datasets import Dataset

# logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)


def preprocess_function(examples, **fn_kwargs):
    """Tokenize the ``"text"`` entry of a batch.

    Args:
        examples: Mapping that holds the texts to tokenize under the key ``"text"``.
        **fn_kwargs: Must contain ``tokenizer``, a callable invoked as
            ``tokenizer(texts, truncation=True, max_length=512)``.

    Returns:
        Whatever the tokenizer callable returns for the batch.
    """
    tokenize = fn_kwargs['tokenizer']
    batch_texts = examples["text"]
    # Inputs longer than 512 tokens are truncated by the tokenizer.
    return tokenize(batch_texts, truncation=True, max_length=512)

# Function to make predictions for each model
def predict_and_save(test_df, model_path, id2label, label2id, output_file):
    """Run one fine-tuned classifier over ``test_df`` and write its predictions as JSONL.

    Args:
        test_df: DataFrame with a ``text`` column (tokenized by
            ``preprocess_function``) and a ``testset_id`` column (copied to the output).
        model_path: Location passed to ``from_pretrained`` for both the tokenizer
            and the sequence-classification model.
        id2label: Mapping from class index to label name, forwarded to the model config.
        label2id: Mapping from label name to class index; its length is used as ``num_labels``.
        output_file: Path of the JSON-lines file to write (one record per row with
            ``testset_id`` and ``label``).

    Returns:
        Array of predicted class indices (argmax over the last axis of the model
        outputs), in the same row order as ``test_df``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, num_labels=len(label2id), id2label=id2label, label2id=label2id
    )

    test_dataset = Dataset.from_pandas(test_df)
    tokenized_test_dataset = test_dataset.map(
        preprocess_function, batched=True, fn_kwargs={'tokenizer': tokenizer}
    )
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # The Trainer is only used as a batched prediction loop here. The
    # `tokenizer=` keyword is deprecated in favour of `processing_class`
    # (NOTE(review): believed to be removed in transformers 5.0 — confirm),
    # and this notebook installs the latest transformers. Since the padding
    # collator is passed explicitly, the Trainer does not need the tokenizer
    # for prediction, so the keyword is omitted to work on old and new versions.
    trainer = Trainer(
        model=model,
        data_collator=data_collator,
    )

    # Get predictions: argmax over the class axis turns model outputs into labels.
    predictions = trainer.predict(tokenized_test_dataset)
    preds = np.argmax(predictions.predictions, axis=-1)

    # Pair ids and predictions strictly by position; `test_df` may carry a
    # shuffled (sampled) index, which must not influence the pairing.
    predictions_df = pd.DataFrame({
        'testset_id': test_df['testset_id'].to_numpy(),
        'label': preds,
    })
    predictions_df.to_json(output_file, lines=True, orient='records')

    print(f"Predictions saved to {output_file}")
    return preds

# Function to evaluate predictions
def evaluate(predictions, gold_labels):
    """Score predictions against gold labels.

    Both frames are joined on ``testset_id``; their ``label`` columns become
    ``label_pred`` and ``label_gold`` in the joined frame.

    Args:
        predictions: DataFrame with ``testset_id`` and ``label`` (predicted).
        gold_labels: DataFrame with ``testset_id`` and ``label`` (reference).

    Returns:
        Tuple ``(macro_f1, micro_f1, accuracy)``.
    """
    joined = predictions.merge(gold_labels, on=['testset_id'], suffixes=('_pred', '_gold'))
    y_true = joined['label_gold']
    y_pred = joined['label_pred']

    # Same metric call for both averaging modes; undefined F1 terms count as 0.
    f1_by_average = {
        mode: f1_score(y_true, y_pred, average=mode, zero_division=0)
        for mode in ("macro", "micro")
    }
    accuracy = accuracy_score(y_true, y_pred)

    return f1_by_average["macro"], f1_by_average["micro"], accuracy

In [None]:
# Paths to your trained models
# NOTE(review): this first list is dead — `model_paths` is reassigned a few lines
# below, so only the second list is used by the prediction loop.
model_paths = [
    "/kaggle/input/multi-lingual-coling-ensemble/mullang_mdeberta-v3-base_mxlen512",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_bert-base-multilingual-cased",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_bert-base-multilingual-uncased",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_canine-c",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_distilbert-base-multilingual-cased",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_mdeberta-v3-base",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_rembert_mxlen128",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_rembert_mxlen256",
#     "/kaggle/input/multi-lingual-coling-ensemble/mul_lin_ens_1/mullang_xlm-roberta-base"
]

# Effective model list: each entry is a model directory that is loaded in turn.
model_paths = [
    "/kaggle/input/mul-ensemble-coling-final/mullang_rembert",
#     "/kaggle/input/mul-ensemble-coling-final/mullang_xlm-roberta-base(1)",
#     "/kaggle/input/mul-ensemble-coling-final/mullang_bert-base-multilingual-cased(1)"
]

# Load the test set (JSON lines). The commented-out line reads a dev file instead.
# test_df = pd.read_json('/kaggle/input/short-mul-lang-coling/multilingual_dev_for_score.jsonl', lines=True)
test_df = pd.read_json('/kaggle/input/coling-25-task-1/test_set_multilingual.jsonl', lines=True)

# NOTE(review): this alias has no effect — `test_df_tem` is reassigned below before it is read.
test_df_tem = test_df

# Keep a reproducible 0.01% sample of the rows (random_state=42).
# NOTE(review): int(len * 0.0001) is 0 for frames with fewer than 10,000 rows, which
# would produce an empty sample; this looks like a smoke-test setting — confirm.
test_df = test_df.sample(n=int(len(test_df) * 0.0001), random_state=42)
# print(test_df['label'].value_counts())

# Only the id and text columns are handed to the model.
test_df_tem = test_df[['testset_id','text']]




# Class-index <-> label-name mappings passed to the model config.
id2label = {0: "human", 1: "machine"}
label2id = {"human": 0, "machine": 1}

# Will hold the name of the most recently written prediction file
# (stays None if the loop below never runs).
file_path_op = None

# Iterate over each model path, writing one prediction file per model.
# After the loop, `file_path_op` names the file written for the LAST model only.
for model_path in model_paths:
    model_name = os.path.basename(model_path)  # Extract model name from path
    
    # Generate prediction file name based on model name
    # (relative path, so it is written to the current working directory)
    output_file = f"taskb_{model_name}.jsonl"
    file_path_op = output_file
    
    print(f"\n\n{model_name} is predicting-----------")
    
    # Make predictions and save
    # NOTE(review): the returned array is not used while evaluation below is commented out.
    predictions = predict_and_save(test_df_tem, model_path, id2label, label2id, output_file)
    
    # Load predictions for evaluation
    # NOTE(review): `pred_df` is only consumed by the commented-out evaluation code.
    pred_df = pd.read_json(output_file, lines=True)
    
    # Evaluate predictions
#     macro_f1, micro_f1, accuracy = evaluate(pred_df, test_df)
    
# #     # Log the scores
# #     logging.info(f"Scores for {model_name}:")
# #     logging.info(f"Macro-F1: {macro_f1:.5f}, Micro-F1: {micro_f1:.5f}, Accuracy: {accuracy:.5f}")
    
#     print(f"Scores for {model_name}:")
#     print(f"Macro-F1: {macro_f1:.5f}, Micro-F1: {micro_f1:.5f}, Accuracy: {accuracy:.5f}")



In [None]:
import os
import argparse
import logging
import json
import pandas as pd


# logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
# Columns every submission file must contain.
COLUMNS = ['testset_id', 'label']


def check_format(file_path):
    """Validate a JSON-lines submission file.

    A file is accepted when it exists, parses as JSON lines, contains every
    column in ``COLUMNS``, has no missing values in those columns, and all
    ``label`` values are 0 or 1. Each failure is reported via ``logging.error``.

    Args:
        file_path: Path of the file to check. ``None`` is treated as a missing file.

    Returns:
        ``True`` if the file passes all checks, ``False`` otherwise.
    """
    # os.path.exists(None) raises TypeError, so handle a missing path explicitly.
    if file_path is None or not os.path.exists(file_path):
        logging.error("File doesnt exists: {}".format(file_path))
        return False

    try:
        submission = pd.read_json(file_path, lines=True)
    except Exception:
        # Deliberately broad (parsers raise several error types), but unlike a
        # bare `except:` this lets KeyboardInterrupt / SystemExit propagate.
        logging.error("File is not a valid json file: {}".format(file_path))
        return False

    # Report missing columns as such instead of as a JSON parsing problem.
    missing_columns = [column for column in COLUMNS if column not in submission.columns]
    if missing_columns:
        logging.error("Missing column(s) {} in file {}".format(missing_columns, file_path))
        return False
    submission = submission[COLUMNS]

    for column in COLUMNS:
        if submission[column].isna().any():
            logging.error("NA value in file {} in column {}".format(file_path, column))
            return False

    # Only the two class ids 0 and 1 are valid labels.
    if not submission['label'].isin(range(0, 2)).all():
        logging.error("Unknown Label in file {}".format(file_path))
        logging.error("Unique Labels in the file are {}".format(submission['label'].unique()))
        return False

    return True

  
# Validate the prediction file recorded in `file_path_op` and print a one-line verdict.
# NOTE(review): `file_path_op` is set by an earlier cell and holds only the last
# file written there (or None if no model ran) — confirm that is the file intended.
check_result = check_format(file_path_op)
result = 'Format is correct' if check_result else 'Something wrong in file format'
#     logging.info("Checking file: {}. Result: {}".format(prediction_file_path, result))
print(file_path_op ," ",result)

# Ensemble

In [None]:
import os
import logging
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, DataCollatorWithPadding
import numpy as np
from datasets import Dataset

# Preprocessing function for tokenization
def preprocess_function(examples, **fn_kwargs):
    """Normalize the ``"text"`` entries of a batch to strings and tokenize them.

    List entries are converted element-wise to ``str`` and joined with single
    spaces; every other entry is passed through ``str``. The normalized list
    is written back to ``examples["text"]`` before tokenization.

    Args:
        examples: Mutable mapping holding the batch texts under ``"text"``.
        **fn_kwargs: Must contain ``tokenizer``, a callable invoked as
            ``tokenizer(texts, truncation=True, max_length=512)``.

    Returns:
        Whatever the tokenizer callable returns for the normalized batch.
    """
    tok = fn_kwargs['tokenizer']

    normalized = []
    for entry in examples["text"]:
        if isinstance(entry, list):
            normalized.append(' '.join(str(piece) for piece in entry))
        else:
            normalized.append(str(entry))

    # Store the cleaned texts back on the batch, then tokenize what is stored.
    examples["text"] = normalized
    return tok(examples["text"], truncation=True, max_length=512)


# Function to get model predictions and probabilities
def predict_probs(test_df, model_path, id2label, label2id):
    """Run one fine-tuned classifier over ``test_df`` and return its raw outputs.

    Despite the name, the returned values are the unnormalized model outputs
    (logits) exactly as produced by ``Trainer.predict``; no softmax is applied.

    Args:
        test_df: DataFrame with a ``text`` column, tokenized by ``preprocess_function``.
        model_path: Location passed to ``from_pretrained`` for both the tokenizer
            and the sequence-classification model.
        id2label: Mapping from class index to label name, forwarded to the model config.
        label2id: Mapping from label name to class index; its length is used as ``num_labels``.

    Returns:
        The ``predictions`` field of the ``Trainer.predict`` output (raw logits).
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, num_labels=len(label2id), id2label=id2label, label2id=label2id
    )

    test_dataset = Dataset.from_pandas(test_df)
    tokenized_test_dataset = test_dataset.map(
        preprocess_function, batched=True, fn_kwargs={'tokenizer': tokenizer}
    )
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # The Trainer is only used as a batched prediction loop here. The
    # `tokenizer=` keyword is deprecated in favour of `processing_class`
    # (NOTE(review): believed to be removed in transformers 5.0 — confirm),
    # and this notebook installs the latest transformers. Since the padding
    # collator is passed explicitly, the Trainer does not need the tokenizer
    # for prediction, so the keyword is omitted to work on old and new versions.
    trainer = Trainer(
        model=model,
        data_collator=data_collator,
    )

    # Get raw logits
    predictions = trainer.predict(tokenized_test_dataset)

    # Return the logits (not softmaxed)
    return predictions.predictions

# Function to calculate perplexity from logits
def calculate_perplexity(logits, labels):
    """Compute the perplexity of classifier logits with respect to the true labels.

    Perplexity is ``exp`` of the mean negative log-probability that the softmax
    of ``logits`` assigns to the true class of each example.

    Args:
        logits: Array-like of shape ``(n_examples, n_classes)`` with raw model outputs.
        labels: Array-like of ``n_examples`` integer class indices.

    Returns:
        The perplexity as a NumPy float (1.0 means every true class got probability ~1).
    """
    logits = np.asarray(logits, dtype=float)
    labels = np.asarray(labels)

    # Numerically stable softmax: subtracting the row maximum leaves the result
    # unchanged mathematically but keeps np.exp from overflowing to inf (which
    # would turn the probabilities into NaN for large logits).
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)
    probs = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)

    # Probability assigned to the true class of each example (row i, column labels[i]).
    true_probs = probs[np.arange(labels.shape[0]), labels]

    # Add small value to prevent log(0)
    neg_log_likelihood = -np.mean(np.log(true_probs + 1e-10))
    perplexity = np.exp(neg_log_likelihood)

    return perplexity

# Function to aggregate predictions using weighted average (soft voting)
def ensemble_predict_with_weights(test_df, model_paths, id2label, label2id, perplexities):
    """Combine several classifiers by a perplexity-weighted sum of their outputs.

    Each model's weight comes from ``calculate_weights(perplexities)``. The
    per-model outputs returned by ``predict_probs`` are raw logits (no softmax
    is applied), so this is a weighted sum of logits rather than of
    probabilities; the predicted class is the argmax of that sum.

    Args:
        test_df: DataFrame handed unchanged to ``predict_probs`` for every model.
        model_paths: Sequence of model locations, one per ensemble member.
        id2label: Mapping from class index to label name.
        label2id: Mapping from label name to class index.
        perplexities: One score per entry of ``model_paths`` (same order), from
            which the weights are derived.

    Returns:
        Array of predicted class indices, one per row of ``test_df``.

    Raises:
        ValueError: If ``model_paths`` is empty or its length differs from
            ``perplexities``.
    """
    # Fail early and clearly instead of hitting an IndexError mid-run (too few
    # perplexities) or silently using un-normalized weights (too many).
    if len(model_paths) == 0:
        raise ValueError("model_paths must contain at least one model")
    if len(perplexities) != len(model_paths):
        raise ValueError(
            "Expected one perplexity per model: got {} perplexities for {} models".format(
                len(perplexities), len(model_paths)
            )
        )

    model_weights = calculate_weights(perplexities)
    print(f"Model weights based on perplexity: {model_weights}")

    combined_logits = None

    for model_path, weight in zip(model_paths, model_weights):
        print(f"\n\n{os.path.basename(model_path)} is predicting-----------")

        # Raw logits of this model for every row of test_df.
        model_logits = predict_probs(test_df, model_path, id2label, label2id)

        # Scale this model's contribution by its weight and accumulate.
        weighted_logits = model_logits * weight
        if combined_logits is None:
            combined_logits = weighted_logits
        else:
            combined_logits = combined_logits + weighted_logits

    final_preds = np.argmax(combined_logits, axis=-1)

    return final_preds

# Function to calculate weights based on inverse perplexity
def calculate_weights(perplexities, eps=1e-12):
    """Turn per-model scores into normalized inverse-score weights.

    Each weight is proportional to ``1 / score``, and the weights sum to 1, so
    a lower score yields a larger weight.

    Args:
        perplexities: Array-like of per-model scores (lower is better).
        eps: Lower bound applied to every score before inversion. Scores that
            are zero or negative would otherwise produce infinite, NaN or
            negative weights.

    Returns:
        NumPy array of non-negative weights in the same order as the input.
    """
    scores = np.asarray(perplexities, dtype=float)
    # Clamp to a tiny positive value so the reciprocal stays finite and positive.
    safe_scores = np.maximum(scores, eps)
    inverse_perplexities = 1.0 / safe_scores
    normalized_weights = inverse_perplexities / inverse_perplexities.sum()
    return normalized_weights

# Function to evaluate predictions
def evaluate(predictions, gold_labels):
    """Compute macro-F1, micro-F1 and accuracy of predictions against gold labels.

    The two frames are joined on ``testset_id``; the predicted and reference
    ``label`` columns appear as ``label_pred`` and ``label_gold`` after the join.

    Args:
        predictions: DataFrame with ``testset_id`` and ``label`` (predicted).
        gold_labels: DataFrame with ``testset_id`` and ``label`` (reference).

    Returns:
        Tuple ``(macro_f1, micro_f1, accuracy)``.
    """
    paired = predictions.merge(gold_labels, on=['testset_id'], suffixes=('_pred', '_gold'))
    reference = paired['label_gold']
    predicted = paired['label_pred']

    # Undefined F1 terms (no predicted/true samples for a class) count as 0.
    macro_f1, micro_f1 = (
        f1_score(reference, predicted, average=averaging, zero_division=0)
        for averaging in ("macro", "micro")
    )
    accuracy = accuracy_score(reference, predicted)

    return macro_f1, micro_f1, accuracy


# ----------------------------------------------------------------------------------------------------------------------

# Paths to your trained models
# Only the uncommented entries take part in the ensemble.

model_paths = [
#     "/kaggle/input/mul-ensemble-coling-final/mullang_rembert",
#     "/kaggle/input/mul-ensemble-coling-final/mullang_xlm-roberta-base(1)",
    "/kaggle/input/mul-ensemble-coling-final/mullang_bert-base-multilingual-cased(1)"
]

# Load the labelled data used to derive the ensemble weights:
# a reproducible 1% sample (random_state=42) of the dev file.
weight_cal_df = pd.read_json('/kaggle/input/short-mul-lang-coling/multilingual_dev_for_score.jsonl', lines=True)
weight_cal_df = weight_cal_df.sample(n=int(len(weight_cal_df) * 0.01), random_state=42)
# NOTE(review): this frame is keyed by 'id' while the test set uses 'testset_id' — confirm the column name.
weight_cal_df_tem = weight_cal_df[['id', 'text']]

# Gold labels of the sampled rows, in the same row order as weight_cal_df_tem.
labels = weight_cal_df['label'].values

# Full test set (the sampling line is commented out); only id and text are passed on.
test_df = pd.read_json('/kaggle/input/coling-25-task-1/test_set_multilingual.jsonl', lines=True)
# test_df = test_df.sample(n=int(len(test_df) * 0.0001), random_state=42)
test_df_tem = test_df[['testset_id', 'text']]

# Dictionary to map labels (adjust as needed)
id2label = {0: "human", 1: "machine"}
label2id = {"human": 0, "machine": 1}

# Calculate perplexities for each model on the labelled calibration sample.
perplexities = []
for model_path in model_paths:
    print(f"Calculating perplexity for {os.path.basename(model_path)}...")
    logits = predict_probs(weight_cal_df_tem, model_path, id2label, label2id)
    perplexity = calculate_perplexity(logits, labels)
    print(f"Perplexity for {os.path.basename(model_path)}: {perplexity:.5f}")
    # The value stored is perplexity - 1, not the perplexity that was just printed.
    # NOTE(review): this is ~0 for a model that is near-perfect on the calibration
    # sample, and the weights are built from reciprocals of these values —
    # confirm calculate_weights handles that case.
    perplexities.append((perplexity-1))
    
# perplexities = [0.15131,0.15264]

# Use the weighted ensemble method for predictions based on perplexity
# NOTE(review): every model is loaded and run a second time here, now on the test set.
final_preds = ensemble_predict_with_weights(test_df_tem, model_paths, id2label, label2id, perplexities)

# Generate a dynamic filename based on model names
def generate_filename(model_paths):
    """Return the file name used for the ensemble predictions.

    The name is currently a fixed constant and does not depend on the models.

    Args:
        model_paths: Accepted for interface compatibility; not used.

    Returns:
        The string ``"mul_ensemble_predictions.jsonl"``.
    """
    # NOTE(review): the original comments described a name built from the model
    # names, but the function has always returned this constant; the unused
    # basename extraction was removed. Confirm whether a dynamic name is wanted.
    filename = "mul_ensemble_predictions.jsonl"
    return filename

# Use the generated filename for saving predictions
ensemble_filename = generate_filename(model_paths)
print("ensemble file name", ensemble_filename)

# Save ensemble predictions to JSONL (one record per line with testset_id and label).
# The ids come from the full `test_df`; the predictions were computed on `test_df_tem`.
# NOTE(review): the two must have the same rows in the same order — confirm if
# test-set sampling is ever re-enabled.
ensemble_predictions_df = pd.DataFrame({'testset_id': test_df['testset_id'], 'label': final_preds})
ensemble_predictions_df.to_json(ensemble_filename, lines=True, orient='records')

In [None]:
import os
import argparse
import logging
import json
import pandas as pd


# logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.INFO)
# Columns every submission file must contain.
COLUMNS = ['testset_id', 'label']


def check_format(file_path):
    """Validate a JSON-lines submission file.

    A file is accepted when it exists, parses as JSON lines, contains every
    column in ``COLUMNS``, has no missing values in those columns, and all
    ``label`` values are 0 or 1. Each failure is reported via ``logging.error``.

    Args:
        file_path: Path of the file to check. ``None`` is treated as a missing file.

    Returns:
        ``True`` if the file passes all checks, ``False`` otherwise.
    """
    # os.path.exists(None) raises TypeError, so handle a missing path explicitly.
    if file_path is None or not os.path.exists(file_path):
        logging.error("File doesnt exists: {}".format(file_path))
        return False

    try:
        submission = pd.read_json(file_path, lines=True)
    except Exception:
        # Deliberately broad (parsers raise several error types), but unlike a
        # bare `except:` this lets KeyboardInterrupt / SystemExit propagate.
        logging.error("File is not a valid json file: {}".format(file_path))
        return False

    # Report missing columns as such instead of as a JSON parsing problem.
    missing_columns = [column for column in COLUMNS if column not in submission.columns]
    if missing_columns:
        logging.error("Missing column(s) {} in file {}".format(missing_columns, file_path))
        return False
    submission = submission[COLUMNS]

    for column in COLUMNS:
        if submission[column].isna().any():
            logging.error("NA value in file {} in column {}".format(file_path, column))
            return False

    # Only the two class ids 0 and 1 are valid labels.
    if not submission['label'].isin(range(0, 2)).all():
        logging.error("Unknown Label in file {}".format(file_path))
        logging.error("Unique Labels in the file are {}".format(submission['label'].unique()))
        return False

    return True

  
# Validate the ensemble prediction file written by the previous cell and print a one-line verdict.
check_result = check_format(ensemble_filename)
result = 'Format is correct' if check_result else 'Something wrong in file format'
#     logging.info("Checking file: {}. Result: {}".format(prediction_file_path, result))
print(ensemble_filename ," ",result)

In [None]:
# # Load ensemble predictions for evaluation
# pred_df = pd.read_json(ensemble_filename, lines=True)

# # Evaluate ensemble predictions
# macro_f1, micro_f1, accuracy = evaluate(pred_df, test_df)

# print(f"Ensemble Scores:")
# print(f"Macro-F1: {macro_f1:.5f}, Micro-F1: {micro_f1:.5f}, Accuracy: {accuracy:.5f}")