In [None]:
!pip install nltk rouge
!git clone https://github.com/salaniz/pycocoevalcap


Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1
Cloning into 'pycocoevalcap'...
remote: Enumerating objects: 821, done.
remote: Counting objects: 100% (24/24), done.
remote: Compressing objects: 100% (20/20), done.
remote: Total 821 (delta 5), reused 19 (delta 4), pack-reused 797
Receiving objects: 100% (821/821), 130.06 MiB | 8.93 MiB/s, done.
Resolving deltas: 100% (424/424), done.
Updating files: 100% (40/40), done.


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive. The dataset-loading cells further down
# read their JSON inputs from paths under /content/drive/MyDrive/, so this cell
# must run before them.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import json
import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from random import shuffle

# Fetch the NLTK resources this cell relies on. word_tokenize() needs the Punkt
# sentence-tokenizer data: older NLTK releases load it from 'punkt', while recent
# releases load it from 'punkt_tab' and raise LookupError if only 'punkt' is
# present. Requesting both keeps the cell working regardless of which NLTK
# version an unpinned install resolves to.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

def load_json(file_path):
    """Read a JSON document from disk and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8 by convention. Without an explicit encoding, open()
    # falls back to the platform's locale encoding, which can mis-decode (or
    # fail on) non-ASCII characters in the captions.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def calculate_corpus_bleu(references, candidate):
    """Score a single candidate caption against its references with smoothed BLEU.

    Every text is lower-cased and tokenized with ``word_tokenize``. The call to
    ``corpus_bleu`` is made with a one-element "corpus": one list of tokenized
    references paired with one tokenized candidate.

    Args:
        references: Iterable of reference caption strings.
        candidate: The generated caption string to evaluate.

    Returns:
        The value returned by ``corpus_bleu`` using ``SmoothingFunction().method1``.
    """
    reference_token_lists = []
    for reference_text in references:
        reference_token_lists.append(word_tokenize(reference_text.lower()))

    hypothesis_tokens = word_tokenize(candidate.lower())
    smoother = SmoothingFunction()

    return corpus_bleu(
        [reference_token_lists],
        [hypothesis_tokens],
        smoothing_function=smoother.method1,
    )

def calculate_cosine_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    A fresh ``TfidfVectorizer`` is fitted on just these two texts, so the
    vocabulary and IDF weights come only from the pair being compared.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The single entry of the 1x1 matrix produced by ``cosine_similarity``
        for the two row vectors.
    """
    pair_matrix = TfidfVectorizer().fit_transform([text1, text2])
    first_row = pair_matrix[0:1]
    second_row = pair_matrix[1:2]
    similarity_matrix = cosine_similarity(first_row, second_row)
    return similarity_matrix[0][0]

def calculate_average_cosine_similarity(generated_caption, original_captions):
    """Average the TF-IDF cosine similarity of one caption against several others.

    Args:
        generated_caption: The caption text to compare.
        original_captions: Iterable of reference texts; each one is compared
            with ``generated_caption`` via ``calculate_cosine_similarity``.

    Returns:
        The arithmetic mean of the pairwise similarities, or ``0.0`` when
        ``original_captions`` is empty.
    """
    similarities = [
        calculate_cosine_similarity(generated_caption, original)
        for original in original_captions
    ]
    if not similarities:
        # No references to compare against: return 0 instead of dividing by
        # zero, matching how avg() in this notebook treats an empty list.
        return 0.0
    return sum(similarities) / len(similarities)

# Function to calculate the average of a list
def avg(lst):
    """Return the arithmetic mean of ``lst``, or 0 when ``lst`` is empty/falsy."""
    if not lst:
        return 0
    total = sum(lst)
    count = len(lst)
    return total / count

# Load the entire dataset
data = load_json('/content/drive/MyDrive/MasterThesis/flickr8k_dataset/Scenario1_distilbart_with_all_similarities_only_2_flickr.json')

# Shuffle and split the dataset into training (80%) and testing (20%) segments.
# A dedicated, seeded generator makes the split - and therefore every number
# printed below - reproducible across "Restart & Run All", without touching the
# global random state.
import random
SPLIT_SEED = 42
items = list(data.values())
random.Random(SPLIT_SEED).shuffle(items)
split_index = int(0.8 * len(items))
data_train = items[:split_index]
data_test = items[split_index:]

# With fewer than two items one of the segments is empty and the ratios below
# would otherwise fail with an uninformative ZeroDivisionError.
if not data_train or not data_test:
    raise ValueError(
        f"Need at least 2 items to build a train/test split, got {len(items)}"
    )

# Initialize counters for calculating priors based on cosine similarity
better_count_train = {'blip': 0, 'gpt2': 0}

# Calculate priors based on average cosine similarity in training data:
# for every training item, the model whose caption is closer (TF-IDF cosine)
# to the concatenated reference captions gets one "win".
for item in data_train:
    original_captions = [' '.join(item['original_coco_captions'])]  # Combine all original captions into a single text for simplicity
    cos_similarities = {model: calculate_average_cosine_similarity(item['generated_captions'][model], original_captions) for model in ['gpt2', 'blip']}
    # max() returns the first maximal key, so an exact tie is counted for
    # 'gpt2' (it is listed first).
    better_model = max(cos_similarities, key=cos_similarities.get)
    better_count_train[better_model] += 1

# Prior of a model = share of training items it won.
total_instances_train = sum(better_count_train.values())
prior_gpt = better_count_train['gpt2'] / total_instances_train
prior_blip = better_count_train['blip'] / total_instances_train

# Calculate likelihoods using testing data: smoothed BLEU of each model's
# caption against the individual reference captions of the same item.
bleu_scores_test = {'blip': [], 'gpt2': []}
for item in data_test:
    original_captions = item['original_coco_captions']
    for model in ['gpt2', 'blip']:
        bleu_score = calculate_corpus_bleu(original_captions, item['generated_captions'][model])
        bleu_scores_test[model].append(bleu_score)

# NOTE(review): the mean BLEU score is used as a stand-in for a likelihood; it
# is a similarity score in [0, 1], not a calibrated probability.
likelihood_gpt = avg(bleu_scores_test['gpt2'])
likelihood_blip = avg(bleu_scores_test['blip'])

# Calculate Marginal Likelihood (Evidence)
marginal_likelihood = likelihood_gpt * prior_gpt + likelihood_blip * prior_blip

# The posteriors are undefined when the evidence is zero (e.g. every BLEU score
# is 0); fail with a clear message instead of a bare ZeroDivisionError.
if marginal_likelihood == 0:
    raise ValueError(
        "Marginal likelihood is 0, so the posterior probabilities are undefined"
    )

# Calculate Posterior probabilities
posterior_gpt = (likelihood_gpt * prior_gpt) / marginal_likelihood
posterior_blip = (likelihood_blip * prior_blip) / marginal_likelihood

print(f"Prior for GPT: {prior_gpt:.4f}, Prior for BLIP: {prior_blip:.4f}")
print(f"Likelihood (Average BLEU) for GPT on test data: {likelihood_gpt:.4f}")
print(f"Likelihood (Average BLEU) for BLIP on test data: {likelihood_blip:.4f}")
print(f"Marginal Likelihood (Evidence): {marginal_likelihood:.4f}")
print(f"Posterior for GPT: {posterior_gpt:.4f}")
print(f"Posterior for BLIP: {posterior_blip:.4f}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Prior for GPT: 0.3136, Prior for BLIP: 0.6864
Likelihood (Average BLEU) for GPT on test data: 0.1555
Likelihood (Average BLEU) for BLIP on test data: 0.1772
Marginal Likelihood (Evidence): 0.1704
Posterior for GPT: 0.2862
Posterior for BLIP: 0.7138


In [None]:
import json
import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from random import shuffle

# Fetch the NLTK resources this cell relies on. word_tokenize() needs the Punkt
# sentence-tokenizer data: older NLTK releases load it from 'punkt', while recent
# releases load it from 'punkt_tab' and raise LookupError if only 'punkt' is
# present. Requesting both keeps the cell working regardless of which NLTK
# version an unpinned install resolves to.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

def load_json(file_path):
    """Read a JSON document from disk and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8 by convention. Without an explicit encoding, open()
    # falls back to the platform's locale encoding, which can mis-decode (or
    # fail on) non-ASCII characters in the captions.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def calculate_corpus_bleu(references, candidate):
    """Score a single candidate caption against its references with smoothed BLEU.

    Every text is lower-cased and tokenized with ``word_tokenize``. The call to
    ``corpus_bleu`` is made with a one-element "corpus": one list of tokenized
    references paired with one tokenized candidate.

    Args:
        references: Iterable of reference caption strings.
        candidate: The generated caption string to evaluate.

    Returns:
        The value returned by ``corpus_bleu`` using ``SmoothingFunction().method1``.
    """
    reference_token_lists = []
    for reference_text in references:
        reference_token_lists.append(word_tokenize(reference_text.lower()))

    hypothesis_tokens = word_tokenize(candidate.lower())
    smoother = SmoothingFunction()

    return corpus_bleu(
        [reference_token_lists],
        [hypothesis_tokens],
        smoothing_function=smoother.method1,
    )

def calculate_cosine_similarity(text1, text2):
    """Return the TF-IDF cosine similarity between two texts.

    A fresh ``TfidfVectorizer`` is fitted on just these two texts, so the
    vocabulary and IDF weights come only from the pair being compared.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The single entry of the 1x1 matrix produced by ``cosine_similarity``
        for the two row vectors.
    """
    pair_matrix = TfidfVectorizer().fit_transform([text1, text2])
    first_row = pair_matrix[0:1]
    second_row = pair_matrix[1:2]
    similarity_matrix = cosine_similarity(first_row, second_row)
    return similarity_matrix[0][0]

def calculate_average_cosine_similarity(generated_caption, original_captions):
    """Average the TF-IDF cosine similarity of one caption against several others.

    Args:
        generated_caption: The caption text to compare.
        original_captions: Iterable of reference texts; each one is compared
            with ``generated_caption`` via ``calculate_cosine_similarity``.

    Returns:
        The arithmetic mean of the pairwise similarities, or ``0.0`` when
        ``original_captions`` is empty.
    """
    similarities = [
        calculate_cosine_similarity(generated_caption, original)
        for original in original_captions
    ]
    if not similarities:
        # No references to compare against: return 0 instead of dividing by
        # zero, matching how avg() in this notebook treats an empty list.
        return 0.0
    return sum(similarities) / len(similarities)

# Function to calculate the average of a list
def avg(lst):
    """Return the arithmetic mean of ``lst``, or 0 when ``lst`` is empty/falsy."""
    if not lst:
        return 0
    total = sum(lst)
    count = len(lst)
    return total / count

# Load the entire dataset
data = load_json('/content/drive/MyDrive/MasterThesis/flickr8k_dataset/Scenario2_distilbart_with_all_similarities_only_2_flickr.json')

# Shuffle and split the dataset into training (80%) and testing (20%) segments.
# A dedicated, seeded generator makes the split - and therefore every number
# printed below - reproducible across "Restart & Run All", without touching the
# global random state.
import random
SPLIT_SEED = 42
items = list(data.values())
random.Random(SPLIT_SEED).shuffle(items)
split_index = int(0.8 * len(items))
data_train = items[:split_index]
data_test = items[split_index:]

# With fewer than two items one of the segments is empty and the ratios below
# would otherwise fail with an uninformative ZeroDivisionError.
if not data_train or not data_test:
    raise ValueError(
        f"Need at least 2 items to build a train/test split, got {len(items)}"
    )

# Initialize counters for calculating priors based on cosine similarity
better_count_train = {'blip': 0, 'gpt2': 0}

# Calculate priors based on average cosine similarity in training data:
# for every training item, the model whose caption is closer (TF-IDF cosine)
# to the concatenated reference captions gets one "win".
for item in data_train:
    original_captions = [' '.join(item['original_coco_captions'])]  # Combine all original captions into a single text for simplicity
    cos_similarities = {model: calculate_average_cosine_similarity(item['generated_captions'][model], original_captions) for model in ['gpt2', 'blip']}
    # max() returns the first maximal key, so an exact tie is counted for
    # 'gpt2' (it is listed first).
    better_model = max(cos_similarities, key=cos_similarities.get)
    better_count_train[better_model] += 1

# Prior of a model = share of training items it won.
total_instances_train = sum(better_count_train.values())
prior_gpt = better_count_train['gpt2'] / total_instances_train
prior_blip = better_count_train['blip'] / total_instances_train

# Calculate likelihoods using testing data: smoothed BLEU of each model's
# caption against the individual reference captions of the same item.
bleu_scores_test = {'blip': [], 'gpt2': []}
for item in data_test:
    original_captions = item['original_coco_captions']
    for model in ['gpt2', 'blip']:
        bleu_score = calculate_corpus_bleu(original_captions, item['generated_captions'][model])
        bleu_scores_test[model].append(bleu_score)

# NOTE(review): the mean BLEU score is used as a stand-in for a likelihood; it
# is a similarity score in [0, 1], not a calibrated probability.
likelihood_gpt = avg(bleu_scores_test['gpt2'])
likelihood_blip = avg(bleu_scores_test['blip'])

# Calculate Marginal Likelihood (Evidence)
marginal_likelihood = likelihood_gpt * prior_gpt + likelihood_blip * prior_blip

# The posteriors are undefined when the evidence is zero (e.g. every BLEU score
# is 0); fail with a clear message instead of a bare ZeroDivisionError.
if marginal_likelihood == 0:
    raise ValueError(
        "Marginal likelihood is 0, so the posterior probabilities are undefined"
    )

# Calculate Posterior probabilities
posterior_gpt = (likelihood_gpt * prior_gpt) / marginal_likelihood
posterior_blip = (likelihood_blip * prior_blip) / marginal_likelihood

print(f"Prior for GPT: {prior_gpt:.4f}, Prior for BLIP: {prior_blip:.4f}")
print(f"Likelihood (Average BLEU) for GPT on test data: {likelihood_gpt:.4f}")
print(f"Likelihood (Average BLEU) for BLIP on test data: {likelihood_blip:.4f}")
print(f"Marginal Likelihood (Evidence): {marginal_likelihood:.4f}")
print(f"Posterior for GPT: {posterior_gpt:.4f}")
print(f"Posterior for BLIP: {posterior_blip:.4f}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Prior for GPT: 0.3068, Prior for BLIP: 0.6932
Likelihood (Average BLEU) for GPT on test data: 0.1538
Likelihood (Average BLEU) for BLIP on test data: 0.1826
Marginal Likelihood (Evidence): 0.1737
Posterior for GPT: 0.2715
Posterior for BLIP: 0.7285
