Toy Examples

In [115]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Reference (ground truth) and candidate (generated) sentences

string1 = '''"Foods": {
        "Proteins": {
            "Meat": "A rich source of protein, meat includes options like chicken, beef, pork, and lamb, each offering essential nutrients.",
            "Seafood": "Fish and shellfish, such as salmon, tuna, shrimp, and cod, provide high-quality protein and omega-3 fatty acids.",
            "Dairy": "Dairy products like milk, cheese, yogurt, and butter offer calcium, protein, and probiotics for digestive health.",
            "Plant-Based": "Foods like tofu, tempeh, lentils, and chickpeas serve as excellent meat alternatives with high protein and fiber content."
        },
        "Fruits & Vegetables": {
            "Fruits": "Nutrient-dense and naturally sweet, fruits like apples, bananas, oranges, and grapes provide vitamins, fiber, and antioxidants.",
            "Berries": "Small but packed with nutrients, berries such as strawberries, blueberries, raspberries, and blackberries are rich in antioxidants.",
            "Leafy Greens": "Vegetables like spinach, kale, lettuce, and arugula are loaded with vitamins, minerals, and fiber while being low in calories.",
            "Root Vegetables": "Underground vegetables such as carrots, potatoes, beets, and radishes provide essential carbohydrates and vitamins."
        },
        "Grains & Legumes": {
            "Grains": "Staples like rice, wheat, oats, and quinoa are excellent sources of energy and essential carbohydrates.",
            "Pasta & Breads": "Whole wheat, white, multigrain, and sourdough options provide important fiber and carbohydrates for a balanced diet.",
            "Beans": "Black beans, kidney beans, pinto beans, and navy beans are protein-rich legumes packed with fiber and minerals.",
            "Lentils": "Red, green, brown, and black lentils are versatile and nutritious, offering high protein and iron content."
        },
        "Fats & Oils": {
            "Cooking Oils": "Oils like olive, coconut, avocado, and vegetable are used in cooking and contain essential fatty acids.",
            "Animal Fats": "Butter, lard, ghee, and tallow are traditional cooking fats that add flavor and richness to dishes.",
            "Nut & Seed Oils": "Sesame, flaxseed, sunflower, and walnut oils provide unique flavors and are often used in dressings and cooking.",
            "Dairy-Based Fats": "Cream, clarified butter, cheese fat, and yogurt fat add richness and texture to various foods."
        },
        "Beverages & Sweeteners": {
            "Water": "Essential for hydration, water comes in various forms, including still, sparkling, mineral, and flavored options.",
            "Tea & Coffee": "Popular beverages like black tea, green tea, espresso, and lattes provide caffeine and antioxidants.",
            "Juices & Sodas": "Drinks such as orange juice, apple juice, cola, and lemonade offer sweetness but can vary in nutritional value.",
            "Sweeteners": "Sugar, honey, maple syrup, and stevia are commonly used to enhance flavor in foods and beverages."
        }
    },'''

string2 = '''"Countries": {
        "United States": {
            "New York City": "A dynamic urban center renowned for its iconic landmarks, including Times Square, Broadway theaters, and the Statue of Liberty.",
            "Los Angeles": "A hub of the entertainment industry, featuring Hollywood, picturesque beaches, and a lively cultural scene.",
            "Chicago": "Recognized for its signature deep-dish pizza, striking skyline, and the renowned Willis Tower.",
            "San Francisco": "Famous for the Golden Gate Bridge, historic Alcatraz Island, and its distinctive hilly streets."
        },
        "France": {
            "Paris": "The City of Light, celebrated for the Eiffel Tower, world-class museums like the Louvre, and its influence in fashion.",
            "Marseille": "A vibrant port city distinguished by its seafood cuisine, historic harbor, and multicultural heritage.",
            "Lyon": "A gastronomic capital known for its culinary excellence, Roman-era sites, and historic silk trade.",
            "Nice": "A breathtaking coastal destination along the French Riviera, admired for its beaches and Mediterranean allure."
        },
        "Japan": {
            "Tokyo": "A dynamic metropolis blending ancient traditions with cutting-edge technology, featuring lively districts and historic temples.",
            "Kyoto": "A city steeped in history, known for its timeless shrines, scenic bamboo groves, and traditional geisha culture.",
            "Osaka": "A bustling city famed for its vibrant street food scene, historic Osaka Castle, and energetic nightlife.",
            "Hiroshima": "A place of remembrance and resilience, home to the Peace Memorial Park and a rich historical legacy."
        },
        "Italy": {
            "Rome": "A city brimming with history, featuring ancient wonders such as the Colosseum and the grandeur of Vatican City.",
            "Milan": "A global epicenter of fashion and design, known for its elegant streets, renowned art, and architectural marvels.",
            "Venice": "A one-of-a-kind city built on waterways, famous for its scenic gondola rides and the charm of St. Mark's Square.",
            "Florence": "The birthplace of the Renaissance, celebrated for its stunning Duomo, Michelangelo's David, and world-class museums."
        },
        "Brazil": {
            "Rio de Janeiro": "A city known for its stunning landscapes, featuring Christ the Redeemer, the lively Carnival, and iconic beaches like Copacabana.",
            "Sao Paulo": "Brazil's economic powerhouse, distinguished by its soaring skyscrapers, diverse culinary scene, and rich cultural offerings.",
            "Salvador": "A city bursting with Afro-Brazilian heritage, colonial-era architecture, and a thriving music and dance culture.",
            "Brasilia": "The capital city, recognized for its futuristic urban planning, modernist architecture, and political importance."
        }
    },'''

reference = [string1]
candidate = string2

# Tokenize sentences
reference_tokens = [reference[0].split()]  # NLTK expects a list of lists
candidate_tokens = candidate.split()

# Compute BLEU score with smoothing to avoid zero scores for short sentences
smoothing = SmoothingFunction().method1
bleu_score = sentence_bleu(reference_tokens, candidate_tokens, smoothing_function=smoothing)

print(f"BLEU Score: {bleu_score:.4f}")



BLEU Score: 0.0027


In [81]:
from anytree import Node, RenderTree

# Create nodes with string data
root = Node("Root")
child1 = Node("Child1", parent=root)
child2 = Node("Child2", parent=root)
child3 = Node("Child3", parent=root)

# Add children to a specific node
child4 = Node("Child4", parent=child2)
child5 = Node("Child5", parent=child2)

# Access children via indexing
print("First child of root:", root.children[0].name)  # Accessing first child
print("Second child of child2:", child2.children[1].name)  # Accessing second child of child2

# Print the tree structure
for pre, fill, node in RenderTree(root):
    print(f"{pre}{node.name}")


First child of root: Child1
Second child of child2: Child5
Root
├── Child1
├── Child2
│   ├── Child4
│   └── Child5
└── Child3


In [82]:
import tensorflow_hub as hub
import torch

# Load the Universal Sentence Encoder model
model_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(model_url)

# Sentence to embed
sentence = "Google's Universal Sentence Encoder generates 512-dimensional embeddings."

# Generate the embedding
embedding = model([sentence])[0].numpy()
embedding = torch.tensor(embedding)
print(embedding.shape)

torch.Size([512])


In [4]:
def embed(str, model): 
    embedding = model([str])[0].numpy()
    return torch.tensor(embedding)
    

In [5]:
model_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
model = hub.load(model_url)

sentence = "Google's Universal Sentence Encoder generates 512-dimensional embeddings."
emb = embed(sentence, model)
print(emb.shape)

torch.Size([512])


Reading Json Files

In [101]:
import json

def read_json(file_path):
    with open(file_path, 'r') as file: data = json.load(file)
    return data

collection = read_json("origin.json")
print(collection.keys())
Foods = collection["Foods"]
Countries = collection["Countries"]
Leaders = collection["Leaders"]
print("\t",Foods.keys())
print("\t",Countries.keys())
print("\t",Leaders.keys())

collection = read_json("positives.json")
print(collection.keys())
Foods2 = collection["Foods"]
Countries2= collection["Countries"]
Leaders2 = collection["Leaders"]
print("\t",Foods2.keys())
print("\t",Countries2.keys())
print("\t",Leaders2.keys())

dict_keys(['Foods', 'Countries', 'Leaders', 'Lifestyle', 'Products'])
	 dict_keys(['Proteins', 'Fruits & Vegetables', 'Grains & Legumes', 'Fats & Oils', 'Beverages & Sweeteners'])
	 dict_keys(['France', 'United States', 'Japan', 'Italy', 'Brazil'])
	 dict_keys(['United States', 'France', 'Japan', 'Italy', 'Brazil'])
dict_keys(['Foods', 'Countries', 'Leaders', 'Lifestyle', 'Products'])
	 dict_keys(['Protein Sources', 'Grains & Legumes', 'Fats & Oils', 'Beverages & Drinks '])
	 dict_keys(['United States', 'France', 'Japan', 'Italy', 'Brazil'])
	 dict_keys(['United States', 'France', 'Japan', 'Italy', 'Brazil'])


Defining Functions

In [84]:
def json2tree(input, model): # TREE MUST HAVE ONLY 2 LAYERS 
    root = Node("Root")
    for K1 in input.keys(): 
        child1 = Node(K1, parent=root)
        for K2 in input[K1].keys(): 
            child2 = Node(K2, parent=child1)
            desc = Node(input[K1][K2], parent=child2)
    
    for pre, fill, node in RenderTree(root): node.vect = torch.tensor(model([node.name])[0].numpy())
    return root

def tree_print(tree): 
    for pre, fill, node in RenderTree(tree):
        print(f"{pre}{node.name}:{node.vect.shape}")  

Defining Model

In [83]:
mym = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

Defining Score Function + Test

In [107]:
import math

def tree_align(tree_a, tree_b): 
    al1 = tree_a.children
    al2 = tree_b.children

    outer_tree, inner_tree = list(al1), list(al2)
    if(len(al1) > len(al2)): outer_tree, inner_tree = list(al2), list(al1)
    diff = abs(len(al1) - len(al2))

    topis = []
    alignments = []
    pairs = []
    for i in range(len(outer_tree)): 
        vector = outer_tree[i].vect
        correlations = torch.zeros(len(inner_tree))

        for j in range(len(inner_tree)): 
            correlations[j] = torch.inner(vector, inner_tree[j].vect)
        
        topi = torch.argmax(correlations)
        alignments.append(correlations[topi])
        topis.append(topi)
        pairs.append((outer_tree[i], inner_tree[topi]))
        inner_tree.pop(topi)

    return pairs, alignments, topis, diff

def activate(num): return (num + 1)/2

def layer_alignment(alignments): 
    nums = [activate(elem.item()) for elem in alignments]
    return torch.mean(torch.tensor(nums))

def score(a, b, model):
    alpha = 3 
    beta = 0.3
    gamma = 0.3


    # CONVERTING JSON DICTIONARY TO TREES
    tree_a = json2tree(a,model)
    tree_b = json2tree(b,model)

    # LAYER 1 ALIGNMENTS
    L1_collection, L1_alignments, L1_topis, L1_diffs = tree_align(tree_a, tree_b)
    L1_diffs = [L1_diffs]

    # LAYER 2 ALIGNMENTS
    L2_collection, L2_alignments, L2_topis, L2_diffs = [], [], [], []
    for pair in L1_collection: 
        L2_pairs, L2a, L2t, L2d = tree_align(pair[0], pair[1])
        L2_collection.extend(L2_pairs)
        L2_alignments.extend(L2a)
        L2_topis.extend(L2t)
        L2_diffs.extend([L2d])

    print("\nlen(L2_collection):",len(L2_collection))    
    print("len(L2_alignments):",len(L2_alignments))
    print("len(L2_topis):",len(L2_topis))

    # LAYER 3 ALIGNMENTS
    L3_collection, L3_alignments, L3_topis, L3_diffs = [], [], [], []
    for pair in L2_collection: 
        L3_pairs, L3a, L3t, L3d = tree_align(pair[0], pair[1])
        L3_collection.extend(L3_pairs)
        L3_alignments.extend(L3a)
        L3_topis.extend(L3t)
        L3_diffs.extend([L3d])

    print("\nlen(L3_collection):",len(L3_collection))    
    print("len(L3_alignments):",len(L3_alignments))
    print("len(L3_topis):",len(L3_topis))

    # LAYER ALIGNMENT SCORE PROCESSING
    L1A = layer_alignment(L1_alignments)
    L2A = layer_alignment(L2_alignments)
    L3A = layer_alignment(L3_alignments)
    alignment_score = (L1A + L2A + L3A)/3
    print("\nLayer Alignments:", L1A, L2A, L3A)

    # LAYER TOPIS SCORE PROCESSING
    L1_topis = torch.sigmoid(torch.norm(torch.tensor(L1_topis).to(float)))
    L2_topis = torch.sigmoid(torch.norm(torch.tensor(L2_topis).to(float)))
    L3_topis = torch.sigmoid(torch.norm(torch.tensor(L3_topis).to(float)))
    topis_score = -2*((L1_topis + L2_topis + L3_topis)/3)+2

    # LAYER DIFFERENCES SCORE PROCESSING
    L1_diffs = torch.sigmoid(torch.norm(torch.tensor(L1_topis).to(float)))
    L2_diffs = torch.sigmoid(torch.norm(torch.tensor(L2_topis).to(float)))
    L3_diffs = torch.sigmoid(torch.norm(torch.tensor(L3_topis).to(float)))
    diff_score = -2*((L1_diffs + L2_diffs + L3_diffs)/3)+2
 
    print("\nAlignment Score / Topis Score / Diff Score:", alignment_score.item(), topis_score.item(), diff_score.item())

    return (alignment_score**alpha) * (topis_score**beta) * (diff_score**gamma)

print(score(Foods, Countries, mym))


len(L2_collection): 20
len(L2_alignments): 20
len(L2_topis): 20

len(L3_collection): 20
len(L3_alignments): 20
len(L3_topis): 20

Layer Alignments: tensor(0.5692) tensor(0.5836) tensor(0.5274)

Alignment Score / Topis Score / Diff Score: 0.5600563883781433 0.3851846423379939 0.620572396891393
tensor(0.1143, dtype=torch.float64)


  L1_diffs = torch.sigmoid(torch.norm(torch.tensor(L1_topis).to(float)))
  L2_diffs = torch.sigmoid(torch.norm(torch.tensor(L2_topis).to(float)))
  L3_diffs = torch.sigmoid(torch.norm(torch.tensor(L3_topis).to(float)))
