Integrate the 936 ingredient JSON files downloaded from Flavour DB into a single file named "integrated_data.json". From each file, extract the attribute "entity_alias_readable" (the ingredient name) and the attribute "molecules". For each entry in "molecules", extract "common_name" (the molecule name) together with "flavor_profile", "fooddb_flavor_profile", and "taste" (the flavor and taste information).

In [None]:
import os
import json
import networkx as nx
import matplotlib.pyplot as plt
# Folder containing the per-ingredient JSON files downloaded from Flavour DB
folder_path = "C:/Users/ghaza/Downloads/ingrediants"

# Start from a clean slate: remove the output of any previous run
output_file_path = "C:/Users/ghaza/Downloads/integrated_data.json"
if os.path.exists(output_file_path):
    os.remove(output_file_path)

# List of integrated records, one per ingredient, in first-seen order:
#   {"ingredients": <ingredient name>, "molecules": [<molecule_info>, ...]}
integrated_data = []

# Ingredient name -> its record in integrated_data. Lets molecules be merged
# into an existing record without rescanning the whole list for each molecule.
records_by_ingredient = {}

# os.listdir() returns entries in arbitrary order; sort so the output file is
# the same from run to run.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".json"):
        continue
    file_path = os.path.join(folder_path, filename)

    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # can mis-decode or reject non-ASCII characters in the source files.
    with open(file_path, "r", encoding="utf-8") as file:
        file_data = json.load(file)

    ingredient = file_data.get("entity_alias_readable", "")
    molecules = file_data.get("molecules", [])

    # A record is only created once the ingredient has at least one molecule,
    # so files with an empty "molecules" list contribute nothing.
    for molecule in molecules:
        molecule_info = {
            "flavor": molecule.get("flavor_profile", ""),
            "molecule": molecule.get("common_name", ""),
            "fooddb_flavor_profile": molecule.get("fooddb_flavor_profile", ""),
            "taste": molecule.get("taste", "")
        }

        existing_ingredient = records_by_ingredient.get(ingredient)
        if existing_ingredient is None:
            # First molecule seen for this ingredient name: start its record
            existing_ingredient = {"ingredients": ingredient, "molecules": []}
            records_by_ingredient[ingredient] = existing_ingredient
            integrated_data.append(existing_ingredient)
        existing_ingredient["molecules"].append(molecule_info)

# Write the integrated data into the output file
with open(output_file_path, "w", encoding="utf-8") as output_file:
    json.dump(integrated_data, output_file, indent=4)

print("Integrated JSON file created successfully.")


Make a list of ingredient names.

In [None]:
import json

# Load the integrated ingredient records from disk
with open('C:/Users/ghaza/Downloads/integrated_data.json') as file:
    data = json.loads(file.read())

# Collect the 'ingredients' value (the ingredient name) of every record,
# keeping the order in which the records appear in the file
ingredient_names = [record['ingredients'] for record in data]

# Bare expression: in a notebook this displays the list as the cell output
ingredient_names


In [None]:
# Destination file; the content is JSON even though the suffix is .txt
file_path = 'C:/Users/ghaza/Downloads/ingredient_names.txt'

# The payload is the plain list of ingredient names (it is not wrapped in a
# dictionary)
data = ingredient_names

# Serialize the list to a JSON string and write it out
with open(file_path, 'w') as file:
    file.write(json.dumps(data))

The resulting graph visually represents the relationships between ingredients, with edges indicating the presence of shared
molecules and the weight (number of shared molecules) displayed as labels on the edges.

In [None]:
import json
import networkx as nx
import matplotlib.pyplot as plt
import pickle

# Read the integrated ingredient records
with open('C:/Users/ghaza/Downloads/integrated_data.json') as file:
    data = json.load(file)

# Summarize every ingredient as name / set of molecule names / category, and
# assign one matplotlib colour ('C0', 'C1', ...) per category in first-seen order.
ingredients_data = data
ingredients = []
category_colors = {}  # category -> colour string
color_index = 0  # next unused 'C<n>' colour index
for ingredient in ingredients_data:
    # NOTE(review): the integration cell of this notebook writes only the
    # 'ingredients' and 'molecules' keys, so 'category' may be absent unless
    # the file was augmented elsewhere. Fall back to a single placeholder
    # category instead of failing with KeyError.
    category = ingredient.get('category', 'unknown')
    # Lists are unhashable; use a tuple so the category can be a dict key
    if isinstance(category, list):
        category = tuple(category)

    ingredients.append({
        'name': ingredient['ingredients'],
        # Stored as a set once, so the pair loop below does not rebuild it
        'molecules': {molecule['molecule'] for molecule in ingredient['molecules']},
        'category': category,
    })

    if category not in category_colors:
        category_colors[category] = f'C{color_index}'
        color_index += 1

# Create an empty graph
graph = nx.Graph()

# Visit every unordered pair of ingredients exactly once
for i, ing1 in enumerate(ingredients):
    for ing2 in ingredients[i + 1:]:
        shared_molecules = ing1['molecules'] & ing2['molecules']
        if not shared_molecules:
            continue

        # Edge weight = number of molecules the two ingredients have in common
        graph.add_edge(ing1['name'], ing2['name'], weight=len(shared_molecules))

        # Tag each endpoint with its OWN category (previously both received
        # the category left over from the last ingredient of the loop above).
        graph.nodes[ing1['name']]['category'] = ing1['category']
        graph.nodes[ing2['name']]['category'] = ing2['category']

# Save the graph using Pickle
with open('graph_shared_molecules_weights.pkl', 'wb') as file:
    pickle.dump(graph, file)

# Draw the graph with node labels, category colours and edge-weight labels
plt.figure(figsize=(100, 80))
pos = nx.spring_layout(graph)
# Colours are listed in graph.nodes order, which is the order networkx draws in
node_colors = [category_colors[graph.nodes[node]['category']] for node in graph.nodes()]
nx.draw(graph, pos, with_labels=True, node_size=500, node_color=node_colors, edge_color='gray')
labels = nx.get_edge_attributes(graph, 'weight')
nx.draw_networkx_edge_labels(graph, pos, edge_labels=labels)

# Category key: one empty, labelled scatter series per category so that
# plt.legend() lists every category next to its colour (drawing all names as
# text at (0, 0) stacked them on top of each other).
for category, color in category_colors.items():
    plt.scatter([], [], color=color, label=str(category))
plt.legend(loc='upper left', fontsize=20)

plt.show()



The resulting graph visually represents the relationships between ingredients, with edges indicating the presence of shared flavors and the weight (number of shared flavors) displayed as labels on the edges.

In [None]:
import json
import networkx as nx
import matplotlib.pyplot as plt
import pickle

# Read the integrated ingredient records
with open('C:/Users/ghaza/Downloads/integrated_data.json') as file:
    data = json.load(file)

# Summarize every ingredient as name / {molecule name: set of flavors} /
# category. Only the first record of each ingredient name is kept.
ingredients_data = data
ingredients = []
ingredient_names = set()  # names already seen, used to drop duplicates
for ingredient in ingredients_data:
    name = ingredient['ingredients']
    if name in ingredient_names:
        continue
    ingredient_names.add(name)

    # NOTE(review): the integration cell of this notebook writes only the
    # 'ingredients' and 'molecules' keys, so 'category' may be absent unless
    # the file was augmented elsewhere. Fall back to a single placeholder
    # category instead of failing with KeyError.
    category = ingredient.get('category', 'unknown')
    # Normalize once here so every later lookup uses the same hashable key
    if isinstance(category, list):
        category = tuple(category)

    molecule_flavors = {}
    for molecule in ingredient['molecules']:
        # The profile is an '@'-separated string. A missing/empty profile
        # yields no flavors (a bare split() would yield one '' "flavor").
        profile = molecule['fooddb_flavor_profile'] or ''
        molecule_flavors[molecule['molecule']] = {
            flavor.strip() for flavor in profile.split('@') if flavor.strip()
        }

    ingredients.append({
        'name': name,
        'molecules': molecule_flavors,
        'category': category,
    })

node_names = [ingredient['name'] for ingredient in ingredients]

# Create a mapping of categories to colors ('C0', 'C1', ...), first-seen order
category_colors = {}
color_index = 0
for ingredient in ingredients:
    category = ingredient['category']
    if category not in category_colors:
        category_colors[category] = f'C{color_index}'
        color_index += 1

# Build the graph once, over the de-duplicated ingredients
graph = nx.Graph()

# Visit every unordered pair of ingredients exactly once
for i, ing1 in enumerate(ingredients):
    for ing2 in ingredients[i + 1:]:
        # Molecules present in both ingredients
        shared_molecules = ing1['molecules'].keys() & ing2['molecules'].keys()

        # Distinct flavors carried by the shared molecules (profiles are read
        # from ing1's records, as a molecule is looked up by name)
        shared_flavors = set()
        for molecule in shared_molecules:
            shared_flavors |= ing1['molecules'][molecule]
        weight = len(shared_flavors)

        # Add an edge with the weight between ing1 and ing2
        if weight > 0:
            graph.add_edge(ing1['name'], ing2['name'], weight=weight)

# Save the graph using Pickle
with open('graph_shared_flavors_weights.pkl', 'wb') as file:
    pickle.dump(graph, file)

# Draw the graph
plt.figure(figsize=(100, 80))
pos = nx.spring_layout(graph, seed=42)  # Set a fixed seed for consistent layout
weights = nx.get_edge_attributes(graph, 'weight')

# Colours listed in graph.nodes order, which is the order networkx draws in
category_by_name = {ingredient['name']: ingredient['category'] for ingredient in ingredients}
node_colors = [category_colors[category_by_name[node]] for node in graph.nodes]

# Draw nodes, edges and node labels
nx.draw_networkx_nodes(graph, pos, node_color=node_colors, node_size=2000)
nx.draw_networkx_edges(graph, pos)
nx.draw_networkx_labels(graph, pos, font_size=20)

# Draw edge labels (number of shared flavors)
nx.draw_networkx_edge_labels(graph, pos, edge_labels=weights, font_size=100)

# Category key: one empty, labelled scatter series per category so that
# plt.legend() lists every category next to its colour (drawing all names as
# text at (0, 0) stacked them on top of each other).
for category, color in category_colors.items():
    plt.scatter([], [], color=color, label=str(category))
plt.legend(loc='upper left', fontsize=20)

plt.axis('off')
plt.show()

In this modified code, each ingredient is connected to the top 10 ingredients that have the most shared flavors with it. The resulting graph will reflect these connections. The top 10 ingredients with the most shared flavors are connected to the current ingredient in the graph.

In [None]:
import json
import networkx as nx
import matplotlib.pyplot as plt

# pickle is used below but is not imported by this cell; import it here so the
# cell does not depend on an earlier cell having been run.
import pickle

# Read the integrated ingredient records
with open('C:/Users/ghaza/Downloads/integrated_data.json') as file:
    data = json.load(file)

# Summarize every ingredient as name / set of flavors / category, and assign
# one matplotlib colour ('C0', 'C1', ...) per category in first-seen order.
ingredients_data = data
ingredients = []
category_colors = {}
color_index = 0

for ingredient in ingredients_data:
    # NOTE(review): the integration cell of this notebook writes only the
    # 'ingredients' and 'molecules' keys, so 'category' may be absent unless
    # the file was augmented elsewhere. Fall back to a single placeholder
    # category instead of failing with KeyError.
    category = ingredient.get('category', 'unknown')
    # Lists are unhashable; use a tuple so the category can be a dict key
    if isinstance(category, list):
        category = tuple(category)

    # Union of the '@'-separated flavor profiles of all molecules. Empty
    # entries are dropped so a missing profile is not counted as a flavor.
    flavors = set()
    for molecule in ingredient['molecules']:
        flavors.update(
            flavor.strip()
            for flavor in (molecule['flavor'] or '').split('@')
            if flavor.strip()
        )

    ingredients.append({
        'name': ingredient['ingredients'],
        'flavors': flavors,
        'category': category,
    })

    if category not in category_colors:
        category_colors[category] = f'C{color_index}'
        color_index += 1

# Create an empty graph
graph = nx.Graph()

# Connect each ingredient to (at most) its 10 best-matching ingredients
for i, ing1 in enumerate(ingredients):
    # (index, number of shared flavors) for every other ingredient
    shared_counts = [
        (j, len(ing1['flavors'] & ing2['flavors']))
        for j, ing2 in enumerate(ingredients)
        if i != j
    ]

    # Sort by shared flavor counts in descending order (stable for ties)
    shared_counts.sort(key=lambda x: x[1], reverse=True)

    for j, shared_count in shared_counts[:10]:
        # Skip candidates with nothing in common, as the weighted top-10
        # cell of this notebook does; otherwise unrelated ingredients get
        # linked. Ingredients with no match at all are left out of the graph.
        if shared_count > 0:
            graph.add_edge(ing1['name'], ingredients[j]['name'])

# Save the graph using Pickle
with open('graph_most_shared_flavors.pkl', 'wb') as file:
    pickle.dump(graph, file)

# Draw the graph
plt.figure(figsize=(100, 80))  # Adjust the figure size as desired (width, height)

# Use spring layout with fixed seed for consistent layout
pos = nx.spring_layout(graph, seed=42)

# One draw call for all nodes; colours are listed in graph.nodes order, which
# is the order networkx draws in
category_by_name = {ingredient['name']: ingredient['category'] for ingredient in ingredients}
node_colors = [category_colors[category_by_name[node]] for node in graph.nodes]
nx.draw_networkx_nodes(graph, pos, node_color=node_colors, node_size=2000)

# Draw edges
nx.draw_networkx_edges(graph, pos)

# Draw labels
nx.draw_networkx_labels(graph, pos, font_size=20)

# Category key: one empty, labelled scatter series per category so that
# plt.legend() lists every category next to its colour (drawing all names as
# text at (0, 0) stacked them on top of each other).
for category, color in category_colors.items():
    plt.scatter([], [], color=color, label=str(category))
plt.legend(loc='upper left', fontsize=20)

plt.axis('off')
plt.show()



In this modified code, each ingredient is connected to the top 10 ingredients that have the most shared flavors with it. The resulting graph will reflect these connections. The top 10 ingredients with the most shared flavors are connected to the current ingredient in the graph. The weights of the edges are the counts of shared flavors.

In [None]:
import json
import networkx as nx
import matplotlib.pyplot as plt

# Read the integrated ingredient records
with open('C:/Users/ghaza/Downloads/integrated_data.json') as file:
    data = json.load(file)

# Summarize every ingredient as name / set of flavors / category
ingredients_data = data
ingredients = []
category_colors = {}  # category -> colour string
color_index = 0
for ingredient in ingredients_data:
    # NOTE(review): the integration cell of this notebook writes only the
    # 'ingredients' and 'molecules' keys, so 'category' may be absent unless
    # the file was augmented elsewhere. Fall back to a single placeholder
    # category instead of failing with KeyError.
    category = ingredient.get('category', 'unknown')
    # Lists are unhashable; use a tuple so the category can be a dict key.
    # Other values (e.g. plain strings) are kept as they are.
    if isinstance(category, list):
        category = tuple(category)

    # Union of the '@'-separated flavor profiles of all molecules. Empty
    # entries are dropped so a missing profile is not counted as a flavor.
    flavors = set()
    for molecule in ingredient['molecules']:
        flavors.update(
            flavor.strip()
            for flavor in (molecule['flavor'] or '').split('@')
            if flavor.strip()
        )

    ingredients.append({
        'name': ingredient['ingredients'],
        'flavors': flavors,
        'category': category,
    })

    # Colours are handed out in first-seen order, so the same input always
    # produces the same colouring (iterating a set of categories does not).
    if category not in category_colors:
        category_colors[category] = f'C{color_index}'
        color_index += 1

# Create an empty graph
graph = nx.Graph()

# Connect each ingredient to (at most) its 10 best-matching ingredients
for i, ing1 in enumerate(ingredients):
    # (index, number of shared flavors) for every other ingredient
    shared_counts = [
        (j, len(ing1['flavors'] & ing2['flavors']))
        for j, ing2 in enumerate(ingredients)
        if i != j
    ]

    # Sort by shared flavor counts in descending order (stable for ties)
    shared_counts.sort(key=lambda x: x[1], reverse=True)

    for j, shared_count in shared_counts[:10]:
        # Edge weight = number of shared flavors; candidates with nothing in
        # common are not linked, so ingredients without any match are left
        # out of the graph.
        if shared_count > 0:
            graph.add_edge(ing1['name'], ingredients[j]['name'], weight=shared_count)

# Draw the graph
plt.figure(figsize=(100, 80))  # Adjust the figure size as desired (width, height)

# Use spring layout with fixed seed for consistent layout
pos = nx.spring_layout(graph, seed=42)
weights = nx.get_edge_attributes(graph, 'weight')

# Colours must be listed in graph.nodes order (the order networkx draws in),
# which differs from the order of the ingredients list.
category_by_name = {ingredient['name']: ingredient['category'] for ingredient in ingredients}
node_colors = [category_colors[category_by_name[node]] for node in graph.nodes]
nx.draw_networkx_nodes(graph, pos, node_color=node_colors, node_size=2000)

# Draw edges
nx.draw_networkx_edges(graph, pos)

# Draw labels
nx.draw_networkx_labels(graph, pos, font_size=20)

# Draw edge labels (number of shared flavors)
nx.draw_networkx_edge_labels(graph, pos, edge_labels=weights, font_size=10)

# Category key: one empty, labelled scatter series per category so that
# plt.legend() lists every category next to its colour (drawing all names as
# text at (0, 0) stacked them on top of each other).
for category, color in category_colors.items():
    plt.scatter([], [], color=color, label=str(category))
plt.legend(loc='upper left', fontsize=20)

plt.axis('off')
plt.show()

In this modified code, each ingredient is connected to the top 10 ingredients that have the most shared molecules with it. The resulting graph will reflect these connections.

In [None]:
import json
import networkx as nx
import matplotlib.pyplot as plt

# Read the integrated ingredient records
with open('C:/Users/ghaza/Downloads/integrated_data.json') as file:
    data = json.load(file)

# Summarize every ingredient as name / set of molecule names / category
ingredients_data = data
ingredients = []
for ingredient in ingredients_data:
    # NOTE(review): the integration cell of this notebook writes only the
    # 'ingredients' and 'molecules' keys, so 'category' may be absent unless
    # the file was augmented elsewhere. Fall back to a single placeholder
    # category instead of failing with KeyError.
    category = ingredient.get('category', 'unknown')
    # Normalize once here so every later lookup uses the same hashable key
    if isinstance(category, list):
        category = tuple(category)

    ingredients.append({
        'name': ingredient['ingredients'],
        # Stored as a set once, so the pair loop below does not rebuild it
        'molecules': {molecule['molecule'] for molecule in ingredient['molecules']},
        'category': category,
    })

# Create an empty graph
graph = nx.Graph()

# Connect each ingredient to (at most) its 10 best-matching ingredients
for i, ing1 in enumerate(ingredients):
    # (index, number of shared molecules) for every other ingredient
    shared_counts = [
        (j, len(ing1['molecules'] & ing2['molecules']))
        for j, ing2 in enumerate(ingredients)
        if i != j
    ]

    # Sort by shared molecule counts in descending order (stable for ties)
    shared_counts.sort(key=lambda x: x[1], reverse=True)

    for j, shared_count in shared_counts[:10]:
        # Skip candidates with nothing in common, as the weighted top-10
        # cell of this notebook does; otherwise unrelated ingredients get
        # linked. Ingredients with no match at all are left out of the graph.
        if shared_count > 0:
            graph.add_edge(ing1['name'], ingredients[j]['name'])

# Create a mapping of categories to colors ('C0', 'C1', ...), first-seen order
category_colors = {}
color_index = 0
for ingredient in ingredients:
    category = ingredient['category']
    if category not in category_colors:
        category_colors[category] = f'C{color_index}'
        color_index += 1

# Draw the graph
plt.figure(figsize=(100, 80))  # Adjust the figure size as desired (width, height)
pos = nx.spring_layout(graph)  # Positions the nodes using the spring layout algorithm

# Colours must be listed in graph.nodes order (the order networkx draws in),
# which differs from the order of the ingredients list.
category_by_name = {ingredient['name']: ingredient['category'] for ingredient in ingredients}
node_colors = [category_colors[category_by_name[node]] for node in graph.nodes]
nx.draw_networkx_nodes(graph, pos, node_color=node_colors, node_size=2000)

# Draw edges
nx.draw_networkx_edges(graph, pos)

# Draw labels
nx.draw_networkx_labels(graph, pos, font_size=20)

plt.axis('off')
plt.show()



In this modified code, each ingredient is connected to the top 10 ingredients that have the most shared molecules with it. The resulting graph will reflect these connections, and the count of shared molecules is used as the weight of each edge.

In [None]:
import json
import networkx as nx
import matplotlib.pyplot as plt

# Read the integrated ingredient records
with open('C:/Users/ghaza/Downloads/integrated_data.json') as file:
    data = json.load(file)

# Summarize every ingredient as name / set of molecule names / category
ingredients_data = data
ingredients = []
for ingredient in ingredients_data:
    # NOTE(review): the integration cell of this notebook writes only the
    # 'ingredients' and 'molecules' keys, so 'category' may be absent unless
    # the file was augmented elsewhere. Fall back to a single placeholder
    # category instead of failing with KeyError.
    category = ingredient.get('category', 'unknown')
    # Normalize once here so every later lookup uses the same hashable key
    if isinstance(category, list):
        category = tuple(category)

    ingredients.append({
        'name': ingredient['ingredients'],
        # Stored as a set once, so the pair loop below does not rebuild it
        'molecules': {molecule['molecule'] for molecule in ingredient['molecules']},
        'category': category,
    })

# Create an empty graph
graph = nx.Graph()

# Connect each ingredient to (at most) its 10 best-matching ingredients
for i, ing1 in enumerate(ingredients):
    # (index, number of shared molecules) for every other ingredient
    shared_counts = [
        (j, len(ing1['molecules'] & ing2['molecules']))
        for j, ing2 in enumerate(ingredients)
        if i != j
    ]

    # Sort by shared molecule counts in descending order (stable for ties)
    shared_counts.sort(key=lambda x: x[1], reverse=True)

    # Edge weight = number of shared molecules; candidates with nothing in
    # common are not linked, so such ingredients may be absent from the graph
    for j, shared_count in shared_counts[:10]:
        if shared_count > 0:
            graph.add_edge(ing1['name'], ingredients[j]['name'], weight=shared_count)

# Create a mapping of categories to colors ('C0', 'C1', ...), first-seen order
category_colors = {}
color_index = 0
for ingredient in ingredients:
    category = ingredient['category']
    if category not in category_colors:
        category_colors[category] = f'C{color_index}'
        color_index += 1

plt.figure(figsize=(100, 80))  # Adjust the figure size as desired (width, height)
pos = nx.spring_layout(graph)
weights = nx.get_edge_attributes(graph, 'weight')

# Colours must be listed in graph.nodes order (the order networkx draws in),
# which differs from the order of the ingredients list.
category_by_name = {ingredient['name']: ingredient['category'] for ingredient in ingredients}
node_colors = [category_colors[category_by_name[node]] for node in graph.nodes]

# Draw the graph with colored nodes
nx.draw_networkx(graph, pos, with_labels=True, node_color=node_colors, node_size=100, font_size=20)

# Draw edge labels (number of shared molecules)
nx.draw_networkx_edge_labels(graph, pos, edge_labels=weights, font_size=10)

plt.show()