In [1]:
from neo4j import GraphDatabase
from typing import Dict, Any, List
from fuzzywuzzy import process
from sentence_transformers import SentenceTransformer
import numpy as np
from annoy import AnnoyIndex
from ollama import chat
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Symptom-to-nutrient lookup comparing three keyword matchers:
# sentence-transformer embeddings, fuzzy string matching, and TF-IDF cosine similarity.

class NutrientGraphManager:
    """Read-only helper around a Neo4j driver for symptom and nutrient lookups.

    The instance owns the driver created in ``__init__``; call ``close()`` when
    done, or use the object as a context manager so the driver is closed
    automatically::

        with NutrientGraphManager(uri, username, password) as graph:
            symptoms = graph.fetch_symptoms()
    """

    def __init__(self, uri: str, username: str, password: str):
        """Initialize the Neo4j connection."""
        self.driver = GraphDatabase.driver(uri, auth=(username, password))

    def __enter__(self):
        """Return the manager itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppress errors."""
        self.close()
        return False

    def close(self):
        """Close the Neo4j connection."""
        self.driver.close()

    # fetch symptoms in Neo4j database
    def fetch_symptoms(self) -> List[str]:
        """Return the names of all ``Symptom`` nodes as a single list.

        Returns:
            The list produced by ``COLLECT(s.name)``; an empty list if the
            query yields no row at all.
        """
        with self.driver.session() as session:
            # .values() materialises the records while the session is still open.
            rows = session.run("MATCH (s:Symptom) RETURN COLLECT(s.name)").values()
        # The aggregate comes back as one row with one column holding the list.
        return rows[0][0] if rows else []

    # fetch nutrients in Neo4j database
    def fetch_nutrients(self, keywords) -> List[List[Any]]:
        """Return nutrients linked to any of the given symptom names.

        Args:
            keywords: Symptom names to match against ``Symptom.name``.

        Returns:
            One row per nutrient, each row being
            ``[Nutrient, Paper_Source, Source, Daily_Intake, Benefit]`` where
            every field after the name is a de-duplicated list. Only nutrients
            that match all four category patterns in the query are returned.
            An empty list is returned when ``keywords`` is empty.
        """
        if not keywords:
            # `s.name IN []` can never match, so skip the database round trip.
            return []
        with self.driver.session() as session:
            nutrients_fetched = session.run("""MATCH (n:Vitamin)-[:HAS_CATEGORY]->(cat:SymptomCategory)-[:INCLUDES]->(s:Symptom)
            MATCH (n)-[:HAS_CATEGORY]->(scat:SourceCategory)-[:INCLUDES]->(sc:Source)
            MATCH (n)-[:HAS_CATEGORY]->(dc:DailyIntakeCategory)-[:INCLUDES]->(d:Daily_intake_recommendation)
            MATCH (n)-[:HAS_CATEGORY]->(bc:BenefitCategory)-[:INCLUDES]->(b:Benefit)
            WHERE s.name IN $keywords
            RETURN n.name AS Nutrient, COLLECT(DISTINCT n.paper_source) AS Paper_Source, COLLECT(DISTINCT sc.name) AS Source,
            COLLECT(DISTINCT d.name) AS Daily_Intake, COLLECT(DISTINCT b.name) AS Benefit """, keywords=keywords).values()
            return nutrients_fetched

def setup_sentence_transformer_matcher(
    keyword_list,
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    n_trees=10,
):
    """Embed the keywords and build an Annoy index over the embeddings.

    Each keyword is lower-cased and wrapped as ``"symptoms of <keyword>"``
    before encoding. Index item ``i`` corresponds to ``keyword_list[i]``.

    Args:
        keyword_list: Keyword strings to index; must not be empty.
        model_name: Name passed to ``SentenceTransformer``.
        n_trees: Number of trees passed to ``AnnoyIndex.build``.

    Returns:
        A ``(model, index, keyword_vectors)`` tuple.

    Raises:
        ValueError: If ``keyword_list`` is empty.
    """
    keywords = list(keyword_list)
    if not keywords:
        # Fail before loading the model: with no rows there is no
        # embedding dimension to size the index with.
        raise ValueError("keyword_list must contain at least one keyword")

    # Load the model
    model = SentenceTransformer(model_name)

    keyword_sentences = [f"symptoms of {keyword.lower()}" for keyword in keywords]
    keyword_vectors = model.encode(keyword_sentences)

    # Setup Annoy index; dimensionality is taken from the model output
    vector_dimension = keyword_vectors.shape[1]
    index = AnnoyIndex(vector_dimension, 'angular')

    for i, vec in enumerate(keyword_vectors):
        index.add_item(i, vec)
    index.build(n_trees)

    return model, index, keyword_vectors

def map_sentence_to_keywords_transformer(model, index, keyword_list, sentence, top_n=3):
    """Map a free-text sentence to its nearest keywords via embedding search.

    The sentence is lower-cased, prefixed with ``"patient describes symptoms: "``,
    encoded with ``model`` and looked up in ``index``. The item ids returned by
    the index are used as positions into ``keyword_list``.

    Args:
        model: Object exposing ``encode(text)``.
        index: Object exposing ``get_nns_by_vector(vector, n)``.
        keyword_list: Keywords, ordered to match the ids stored in ``index``.
        sentence: User-provided description.
        top_n: Number of neighbours to request from the index.

    Returns:
        The keywords for the returned neighbour ids, in the index's order.
    """
    prompt = "patient describes symptoms: " + sentence.lower()
    neighbour_ids = index.get_nns_by_vector(model.encode(prompt), top_n)

    matched = []
    for item_id in neighbour_ids:
        matched.append(keyword_list[item_id])
    return matched

def map_sentence_to_keywords_fuzzy(keyword_list, sentence, top_n=3):
    """Return up to ``top_n`` keywords that fuzzy-match ``sentence`` best.

    Delegates ranking to ``process.extract`` and keeps only the first element
    of each returned match, preserving the order it reports.
    """
    best_keywords = []
    for candidate in process.extract(sentence, keyword_list, limit=top_n):
        best_keywords.append(candidate[0])
    return best_keywords

def setup_tfidf_matcher(keyword_list):
    """Fit a TF-IDF vectorizer on the keywords.

    Returns:
        A ``(vectorizer, keyword_vectors)`` tuple: the fitted vectorizer and
        the result of ``fit_transform`` on ``keyword_list``.
    """
    tfidf = TfidfVectorizer()
    return tfidf, tfidf.fit_transform(keyword_list)

def map_sentence_to_keywords_tfidf(vectorizer, keyword_vectors, keyword_list, sentence, top_n=3):
    """Return the ``top_n`` keywords most similar to ``sentence`` by cosine similarity.

    Args:
        vectorizer: Fitted vectorizer exposing ``transform``.
        keyword_vectors: Vectors for ``keyword_list``, in the same order.
        keyword_list: Candidate keywords.
        sentence: User-provided description.
        top_n: Maximum number of keywords to return; values ``<= 0`` give ``[]``.

    Returns:
        Keywords ordered from most to least similar. No similarity threshold
        is applied, so keywords with a score of 0 can still be returned when
        fewer than ``top_n`` keywords overlap with the sentence.
    """
    if top_n <= 0:
        # The previous `[-top_n:]` slice became `[-0:]` (the whole array) for
        # top_n == 0 and returned every keyword instead of none.
        return []

    sentence_vector = vectorizer.transform([sentence])
    similarities = cosine_similarity(sentence_vector, keyword_vectors).flatten()
    # Reverse first, then truncate: same ordering as before for top_n >= 1.
    top_indices = np.argsort(similarities)[::-1][:top_n]
    return [keyword_list[i] for i in top_indices]

def _print_match_report(label, mapped_keywords, nutrients):
    """Print one matcher's keywords, the fetched rows, and the nutrient names."""
    print(f"The response below is for {label}")
    print("Mapped Keywords:", mapped_keywords)
    print("Nutrients:", nutrients)
    # First column of every row is the nutrient name.
    print([item[0] for item in nutrients])


def main():
    """Prompt for a symptom and compare the three keyword matchers.

    Connection settings are read from the environment so that no credential
    lives in the notebook:

    * ``NEO4J_URI`` (default ``neo4j://localhost:7687``)
    * ``NEO4J_USERNAME`` (default ``neo4j``)
    * ``NEO4J_PASSWORD`` (no default; prompted for with ``getpass`` if unset)

    For each matcher (sentence transformer, fuzzy matching, TF-IDF cosine
    similarity) the user's text is mapped to symptom keywords, the matching
    nutrients are fetched from Neo4j, and both are printed. Any exception
    raised while matching or querying is reported on stdout rather than
    propagated, and the driver is closed in every case.
    """
    # Local imports keep this block self-contained.
    import getpass
    import os

    uri = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
    username = os.environ.get("NEO4J_USERNAME", "neo4j")
    # Never hard-code the password; fall back to an interactive, non-echoing prompt.
    password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

    graph_manager = NutrientGraphManager(uri, username, password)

    try:
        keywords = graph_manager.fetch_symptoms()
        user_symptom = input("Enter your symptom: ")

        # 1) Sentence-transformer embeddings + Annoy nearest neighbours
        model, index, _ = setup_sentence_transformer_matcher(keywords)
        mapped_keywords = map_sentence_to_keywords_transformer(model, index, keywords, user_symptom)
        _print_match_report(
            "transformer model", mapped_keywords, graph_manager.fetch_nutrients(mapped_keywords)
        )

        # 2) Fuzzy string matching
        mapped_keywords = map_sentence_to_keywords_fuzzy(keywords, user_symptom)
        _print_match_report(
            "the fuzzy_matching model", mapped_keywords, graph_manager.fetch_nutrients(mapped_keywords)
        )

        # 3) TF-IDF + cosine similarity
        vectorizer, keyword_vectors = setup_tfidf_matcher(keywords)
        mapped_keywords = map_sentence_to_keywords_tfidf(vectorizer, keyword_vectors, keywords, user_symptom)
        _print_match_report(
            "the cosine similarity model", mapped_keywords, graph_manager.fetch_nutrients(mapped_keywords)
        )

    except Exception as e:
        # Best-effort demo: report the failure instead of dumping a traceback.
        print(f"Error occurred: {str(e)}")
    finally:
        graph_manager.close()

# Run the interactive matcher comparison when this cell/script is executed directly.
if __name__ == "__main__":
    main()


The response below is for transformer model
Mapped Keywords: ['Poor vision', 'vision problems', 'Vision problems']
Nutrients: [['Vitamin E', ['https://pubmed.ncbi.nlm.nih.gov/30681767/'], ['Tocopherols and tocotrienols', 'Almonds', 'Sunflower seeds', 'Spinach', 'Avocado', 'vegetable oils', 'barley', 'oat', 'palm oil', 'rice bran', 'rye', 'wheat germ', 'fruits', 'seafood', 'cheese', 'eggs', 'Alpha-tocopherol', 'Vitamin E-containing foods like nuts, seeds, and vegetable oils', 'Nuts and seeds', 'Vegetable oils', 'Green leafy vegetables', 'Sweet potato', 'Tocopherols', 'Tocotrienols', 'wheat germ oil', 'nuts and seeds', 'green leafy vegetables', 'Sunflower oil', 'rice bran oil', 'tocotrienol', 'tocopherol', 'α-TocH', 'α-tocotrienol', 'γ-tocotrienol', 'δ-tocotrienol', 'Hevea latex', 'Palm tocotrienol', 'nuts', 'Gamma-tocotrienol', 'Delta-tocotrienol', 'Palm, olive, or sunflower oil', 'Alpha-Tocopherol', 'Beta-Carotene', 'Vitamin C', 'Vitamin A', 'Rice bran', 'Palm-tocotrienol', 'Red meat',

In [7]:
# Run main() again; it prompts for a new symptom on every call.
if __name__ == "__main__":
    main()

The response below is for transformer model
Mapped Keywords: ['Muscle cramps and spasms', 'Muscle cramps and weakness', 'Muscle cramps']
Nutrients: [['Potassium', ['https://pubmed.ncbi.nlm.nih.gov/18607145/'], ['Spinach', 'Avocado', 'Dairy products', 'Broccoli', 'Kale', 'Bananas', 'Fruits (e.g. bananas, oranges)', 'Vegetables (e.g. spinach, potatoes)', 'bananas', 'avocados', 'spinach', 'potatoes', 'Oranges', 'Potatoes', 'tuna', 'Dairy products (e.g. yogurt)', 'Fruits (e.g. fresh fruit consumption)', 'Vegetables', 'Whole grains', 'Dietary sources of potassium', 'Fruits', 'Legumes', 'Apricots', 'Banana', 'Beans (white, cooked)'], ['120 meq/day', 'Recommended daily intake varies by age and sex, but generally around 4,700 mg per day for adults', '3,400 mg for adult males', '2,600 mg for adult females', '3500-4700 mg per day for adults', '3500-4700 mg per day', 'Adults: 2,600-3,400 mg/day', 'Children: 2,000-2,500 mg/day', 'Adults: 4,700 mg/day', '3500-4700 mg/day for adults', 'Adults: 4,700

In [8]:
# Run main() again; it prompts for a new symptom on every call.
if __name__ == "__main__":
    main()

The response below is for transformer model
Mapped Keywords: ['Numbness or tingling in extremities', 'Numbness and tingling in extremities', 'Cardiovascular abnormalities']
Nutrients: [['Magnesium', ['https://pubmed.ncbi.nlm.nih.gov/33260549/'], ['Almonds', 'Spinach', 'Avocado', 'Nuts and seeds', 'Green leafy vegetables', 'nuts and seeds', 'green leafy vegetables', 'Dairy products', 'Fish', 'Breast milk', 'whole grains', 'Milk', 'Leafy green vegetables', 'Breakfast cereals', 'Kale', 'Bananas', 'Vegetables', 'Whole grains', 'Fruits', 'Legumes', 'Avocados', 'Magnesium-rich foods such as nuts, seeds, whole grains, and leafy green vegetables', 'magnesium-rich foods', 'Legumes, seafood, and dark chocolate', 'milk-based diet', 'solid foods', 'Magnesium-rich foods', 'Seeds', 'Nuts (almonds, cashews, Brazil nuts, peanuts)', 'Whole grain breads and cereals (brown rice, millet)', 'Some fruits', 'Cocoa', 'Pseudo cereal and whole-grain wheat, oat, and millet', 'Unrefined whole grains, nuts, legume

In [9]:
# Run main() again; it prompts for a new symptom on every call.
if __name__ == "__main__":
    main()

The response below is for transformer model
Mapped Keywords: ['Excessive bleeding', 'Increased risk of excessive bleeding', 'Increased risk of bleeding']
Nutrients: [['Vitamin K', ['https://pubmed.ncbi.nlm.nih.gov/27732556/'], ['Vegetable oils', 'Green leafy vegetables', 'Liver', 'Dairy products', 'Eggs', 'Egg yolks', 'Cheese', 'Meat', 'Leafy green vegetables (such as spinach, kale, and broccoli)', 'Alfalfa', 'Putrefied fish meal', 'Leafy green vegetables', 'Some fruits (such as blueberries and figs)', 'Leafy green vegetables (such as spinach, kale)', 'Broccoli', 'Brussels sprouts', 'Choleic acid', 'Phylloquinone oxide', 'Leafy green vegetables (e.g. spinach, kale)', 'Dairy products (e.g. cheese)', 'Meat (e.g. liver)', 'Green vegetables', 'Cheeses', 'Natto (Japanese soybean product)', 'Phylloquinone (derived from plants)', 'Menaquinones (derived from bacteria)', 'Phylloquinone (Vitamin K1)', 'Menadione (Vitamin K3)', 'Menatetrenone (Vitamin K2)', 'Vegetable oils (such as soybean oil an

In [10]:
# Run main() again; it prompts for a new symptom on every call.
if __name__ == "__main__":
    main()

The response below is for transformer model
Mapped Keywords: ['Muscle cramps and weakness', 'Muscle cramps', 'muscle cramps']
Nutrients: [['Potassium', ['https://pubmed.ncbi.nlm.nih.gov/18607145/'], ['Spinach', 'Avocado', 'Dairy products', 'Broccoli', 'Kale', 'Bananas', 'Fruits (e.g. bananas, oranges)', 'Vegetables (e.g. spinach, potatoes)', 'bananas', 'avocados', 'spinach', 'potatoes', 'Oranges', 'Potatoes', 'tuna', 'Dairy products (e.g. yogurt)', 'Fruits (e.g. fresh fruit consumption)', 'Vegetables', 'Whole grains', 'Dietary sources of potassium', 'Fruits', 'Legumes', 'Apricots', 'Banana', 'Beans (white, cooked)'], ['120 meq/day', 'Recommended daily intake varies by age and sex, but generally around 4,700 mg per day for adults', '3,400 mg for adult males', '2,600 mg for adult females', '3500-4700 mg per day for adults', '3500-4700 mg per day', 'Adults: 2,600-3,400 mg/day', 'Children: 2,000-2,500 mg/day', 'Adults: 4,700 mg/day', '3500-4700 mg/day for adults', 'Adults: 4,700 mg per day