In [None]:
import weaviate 
from weaviate.connect import ConnectionParams
from weaviate.classes.config import Configure
import json
from weaviate.classes.config import Property, DataType
from weaviate.classes.init import AdditionalConfig, Timeout
from weaviate.classes.query import Filter
import pandas


In [None]:
# Read the recipe dataset from disk and print a per-column summary
# (dtypes and non-null counts) so gaps in the data are visible up front.
RECIPES_PATH = "full_format_recipes.json"
df = pandas.read_json(RECIPES_PATH)
df.info()

In [None]:
# Keep one row per recipe title; pandas retains the first occurrence by default.
df_removed = df.drop_duplicates(subset="title")
df_removed.info()

In [None]:
# Turn the de-duplicated frame into a list of dicts, one per row, keyed by
# column name. Every column appears as a key in every dict, so cells with no
# data carry pandas' missing-value marker instead of the key being absent.
unique_data = df_removed.to_dict(orient='records')

In [None]:
# Connect to the locally running Weaviate instance, raising the query timeout
# to 60 so slower requests are not cut off early.
query_timeout = Timeout(query=60)
client = weaviate.connect_to_local(
    additional_config=AdditionalConfig(timeout=query_timeout),
)

In [None]:
# Drop the "Recipes" collection so the cell below can recreate it from a clean
# slate. NOTE: this removes the collection together with the objects stored in it.
client.collections.delete("Recipes")

In [None]:
# Schema of the "Recipes" collection as (property name, Weaviate data type)
# pairs, in the order the properties are registered.
_property_types = [
    ("directions", DataType.TEXT_ARRAY),
    ("fat", DataType.NUMBER),
    ("date", DataType.TEXT),
    ("categories", DataType.TEXT_ARRAY),
    ("calories", DataType.NUMBER),
    ("desc", DataType.TEXT),
    ("protein", DataType.NUMBER),
    ("rating", DataType.NUMBER),
    ("title", DataType.TEXT),
    ("ingredients", DataType.TEXT_ARRAY),
    ("sodium", DataType.NUMBER),
]
properties = [
    Property(name=prop_name, data_type=prop_type)
    for prop_name, prop_type in _property_types
]

# Both the generative model and the embedding model are served by the same
# Ollama endpoint, addressed via the Docker host alias.
ollama_endpoint = "http://host.docker.internal:11434"

generative = Configure.Generative.ollama(
    api_endpoint=ollama_endpoint,
    model="mistral",
)
named_vector = Configure.NamedVectors.text2vec_ollama(
    name="text2vec",
    model="nomic-embed-text",
    api_endpoint=ollama_endpoint,
)

# Create the collection with one named vector ("text2vec") and the generative
# module attached; the returned collection object is the cell's output.
client.collections.create(
    "Recipes",
    properties=properties,
    generative_config=generative,
    vectorizer_config=[named_vector],
)


In [None]:
# Handle to the "Recipes" collection, reused by the inspection and query cells below.
collection = client.collections.get("Recipes")

In [None]:
def _value_or_default(value, default):
    """Return ``value``, or ``default`` when ``value`` is a missing-data marker.

    The records are produced by ``DataFrame.to_dict(orient='records')``, so
    every column is present as a key in every record and absent data shows up
    as ``None``/``NaN``/``NaT`` rather than as a missing key. A plain
    ``dict.get(key, default)`` therefore never falls back to its default.
    """
    # Containers are real values; pandas.isna would test them element-wise
    # and return an array instead of a single bool.
    if isinstance(value, (list, tuple, dict)):
        return value
    return default if pandas.isna(value) else value


def _recipe_properties(recipe):
    """Build the Weaviate property dict for one recipe record.

    Missing values are replaced by a type-appropriate default: an empty list
    for array properties, an empty string for text, and zero for numbers.
    """
    # Defined inside the function so each record gets fresh list defaults.
    defaults = {
        "directions": [],
        "fat": 0,
        "date": "",
        "categories": [],
        "calories": 0,
        "desc": "",
        "protein": 0,
        "rating": 0.0,
        "title": "",
        "ingredients": [],
        "sodium": 0,
    }
    return {
        name: _value_or_default(recipe.get(name), default)
        for name, default in defaults.items()
    }


counter = 0
interval = 100  # print a progress line every `interval` recipes

with client.batch.fixed_size(batch_size=200) as batch:
    for recipe in unique_data:
        batch.add_object(
            collection="Recipes",
            properties=_recipe_properties(recipe),
        )

        counter += 1
        if counter % interval == 0:
            print(f"Added {counter} recipes")

print(f"Added {counter} recipes")

# `counter` only counts objects queued for import; surface any that the
# server rejected so failures do not go unnoticed.
failed_objects = client.batch.failed_objects
if failed_objects:
    print(f"Failed to import {len(failed_objects)} recipes")
    print(f"First failure: {failed_objects[0].message}")

In [None]:
# Inspect the collection: fetch its full configuration, print the simplified
# schema, then print how many objects it currently holds.
recipes_config = collection.config.get()
schema = collection.config.get(simple=True)
print("Schema: ", schema)

aggregate_result = collection.aggregate.over_all(total_count=True)
print("Recipes count: ", aggregate_result.total_count)

In [None]:
# Semantic search for "Apple", limited to the 5 closest recipes whose rating
# equals 5.
five_star_only = Filter.by_property("rating").equal(5)
response = collection.query.near_text(query="Apple", limit=5, filters=five_star_only)

In [None]:
# Unfiltered baseline for the same "Apple" search, for comparison with the
# rating-filtered query in the cell above.
# The original passed `Filter.by_property("rating")` on its own, which is an
# incomplete filter: a comparison such as `.equal(...)` or
# `.greater_than(...)` must be chained before it can be used as `filters=`.
response = collection.query.near_text(
    query="Apple",
    limit=5,
)

In [None]:
# Check the embedding-based retrieval: print the title of each hit from the
# most recent query, with a blank line after every title.
for hit in response.objects:
    print(hit.properties["title"], end="\n\n")

In [None]:
# Retrieval-augmented generation: fetch the 2 closest 5-rated "Apple" recipes
# and send their desc/title/ingredients to the generative model in a single
# grouped prompt.
beginner_prompt = "which recipe do you recommend for beginner cook?, do not provide steps or recipes. just the title and your opinion based on the description and ingredients. if there is none, say so. in less than 50 words."

response = collection.generate.near_text(
    query="Apple",
    limit=2,
    filters=Filter.by_property("rating").equal(5),
    grouped_task=beginner_prompt,
    grouped_properties=["desc","title", "ingredients"],
)

print(response.generated)

In [None]:
# Raw GraphQL query: the 2 recipes nearest to "Apple", returning `desc` plus
# an `_additional.summary` computed over the `desc` property.
# NOTE(review): `summary` presumably needs a summarization module enabled on
# the Weaviate server — confirm against the deployment.
summary_query = """    {
        Get {
            Recipes(
                limit: 2
                nearText: {
                    concepts: ["Apple"]
                }
            ){
                desc
                _additional {
                    summary (
                        properties: ["desc"]
                    )
                    {
                        property
                        result
                    }
                }
            }
        }
    }"""

result = client.graphql_raw_query(summary_query)



In [None]:
# Print the raw GraphQL response for inspection.
print(result)