In [None]:
import re
import json
from langchain_core.documents import Document
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from uuid import uuid4
import numpy as np
from numpy.linalg import norm
import ollama
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama


In [6]:
def extract_first_digit(text):
    """
    Finds the earliest digit character in a string.

    Args:
        text: The string to scan.

    Returns:
        The first digit as a one-character string, or None when the
        string contains no digit at all.
    """
    digit_match = re.search(r'\d', text)
    return None if digit_match is None else digit_match.group(0)

def clean_mana_cost(text: str):
    """
    Rewrites a brace-delimited mana cost (e.g. "{2}{G}") as plain words.

    Each whole number is followed by " uncolored", adjacent symbols ("}{")
    are joined with ", and ", the letters G/B/U/R/W are spelled out as
    " 1 green ", " 1 black ", " 1 blue ", " 1 red " and " 1 white ", and
    any remaining braces are turned into spaces.

    Args:
        text: The raw mana cost string. May be empty or None.

    Returns:
        The reworded cost, or " no information about mana cost" when
        ``text`` is empty or None.
    """
    if not text:
        return " no information about mana cost"

    # Annotate every complete number where it stands. Substituting each single
    # digit with the *first* digit of the string (the previous approach)
    # turned "{10}" into "1 uncolored1 uncolored" and rewrote later numbers
    # (e.g. the second half of "{1}{G} // {2}{U}") with the wrong value.
    aux_text = re.sub(r'\d+', lambda number: number.group(0) + " uncolored", text)

    # Join neighbouring symbols before the braces themselves are blanked out.
    aux_text = aux_text.replace('}{', ', and ')

    # The replacement words are all lowercase, so one substitution can never
    # be re-matched by a later uppercase colour letter.
    for letter, colour in (('G', 'green'), ('B', 'black'), ('U', 'blue'),
                           ('R', 'red'), ('W', 'white')):
        aux_text = aux_text.replace(letter, ' 1 ' + colour + ' ')

    return aux_text.replace('}', ' ').replace('{', ' ')

In [None]:
# Parse the oracle-cards JSON dump into memory. json.load accepts a binary
# file object, hence the 'rb' mode.
with open('C:/Users/card1/Downloads/oracle-cards-20250510090302.json', 'rb') as cards_file:
    data = json.load(cards_file)

# Trailing expression: the notebook displays how many records were loaded.
len(data)

34541

In [28]:
# Sentence templates for the text stored for each card. The double quotes
# they add are stripped again by extract_information, together with any
# quotes and braces coming from the card fields.
# NOTE(review): "efect" is a typo for "effect" in the generated text; it is
# left unchanged here so the text stays identical to what was generated before.
card_creature_template = """The card name is \"{name}\". The type is \"{type}\". The efect of the card is \"{efect}\" The mana cost of the card is \"{mana_cost}\". The toughness is \"{toughness}\", and the power is \"{power}\"."""
card_spell_template = """The card name is \"{name}\". The type is \"{type}\". The efect of the card is \"{efect}\" The mana cost of the card is \"{mana_cost}\"."""

def extract_information(test_dict):
    """
    Builds the plain-text description of one card record.

    The keys 'name', 'type_line', 'oracle_text', 'mana_cost', 'power' and
    'toughness' are read from the record; any of them that is absent is
    rendered as "not available". A present 'mana_cost' is reworded with
    clean_mana_cost.

    Args:
        test_dict: The card record (a dict). It is not modified.

    Returns:
        The description string, with every double quote removed and every
        brace replaced by a space. Records with neither 'power' nor
        'toughness' use the shorter template without those two sentences.
    """
    missing = "not available"

    if 'mana_cost' in test_dict:
        mana_c = clean_mana_cost(test_dict['mana_cost'])
    else:
        mana_c = missing

    # Fields shared by both templates. .get keeps a record without a name or
    # type line from raising KeyError.
    # NOTE(review): presumably some records keep these only per face - confirm.
    shared_fields = dict(
        name=test_dict.get('name', missing),
        type=test_dict.get('type_line', missing),
        efect=test_dict.get('oracle_text', missing),
        mana_cost=mana_c,
    )

    if 'toughness' not in test_dict and 'power' not in test_dict:
        card = card_spell_template.format(**shared_fields)
    else:
        # Either stat may be present without the other; indexing both
        # unconditionally raised KeyError in that case.
        card = card_creature_template.format(
            toughness=test_dict.get('toughness', missing),
            power=test_dict.get('power', missing),
            **shared_fields,
        )

    # Drop quotes and blank out braces (template quotes as well as any
    # "{T}"-style symbols coming from the card text).
    return card.replace('"', '').replace('{', ' ').replace('}', ' ')

In [None]:

# One Document per card record: the generated description is the page
# content and the record's 'related_uris' entry is kept as the source metadata.
documents = [
    Document(
        page_content=extract_information(card_record),
        metadata={"source": card_record['related_uris']},
    )
    for card_record in data
]


In [None]:
# Embedding function backed by the "llama3.2" Ollama model; reused below for
# sizing the index and for the vector store itself.
embeddings = OllamaEmbeddings(model="llama3.2")

In [None]:
# Size the flat L2 index from the length of one probe embedding.
embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

# Start from an empty in-memory docstore and an empty index-to-id mapping.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# Build the vector store: one fresh UUID string per document, passed as ids.
uuids = [str(uuid4()) for _ in documents]
vector_store.add_documents(documents=documents, ids=uuids)

['a9fa2b04-4388-46b3-abcc-2db4ee9cfbc8',
 '6b871d85-f9be-4114-aafa-c584213fd5b7',
 '98e4e2b3-ef86-4d12-8e5d-5b6b6acbcae8',
 'c5d1b09c-e698-4a47-acf1-0cc0b31b0f54',
 'bb30b6a7-1dd7-4ccb-bd69-0211974b79ad',
 '89cdad3c-892a-400e-acf2-c3dd53036936',
 'dd3601da-1c75-4303-951e-628925a56268',
 '94acc866-23d4-4068-b576-af4e55f9b5d1',
 'ef764635-2923-4432-969c-13e464bb1d30',
 '907f31f3-af2e-4921-9916-1d1542df17b0',
 '77a8e249-e696-444b-a965-4f32d08bd3f2',
 'ca9c683c-a6ad-4162-8c89-298beedb3dc7',
 'e66a1fdb-cf55-4cc6-b860-af16d5fd7e54',
 '5a446364-4a46-421f-9348-8a1f5023b9d6',
 '8bd5d248-582f-4bf4-a8a3-d98cf5d14312',
 'c6fcd57e-619a-45a5-85a3-e2a45afa402b',
 '53aa0f03-b85c-484b-954b-0bd675ed8bd7',
 '399f7ef2-d46d-4fe3-a4f3-6d638c8ecb71',
 '40ce2986-92b9-4af8-a8a7-1f01c50b9c50',
 '31d2e001-adde-4a99-bdd0-ba57b6f30925',
 '0be51694-e5cf-411a-8727-140853130424',
 'ef42f10f-665f-4654-8c68-36f4caf7c7af',
 'e891465f-03a0-49d9-b37b-62b51adaff13',
 '3307de47-369e-4a53-94e4-edfee191e09d',
 'c6e4734b-b512-

In [None]:
# Persist the populated vector store to disk.
# NOTE(review): "/v_database/" is an absolute path at the filesystem root - confirm this is intended.
save_directory = "/v_database/"
vector_store.save_local(folder_path=save_directory)