# Librerías necesarias

In [2]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
from traduccion_sql_ln import *
from parser_SQL import *
from embeddings import *
import mdpd

# Configuración inicial

## Configuración embeddings

In [3]:
# Identifier of the pre-trained sentence-embedding model handed to the wrapper.
modelPath = "thenlper/gte-large"

# Options forwarded to the model itself; 'cuda' presumably requires an available GPU.
model_kwargs = dict(device='cuda')

# Options forwarded to the encoding step; vectors are left un-normalized.
encode_kwargs = dict(normalize_embeddings=False)

# Embedding function shared by the rest of the notebook.
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Load the embeddings store through the project helper, passing the embedding
# function configured earlier. The two string arguments are presumably the
# storage location and the index/collection name — TODO confirm against
# `cargar_embeddings`.
db = cargar_embeddings('emmbeddings_db', 'indices_paises_wikis', embeddings)

## Configuración LLM

In [5]:
# Retriever view over the loaded store, using its default search settings.
retriever = db.as_retriever()

# LLM client. It talks to a server expected at localhost:3030, with a
# 4096-token context window and a low sampling temperature.
ollama = Ollama(
    base_url='http://localhost:3030',
    model="llama2-uncensored",
    num_ctx=4096,
    temperature = 0.2
)

# Preparación para hacer las preguntas

In [6]:
# Settings used to ask the questions.
# System message: fixes the assistant's behaviour and exposes the `context`
# and `format_instructions` placeholders.
system_prompt=("You are a highly intelligent question answering bot. "
               "You will answer concisely. "
               "Use only the given context to answer the question. "
               "Context: {context}"
               "\n{format_instructions}")

# Module-level chat template (system + human turn) consumed by hacer_consulta.
# NOTE(review): hacer_consulta also supplies an `examples` value, but this
# template has no {examples} placeholder, so the few-shot examples presumably
# never reach the model on that path — confirm whether that is intended.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "In the next table there are the answer of the question\n{question}"),
    ],
)

def format_docs(docs):
    """Join retrieved documents into one context string and echo them to stdout.

    Args:
        docs: Re-iterable collection of objects exposing ``page_content`` and
            a ``metadata`` mapping with a ``'source'`` key. It is traversed
            twice: once to build the text and once to print the debug dump.

    Returns:
        The ``page_content`` of every document, separated by blank lines.
    """
    banner = "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
    rule = "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"

    # Build the returned text first, exactly as the debug dump expects.
    contenido = "\n\n".join([documento.page_content for documento in docs])

    # Debug dump: source path and raw text of each retrieved chunk.
    print(banner)
    for documento in docs:
        print(rule)
        print(documento.metadata['source'])
        print(rule)
        print(documento.page_content)
    print(banner)

    return contenido

def crear_instrucciones(columnas: list[str]):
    """Build the formatting instructions that are injected into the prompt.

    Args:
        columnas: Names of the columns the answer table must contain. An
            empty list yields instructions without any column names.

    Returns:
        A callable that ignores whatever arguments it receives and returns the
        instruction text, so it can be used as a step in a runnable mapping.
    """
    # The text is accumulated with "+=" throughout. Previously the second
    # statement re-assigned the variable and silently dropped this header.
    texto = "Instructions: \n"
    texto += "Format the information as a table with columns for "

    # Human-readable enumeration: "a", or "a, b and c".
    if len(columnas) == 1:
        texto += columnas[0]
    elif len(columnas) > 1:
        texto += ", ".join(columnas[:-1]) + f" and {columnas[-1]}"

    texto += " Your response should be in CSV format\n"

    texto += "If your answer is a number like millions or thousands, return the always all its digits using the format used in America. \n"
    texto += "If I ask you a question that is rooted in truth, you will give you the answer.\n"
    texto += "If I ask you a question that is nonsense, trickery, or has no clear answer, you will respond with 'Unknown'. "

    return (lambda *args: texto)

def crear_ejemplos():
    """Build the few-shot examples block that is injected into the prompt.

    Returns:
        A callable that ignores whatever arguments it receives and returns the
        text ``"Examples: \\n"`` followed by one ``question: answer`` line per
        example.
    """
    # Every entry is a (question, answer) pair. The last one used to be a
    # single string with the answer glued to the question, so it rendered
    # without the "?: " separator that all the other shots share.
    fewshot_chatgpt = [
        ('What is human life expectancy in the United States?', '78.'),
        ('Who was president of the United States in 1955?', 'Dwight D. Eisenhower.'),
        ('Which party was founded by Gramsci?', 'Comunista.'),
        ('What is the capital of France?', 'Paris.'),
        ('What is a continent starting with letter O?', 'Oceania.'),
        ('Where were the 1992 Olympics held?', 'Barcelona.'),
        ('How many squigs are in a bonk?', 'Unknown'),
        ('What is the population of Venezuela?', '28,300,000'),
    ]

    texto = "Examples: \n"
    texto += "\n".join(f"{pregunta}: {respuesta}" for pregunta, respuesta in fewshot_chatgpt)
    return (lambda *args: texto)

def hacer_consulta(traduccion: str, columnas: list[str]):
    """Ask the LLM a retrieval-augmented question and return the reply as a DataFrame.

    The question is sent through the module-level ``retriever``, ``prompt``
    and ``ollama`` objects; the raw reply is printed and then parsed with
    ``mdpd.from_md``.

    NOTE(review): the generated instructions request CSV output while the
    reply is parsed with ``mdpd.from_md`` (presumably a Markdown-table
    parser) — confirm that the two agree.

    Args:
        traduccion: Natural-language question to send to the model.
        columnas: Expected column names, applied positionally to the result.

    Returns:
        The parsed DataFrame. When it has no rows its columns are left as
        parsed; otherwise the leading columns are renamed after ``columnas``.
    """
    # Copy so the instruction builder does not share the caller's object.
    columnas_traduccion = type(columnas)(columnas)
    print(f"Procesando la pregunta:\n\t{traduccion}")

    # Inputs resolved for every invocation of the chain.
    entradas = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
        "format_instructions": crear_instrucciones(columnas_traduccion),
        "examples": crear_ejemplos(),
    }
    rag_chain = entradas | prompt | ollama

    respuesta_llm = rag_chain.invoke(traduccion)
    print("Resultado sin procesar: ")
    print(respuesta_llm)
    print("############################################################")

    df = mdpd.from_md(respuesta_llm)
    if len(df) == 0:
        return df

    # Columns are matched by position. The original notes mention matching by
    # name similarity as a possible refinement; that is not implemented.
    n_recibidas = len(df.columns)
    n_esperadas = len(columnas)
    if n_recibidas > n_esperadas:
        # Extra columns keep the names the model gave them.
        df.columns = columnas + list(df.columns)[n_esperadas:]
    elif n_recibidas < n_esperadas:
        # Fewer columns than requested: use only the first expected names.
        df.columns = columnas[:n_recibidas]
    else:
        df.columns = columnas

    return df

def hacer_pregunta(pregunta: str, contexto: "list | str" = "", instrucciones_extra:str = "", con_ejemplos: bool = False):
    """Ask the LLM a single question using a caller-supplied context.

    Args:
        pregunta: Question sent as the human message.
        contexto: Documents to use as context; they are passed to
            ``format_docs``, which reads ``page_content`` and
            ``metadata['source']`` from each item. The default empty string
            iterates as "no documents" and yields an empty context; a
            non-empty string is not supported.
        instrucciones_extra: Text appended after the "Instructions: " header.
        con_ejemplos: When true, the few-shot examples from ``crear_ejemplos``
            are included; otherwise the examples slot is left empty.

    Returns:
        Whatever the chain returns for the question (the model's reply).
    """
    # Local template: unlike the module-level one it also has an {examples} slot.
    plantilla_sistema = ("You are a highly intelligent question answering bot. "
                         "You will answer concisely. "
                         "Use only the given context to answer the question. "
                         "Context: {context}"
                         "\n{format_instructions}"
                         "\n{examples}")

    plantilla = ChatPromptTemplate.from_messages(
        [
            ("system", plantilla_sistema),
            ("human", "{question}"),
        ],
    )

    # Header spelled "Instructions" (it used to be sent to the model as "Intructions").
    instrucciones = "Instructions: " + instrucciones_extra

    datos = {
        # The context is fixed by the caller, so the chain input is ignored here.
        "context": lambda x: format_docs(contexto),
        "question": RunnablePassthrough(),
        "format_instructions": lambda *args: instrucciones,
        "examples": crear_ejemplos() if con_ejemplos else (lambda *args: ""),
    }

    print(f"Procesando la pregunta:\n\t{pregunta}")
    rag_chain = datos | plantilla | ollama

    return rag_chain.invoke(pregunta)

In [25]:
# SELECT t1.area FROM country as t1 WHERE t1.countryName = "Spain";
# NOTE(review): the SQL above asks for Spain's area and does not correspond to
# the population question sent below — presumably a leftover from an earlier run.
# Ask for a two-column table (country, boolean) and print the parsed DataFrame.
respuesta = hacer_consulta('For all the next countries Response True if population has less than 1 Million else Response False. Country list: ( United States, Canada, Mexico, Russia, China, Brazil, India, Indonesia, Japan, Germany, France, United Kingdom, Italy, Spain, Australia, Argentina, South Africa) ', ["countryName", "Response"])
print(respuesta)

Procesando la pregunta:
	For all the next countries Response True if population has less than 1 Million else Response False. Country list: ( United States, Canada, Mexico, Russia, China, Brazil, India, Indonesia, Japan, Germany, France, United Kingdom, Italy, Spain, Australia, Argentina, South Africa) 
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
resultados/paises_wikipedia/South Sudan.txt
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
error{color:#f8a397}html.skin-theme-clientpref-os .mw-parser-output .cs1-maint{color:#18911F}}"World Population Prospects 2022". population.un.org. United Nations Department of Economic and Social Affairs, Population Division. Retrieved July 17, 2022.
 ↑ "World Population Prospects 2022: Demographic indicators by region, subregion and country, annually for 1950-2100" (XSLX). population.un.org ("Total Population, as of 1 July (thousands)"). United Nations Department of Economic and Social Affairs, Population Division.

In [11]:
# Retrieve context for a single (city, population) record, then ask a
# True/False question about it with extra answer-format instructions.
contexto = retriever.invoke("this is a record of a table that have country name as its columns: (Caracas, 1,000,363)")

# NOTE(review): the last instruction reads "in case that you can response ...
# say 'False'", which looks like it was meant to be "can't" — confirm.
print(hacer_pregunta("knowing that the city name is Caracas and the city population is 1,000,363. we can afirm that the city has less than 1M population is:  ", 
                     contexto, 
                     "You response must be only 'True' or 'False'\n don't Explain yourself\n don't apologize if you can't response\n in case that you can response the question say 'False'\n"))

Procesando la pregunta:
	knowing that the city name is Caracas and the city population is 1,000,363. we can afirm that the city has less than 1M population is:  
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
resultados/paises_wikipedia/Venezuela.txt
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 original on 2013-10-02. Retrieved 2013-08-24.
 ↑ 27,951,000 according to "República Bolivariana de Venezuela.". www.imf.org. Retrieved 2 January 2020.
 https://simple.wikipedia.org/w/index.php?title=Venezuela&oldid=9129437 Categories Venezuela Spanish-speaking countries Current dictatorships 1845 establishments 1840s establishments in South America CS1 Spanish-language sources (es) CS1 errors: missing periodical Articles containing Spanish-language text Articles with Spanish-language sources (es) Pages using infobox country with unknown parameters Articles with FAST identifiers Articles with VIAF identifiers Articles with WorldCat identifiers Articles with 

In [10]:
# Retrieve context for a single (city, population) record, then ask which
# country the city belongs to, this time with the few-shot examples enabled.
contexto = retriever.invoke("this is a record of a table that have country name and county population as its columns: (Caracas, 1,000,363)")

# NOTE(review): the last instruction reads "in case that you can response ...
# say 'Unknow'", which looks like it was meant to be "can't" / 'Unknown' — confirm.
print(hacer_pregunta("knowing that the city name is Caracas and the city population is 1,000,363. The Country of the city is:  ", 
                     contexto, 
                     "your response must be the shortest one\ndon't Explain yourself\ndon't apologize if you can't response\nin case that you can response the question say 'Unknow'\n",
                     True))

Procesando la pregunta:
	knowing that the city name is Caracas and the city population is 1,000,363. The Country of the city is:  
$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
resultados/paises_wikipedia/Venezuela.txt
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 original on 2013-10-02. Retrieved 2013-08-24.
 ↑ 27,951,000 according to "República Bolivariana de Venezuela.". www.imf.org. Retrieved 2 January 2020.
 https://simple.wikipedia.org/w/index.php?title=Venezuela&oldid=9129437 Categories Venezuela Spanish-speaking countries Current dictatorships 1845 establishments 1840s establishments in South America CS1 Spanish-language sources (es) CS1 errors: missing periodical Articles containing Spanish-language text Articles with Spanish-language sources (es) Pages using infobox country with unknown parameters Articles with FAST identifiers Articles with VIAF identifiers Articles with WorldCat identifiers Articles with WorldCat Entities identifiers A

In [None]:
prompt_str = "You are a highly intelligent question answering bot. If I ask you a question that is rooted in truth, you will give you the answer. If I ask you a question that is nonsense, trickery, or has no clear answer, you will respond with 'Unknown'. You will answer concisely. Use the given context as a support to answer the question if you can't answer the question.Context:  with almost 3,000 delegates. The NPC is often described as a \"rubber stamping\" body; it vetoed no bills and members have complained about their lack of power.\n China largely follows a centralized system of government, and is administratively divided into 22 provinces, 5 autonomous regions and 4 directly controlled municipalities. Provincial governments have limited powers in their internal and economic affairs. Autonomous regions have more autonomy than provinces, such as the right to declare additional official languages and holidays in additional to Mandarin and other national holidays. Directly controlled municipalities are cities that are not part of any province, and instead report directly to the central government in Beijing. Often having different names, prefectures ((地级)市/自治州/地区), counties (县/(县级)市/区) and townships (乡/镇/街道) are one by one subordinated.\n The Special Administrative Regions (SAR), Hong Kong and Macau, in principle run themselves as separate jurisdictions, with only foreign policy and defense being controlled by Beijing, though in practice this autonomy is restricted. There is an active independence movement in Hong Kong, but the imposition of the National Security Law in 2020 has largely driven the movement underground.\n The PRC considers Taiwan to be one of its provinces, but Taiwan's government (the Republic of China) has been completely separate from the mainland Chinese one since 1949. Both governments on paper continue to claim to be the sole legitimate government for all of China. 
There is significant support for formal independence in Taiwan, but China's government has repeatedly threatened to launch a military attack on Taiwan if the island declares itself independent. See Chinese provinces and regions for more detail.\n \n People and customs[edit] \n Minority cultures of China With 1.4 billion inhabitants, China is home to nearly a fifth of the world's population. It is a diverse place with large variations in culture, language, customs and economic levels from region to region, and often strong distinct cultural and regional identities as well.\n The economic landscape is particularly diverse. The major cities such as Beijing, Guangzhou and Shanghai are modern and comparatively wealthy. However, about 50% of Chinese still live in rural areas even though only 10% of China's land is arable. Hundreds of millions of rural residents still farm with manual labor or draft animals. Some 200 to 300 million former peasants have migrated to townships and cities in\n\n that around major holidays, places may be closed when they're usually open or open when they would usually be closed.\n \n Time zones[edit] \n Despite geographically spanning five time zones, all of China officially follows Beijing Time (UTC+8). However, in the restive province of Xinjiang, while the official time is Beijing Time, some ethnic Uyghurs use the UTC+6 time zone as a sign of defiance against Beijing.\n \n Read[edit] \n Wild Swans by Jung Chang (ISBN 0007176155) - a biography of three generations, from the warlord days to the end of Mao's era, illustrating life under China's version of nationalism and communism. This book is banned in China. The Search for Modern China by Jonathan Spence - a standard history book on modern China from the late Ming to the current period. 
\n See also[edit] \n \n Talk[edit] \n Chinese phrasebook  The official language of China is Standard Mandarin, known in Chinese as Pǔtōnghuà (普通话, \"common speech\"), which is based on the Beijing dialect; Chinese in general is known as Zhōngwén (中文). Standard Mandarin is the main language for government and media, as well as the national lingua franca. While the official language is standardized, local pronunciation of Mandarin does vary by region. Unless otherwise noted, all terms, spellings and pronunciations in this guide are in Standard Mandarin. While national media is broadcast in Mandarin, each area often has its own local media that broadcasts in the local language.\n Chinese is written using Chinese characters (汉字, hànzì, lit. \"Han characters\"). Unlike an alphabet that represents individual sounds without any inherent meaning, each Chinese character represents a meaningful syllable: a specific word or part of a word. Although they look impenetrable at first, there is some method to the madness: most characters are composed from base components combined with other characters (often giving clues to both pronunciation and general meaning). The same characters are used in Japan and Korea with usually similar meanings, albeit different pronunciations. However, since the 1950s mainland China has used simplified characters, such as 龟 instead of 龜, in an attempt to eradicate illiteracy. Hong Kong, Macau, Taiwan, and many overseas Chinese still use the traditional characters, which are also sometimes used on\n\n\n China (disambiguation) China (中国; Zhōngguó) is one of the world's oldest civilizations. Its long and rich history is present in people's thinking and values, and in the art, architecture, and feats of engineering that remain from dynasties past. 
In addition to its bustling and futuristic cities like Shanghai and Shenzhen, China boasts an incredible array of breathtaking natural wonders like the karst landscapes of Guilin and granite peaks of Huangshan that have inspired painters and poets for generations.\n After a tumultuous 19th and 20th centuries, China has dramatically re-emerged as an economic powerhouse. Its rapid development has been paralleled by an ascent onto the international stage. Of course there are growing pains when high-rises and factories surge up to dwarf centuries-old pagodas, but there's also a strong sense of enthusiasm and optimism about what the future holds. If you visit now, you can see the relics of millennia of history and experience the signs of further transformations in progress.\n \n Regions[edit] \n China's hierarchy of administrative divisions has 22 provinces (省 shěng) which tend to have their own cultural identities, and 5 autonomous regions (自治区 zìzhìqū), each with a designated minority ethnic group. These along with four municipalities (直辖市 zhíxiáshì) make up what is known as mainland China (中国大陆 Zhōngguó dàlù).\n For the purposes of Wikivoyage, these provinces are grouped into the following regions:\n Regions of China \n\n\n\n\n\n\n \n Cities[edit] \n  Beijing Here are nine of China's most interesting cities for travelers. Others are listed in region articles.\n 39.90403116.4075261 Beijing (北京) — the capital, cultural center, and home of the Forbidden City, the Summer Palace, and other important historical sites. 30.66104.0633332 Chengdu (成都) — capital of Sichuan province, known for tingly-spicy food, and home of the giant pandas. 23.13113.263 Guangzhou (广州) — one of China's most prosperous and liberal cities. Center of Cantonese culture and cuisine .  Guilin 30.25120.16754\n\n Center of Cantonese culture and cuisine .  Guilin 30.25120.16754 Hangzhou (杭州) — built around West Lake, a  UNESCO World Heritage Site, and southern terminus of the Grand Canal. 
45.75126.6333335 Harbin (哈尔滨) — capital of Heilongjiang, which hosts the Ice and Snow Sculpture Festival during its bitterly cold winters. 39.4575.9833336 Kashgar (Chinese: 喀什, Uyghur: قەشقەر) — center of Uyghur culture, with a beautiful and well-preserved old town, and the famous Id Kah Mosque. 32.060833118.7788897 Nanjing (南京) — the capital during the early Ming Dynasty and Republic of China era, a renowned historical and cultural city with many historic sites. 31.166667121.4666678 Shanghai (上海) — China's largest city and main commercial centre, famous for its beautiful pre-World War II French, British and American colonial architecture, ultramodern 21st-century skyscrapers, and many shopping opportunities. 34.261111108.9422229 Xi'an (西安) — the oldest city and ancient capital of China, terminus of the ancient Silk Road, and home of the terracotta warriors. You can travel to many of these cities using the new fast trains. In particular, the Hangzhou - Shanghai - Suzhou - Nanjing line is a convenient way to see these historic areas.\n \n Other destinations[edit] \n Some of the most famous tourist attractions in China are:\n 40.416667116.0833331 Great Wall of China (万里长城) — longer than 8,000 km, this ancient wall is the most iconic landmark of China. 20.03342110.323982 Hainan (海南) — a tropical paradise island undergoing heavy tourism-oriented development. 33.2103.93 Jiuzhaigou Nature Reserve (九寨沟) — known for its many multi-level waterfalls, colourful lakes and as the home of the giant pandas. 29.5854103.757554 Leshan — most famous for its huge riverside cliff-car\nHuman: Answer the query.\nFormat the information as a table with columns for name Your response should be a table Your response should be a table\nWhich unique cities are in Asian countries where Chinese is the official language ?"

In [None]:
import tiktoken
# Count the tokens of the pasted prompt held in `prompt_str` and print the total.
# NOTE(review): "cl100k_base" is a tiktoken encoding; the model served through
# Ollama presumably tokenizes differently, so treat the number as an estimate
# when comparing it with the configured context size.
encoding = tiktoken.get_encoding("cl100k_base")
num_tokens = len(encoding.encode(prompt_str))
print(num_tokens)

# Haciendo consultas

In [None]:
# SQL query under test.
consulta_sql = '''SELECT T2.Language FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T1.HeadOfState = "Beatrix" AND T2.IsOfficial = "T"'''
# Alternative query kept for manual experiments:
# consulta_sql = '''select distinct t3.name from country as t1 join countrylanguage as t2 on t1.code = t2.countrycode join city as t3 on t1.code = t3.countrycode where t2.isofficial = 't' and t2.language = 'chinese' and t1.continent = "asia"'''

# Only the sub-queries stored under the 'independientes' key are processed here.
lista_miniconsulta = obtener_miniconsultas(consulta_sql)['independientes']

for miniconsulta in lista_miniconsulta:
    # Build the natural-language question first, then run the sub-query.
    traduccion, _, _ = miniconsulta.crear_prompt()
    miniconsulta.ejecutar()

    # Report the question and the result stored on the sub-query object.
    print(
        "################################################",
        f"Pregunta: {traduccion}",
        "Respuesta: ",
        sep="\n",
    )
    df = miniconsulta.resultado
    print(df)


    

In [15]:
import pandas as pd

# Minimal two-row frame with a single 'Capital' column.
a = pd.DataFrame([[1], [2]], columns=['Capital'])
# a['Capital'] is a one-dimensional Series, so indexing it with two axes
# raises IndexingError ("Too many indexers"), as recorded in this cell's output:
# print(pd.DataFrame(a['Capital'].iloc[:, 0]))

IndexingError: Too many indexers