## Description :

- This project aims to build a simple graph specialized in generating charts from a user question, for example:
  - Give a chart of the water level in the Al Wahda Dam in Morocco.
- Because the questions are about Morocco, the web search should be done in Arabic or French, not English, to get good results.

## Setup :

curl -fsSL https://ollama.com/install.sh | sh

ollama pull llama3-groq-tool-use:8b

ollama pull mistral-nemo:12b

ollama pull llama3.1:8b       

ollama pull codegeex4:latest  `Does not work with function calling, but is good at code-related tasks`

ollama pull nomic-embed-text  `For Embedding`



\n

## Build LLMs and Tools :

### Tools :

In [3]:
import getpass
import os

# Never commit API keys to the notebook. The Exa key is taken from the
# environment; when it is missing, it is requested interactively without echo.
# The key that used to be hard-coded here must be considered leaked and should
# be revoked/rotated in the Exa dashboard.
if not os.environ.get("Exa_API_KEY"):
    os.environ["Exa_API_KEY"] = getpass.getpass("Exa API key: ")

In [4]:
from exa_py import Exa
from langchain.agents import tool

# Exa client used by the `search` and `get_contents` tools below.
# The key is read from the Exa_API_KEY environment variable; indexing
# os.environ raises KeyError when that variable is not set.
exa = Exa(api_key=os.environ["Exa_API_KEY"])


@tool
def search(query: str, include_domains=None, start_published_date=None):
    """Search for a webpage based on the query.
    Set the optional include_domains (list[str]) parameter to restrict the search to a list of domains.
    Set the optional start_published_date (str) parameter to restrict the search to documents published after the date (YYYY-MM-DD).
    """
    # NOTE: the docstring above is kept verbatim on purpose; LangChain's @tool
    # uses it as the tool description, so rewording it changes the prompt.
    # Fixed search options: autoprompt on, five results, optional filters
    # forwarded unchanged (None means "no restriction" as far as this code goes).
    search_options = {
        "use_autoprompt": True,
        "num_results": 5,
        "include_domains": include_domains,
        "start_published_date": start_published_date,
    }
    return exa.search_and_contents(f"{query}", **search_options)
 

@tool
def get_contents(ids: list[str]):
    """Get the contents of a webpage.
    The ids passed in should be a list of ids returned from `search`.
    """
    # NOTE: docstring kept verbatim; LangChain's @tool uses it as the tool
    # description. The ids are handed to the Exa client unchanged.
    page_contents = exa.get_contents(ids)
    return page_contents


In [9]:
# Show the argument schema derived from the `search` signature. The recorded
# output lists a 'type' only for `query`, the one annotated parameter.
search.args

{'query': {'title': 'Query', 'type': 'string'},
 'include_domains': {'title': 'Include Domains'},
 'start_published_date': {'title': 'Start Published Date'}}

In [24]:
# Smoke test of the `search` tool with a French query.
# The payload gets its own name so the builtin `input` is not rebound for the
# rest of the notebook session.
search_input = {"query":"Barrage Alwahda 2015 2024 maroc"}
response = search.invoke(search_input)
response.results

[Result(url='https://albaraka.ma/', id='https://albaraka.ma/', title='Guide des Banques et Assurances au Maroc - Albaraka.ma', score=0.1557559221982956, published_date='2015-07-14', author='', text="Emplacements ajoutÃ©es rÃ©cemment  \n \n \n  \nSAHAM Assurance – Assurances Arrahma\nAgence d’assurance Casablanca\n \n Adresse Boulevard Taza, rue 89, nÂ° 112 Rue 300 Hay Moulay Abdellah, Casablanca 20000\n \n   \n \n  \nAgecap – Casablanca – +212 5222-24180\nCourtier d’assurances\n \nAdresse 88, Avenue Mers Sultan, 2?Et., Grand Casablanca, 20000, Casablan, Casablanca 20100\n CatÃ©gories Courtier d'assurances Courtier d'assurances Casablanca\n \n   \n \n  \nCIH Bank – TÃ©mara –\nCIH Bank – Ø§Ù\x84Ù\x82Ø±Ø¶ Ø§Ù\x84Ø¹Ù\x82Ø§Ø±Ù\x8a Ù\x88 Ø§Ù\x84Ø³Ù\x8aØ§Ø\xadÙ\x8a TÃ©mara\n \n Adresse Ave Moulay Ali Chrif, Temara\n \n   \n \n  \nMAMDA MCMA MAEM Socrate\nAgence d’assurance Casablanca\n \n Adresse 1-3Ø\x8c Les rÃ©sidences MASUREL, Rue Socrate, Casablanca 20380\n \n   \n \n  \nAssurance Tifaoui

In [4]:
from langchain.agents import Tool
from langchain_experimental.utilities import PythonREPL

# Python REPL utility from langchain_experimental. As the warning printed by
# the next cell says, it can execute arbitrary code — use with caution.
Python_Interp = PythonREPL()

In [5]:
# Smoke test: run a trivial snippet; the recorded result is the printed text '2\n'.
Python_Interp.run("print(1+1)")

Python REPL can execute arbitrary code. Use with caution.


'2\n'

### LLMs :

#### Converter :

In [6]:
# Converter LLM:

#from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI




# Structured-output schema for the Converter chain: one required string field
# holding the rewritten web search query.
# NOTE(review): the class docstring and the Field description are left verbatim;
# presumably `with_structured_output` forwards them to the model as part of the
# schema, so rewording them would change the prompt — confirm before editing.
class RouteQuery(BaseModel):
    """Convert a user question to an optimized and accurate Web search query"""

    query: str = Field(...,description="This is an optimized and accurate Web search query based on the User question")


# LLM with structured output: llama3.1:8b reached through the OpenAI-compatible
# endpoint at localhost:11434 (the model is one of those pulled with Ollama in
# the Setup section). Responses are parsed into RouteQuery.
llm = ChatOpenAI(api_key="ollama",model="llama3.1:8b",base_url="http://localhost:11434/v1")
structured_llm_converter = llm.with_structured_output(RouteQuery)

# Prompt
# The string is not raw: every `\n` escape is a newline in addition to the
# physical line break that follows it.
system = """You are an expert at converting french user question to an optimized and accurate french Web search query.\n
Correct any tipo or spelling mistakes in the Web search query before give it, it should be correct.\n
Don't add any additional details or explainations give just the correct and good Web search query."""

# Two-message chat prompt; `{question}` is the only template variable.
converter_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "The User qeustion here :\n {question} "),
    ]
)
  
# Chain: prompt -> structured LLM. Invoked with {"question": ...}.
Converter = converter_prompt | structured_llm_converter 

- Test

In [11]:
# Try the Converter on a deliberately misspelled French question.
# The payload gets its own name so the builtin `input` is not rebound.
User = "je veux des statistics a propos le remplisage de Brrage Alwahda de 2015 a 2024 ?"
converter_input = {"question":User}
print(converter_input)
converted_question = Converter.invoke(converter_input)
converted_question

{'question': 'je veux des statistics a propos le remplisage de Brrage Alwahda de 2015 a 2024 ?'}


RouteQuery(query='statistiques remplissage Stade Alwahda 2015 2024')

#### Reporter :

In [7]:
#from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

## Data validation is applied to the LLM response to ensure that the response is structured as we want.

# Structured-output schema for the Reporter chain: one required `dict` field.
# NOTE(review): the docstring and Field description are left verbatim;
# presumably they are sent to the model as part of the schema — confirm before rewording.
class Reporter_Data(BaseModel):
    """Extract and Orginize web search contents into  the main informations in forme series of values"""

    Report: dict = Field(...,description="""This is python dictionary with keys and values you should extract  from a french web search content and based on the User question""")


# LLM with structured output: mistral-nemo:12b through the OpenAI-compatible
# endpoint at localhost:11434; responses are parsed into Reporter_Data.
# Note that this rebinds the module-level `llm` used by the Converter cell.
llm = ChatOpenAI(api_key="ollama",model="mistral-nemo:12b",base_url="http://localhost:11434/v1")
structured_llm_reporter = llm.with_structured_output(Reporter_Data)

# Prompt
# Not a raw string: every `\n` escape is a newline in addition to the physical
# line break that follows it. This also rebinds the module-level `system`.
system = """You are an expert at Extracting and Orginizing french web search content into  the useful informations in forme of a python dictionary.\n
This python dictionary with keys and values you should extract from the web search content.\n
Give attention to values you see that there are necessary to respond to the User Question.\n
If there a missing value  just keep it empty dont't invent new values from your own . Informations should be just from the French Web search Content.\n
The Web search content may countain outher statistics and values but keep attention only  to values related to the User question.\n
Don't add any additional details or explainations give just the correct and good python dictionary."""

# Two template variables: `{question}` and `{Web_Search}`.
reporter_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "The User qeustion here :\n {question} \n\n The French Web search Content here : \n {Web_Search}"),
    ]
)
  
# Chain: prompt -> structured LLM. Invoked with {"question": ..., "Web_Search": ...}.
Reporter = reporter_prompt | structured_llm_reporter 

- Test :

In [7]:
def Get_Content(results:list)->str:
    """Merge the 'content' text of every search result into one string.

    Each item of `results` is indexed with the key 'content'; the texts are
    kept in their original order and separated by a dashed line. An empty
    list gives an empty string.
    """
    separator = "\n-------\n"
    texts = [hit['content'] for hit in results]
    return separator.join(texts)

In [12]:
# Run the converted query through the Tavily search tool.
# NOTE(review): `Tavily` is not defined anywhere in the visible cells —
# presumably created in a cell that was not exported; confirm.
# The recorded output is a list of dicts with 'url' and 'content' keys.
Tavily_response = Tavily.invoke(converted_question.query)
Tavily_response

[{'url': 'https://www.espn.com/soccer/team/stats/_/id/7134',
  'content': 'Get the full Al-Wahda stats for the 2023-24 season on ESPN. Includes leaders in goals, assists, yellow and red cards, and longest streaks.'},
 {'url': 'https://fr.le360.ma/economie/barrage-al-wahda-un-taux-de-remplissage-considerable-pour-le-plus-grand-barrage-du-maroc_NLPOUJGEPFAVDFMN3H3IHVZWTM/',
  'content': 'Après une baisse sans précédent, le taux de remplissage du barrage Al Wahda, le plus grand du Royaume, renoue avec une tendance à la hausse, notamment après les récentes pluies du mois de ...'},
 {'url': 'https://fr.statista.com/statistiques/830874/taux-remplissage-stades-bundesliga/',
  'content': "Cette statistique représente le taux de remplissage des stades de football des équipes de la Bundesliga durant la saison 2023-2024. Dans le classement des clubs de Bundesliga avec le taux d'occupation de stade le plus élevé, Eintracht Dortmund est en tête. En effet, le stade est toujours plus que complet."},


In [31]:
# Feed the question and the merged search snippets to the Reporter chain.
# The payload gets its own name so the builtin `input` is not rebound.
reporter_input = {"question":User,
"Web_Search":Get_Content(Tavily_response)}
print(reporter_input)
report = Reporter.invoke(reporter_input)
report

{'question': "je veux savoir Niveu d'eau dans le Barrage Alwahda au Maroc anne les 5 derniers annes ?", 'Web_Search': "Lire aussi : Barrages: au 15 avril, un taux de remplissage de 51% du bassin du Sebou. « En tant que plus grand réservoir du Royaume, le barrage Al Wahda joue un rôle essentiel dans la ...\n-------\nDR N° 1097 du 19 au 25 July 2024 Voir le ... rapport révèle des différences significatives entre les barrages en termes de niveaux d'eau et de taux de remplissage : Le barrage d'Al Wahda ...\n-------\nLa Direction régionale du Nord de la branche Eau de l'Office national de l'électricité et de l'eau potable va lancer la mise à niveau du système de comptage et de celui de la régulation du barrage Al Wahda. X. ECONOMIE; ... A la découverte du Maroc; Morocco Now; SIAM 2024; CAN 2023; Voyager au temps du Covid; Assemblée Annuelles ...\n-------\nAu 20 février 2022, le taux de remplissage des principaux barrages du pays s'élevait à peine à 33,1%, soit près de 15% de moins que le po

#### Grader :

In [8]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Literal

## Data validation is applied to the LLM response to ensure
# that the response is structured as we want.

# Structured-output schema for the Grader chain: `Grade` is restricted to the
# two literals "Good" and "Bad".
# NOTE(review): the docstring and Field description are left verbatim;
# presumably they are sent to the model as part of the schema — confirm before rewording.
class Grader_Data(BaseModel):
    """Evaluate The Report of the Reporter Agent as Good/Bad report"""

    Grade: Literal["Good","Bad"] = Field(...,description="""This field is your evaluation for a report of informations if there good to 
    responde a User question or not""")

# LLM with structured output: llama3.1:8b through the OpenAI-compatible
# endpoint at localhost:11434; responses are parsed into Grader_Data.
# This rebinds the module-level `llm` once more.
llm = ChatOpenAI(api_key="ollama",model="llama3.1:8b",base_url="http://localhost:11434/v1")
structured_llm_grader = llm.with_structured_output(Grader_Data)

# Prompt
# Not a raw string: every `\n` escape is a newline in addition to the physical
# line break that follows it. This also rebinds the module-level `system`.
system = """You are an expert at evaluating Reports and informations.\n
Given the a set of values in forme of a python dictionary extracted by a Reporter Agent  from a french web
search contents and a  User question you should evaluate if the python dictionary give enought 
data to answer the User question OR Not.\n
If you see that values of the python dictionary are missing or not enought to answer the User question 
Grade it by "Bad", Outherwise grade it by "Good".\n
The python dictionary will be used by anouther Agent to create a Chart.\n
Your response should be Good/Bad key word.\n
Don't add any additional details or explanations just the key word."""

# Two template variables: `{Python_Dictionary}` and `{question}`.
grader_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "The python dictionary here :\n {Python_Dictionary}\n\n The User question here :\n {question}")
    ]
)
        
# Chain: prompt -> structured LLM. Invoked with {"Python_Dictionary": ..., "question": ...}.
Grader = grader_prompt | structured_llm_grader

In [None]:
# Grade the Reporter's dictionary against the question it was extracted for.
# The original cell left `User` and `Dictionary` unassigned, which is a syntax
# error. It now reuses `User` and `report` from the cells above; `report` is
# the Reporter_Data instance returned by the Reporter chain.
# NOTE(review): replace these with other values to grade a different report.
Dictionary = report.Report
grader_input = {"Python_Dictionary":Dictionary,"question":User}
Grade = Grader.invoke(grader_input)
Grade

## Initialize The Graph:

In [None]:
from typing_extensions import TypedDict
import pandas as pd
from langgraph.graph import StateGraph, START, END

# Shared state schema passed between the graph nodes.
# NOTE(review): SQL_query, Tables_Names, Tables_Schemas and Tables_Need look
# like they were carried over from a SQL-oriented graph; nothing in the visible
# cells of this chart project reads or writes them — confirm and adapt.
class State(TypedDict):
    Question: str  # key set by the run cell at the end of the notebook
    SQL_query: str
    Tables_Names: str
    Tables_Schemas: str
    Tables_Need: str
    Grade: str  # presumably the Grader verdict ("Good"/"Bad") — TODO confirm
    Finale_Response: pd.DataFrame  # read from the last streamed state in the run cell

# Graph builder parameterized by the state schema; nodes and edges are expected
# to be added in the (currently empty) sections below.
graph_builder = StateGraph(State)


## Nodes Functions :

In [None]:
from Colors import BLUE,RED,YELLOW,GREEN,RESET

### Tools :

### LLMs:

## Design the Graph:

### Build :

### Visualize :

## Interact with The Graph :

In [None]:
# Run
# NOTE(review): `SQL_Graph` is not defined in the visible cells and the sample
# question is about customers/employees, not dams — presumably both were
# carried over from a SQL project; confirm and replace with this notebook's
# compiled graph and a chart question.
# Fix: the two string pieces were concatenated without a separating space
# ("...'Jane'?be accurate..."); a space is now part of the question.
inputs = {
    "Question": "Which customers are supported by the employee with the first name 'Jane'? " + "be accurate and Optimale"
}
# Stream the graph: each `output` maps a node name to the state it produced.
for output in SQL_Graph.stream(inputs):
    for key, value in output.items():
        # Node
        print(f"Node :{key}")
        print(f"State :{value}")
    print("\n")

# `value` is the state from the last streamed node; it is undefined if the
# stream yielded nothing.
value["Finale_Response"]