In [None]:
! pip install -U langgraph langsmith
! pip install langchain_community
! pip install langchain_openai

In [None]:
import os
from langchain_openai import ChatOpenAI  #  
# API Key
os.environ['OPENAI_API_KEY'] = "sk-proj-rPBnsXim 0yG78BbewkjRBfeT3BlbkFJoLeEVAbOl0kiGIZTWPKAtav6nWikZPSewB3oeo055nFcLW9rnMorepgfeAZNQQz5FUUwrtmQsA"

# Model
llm = ChatOpenAI(
    model="gpt-4o-mini",  #  
    temperature=0.2
)

In [27]:
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI
import os

# Configuration du modèle LLM
# os.environ["OPENAI_API_KEY"] = "votre-clé-api"
llm = ChatOpenAI(model="gpt-4", temperature=0.2)

# Définition des descriptions de tags exactement comme dans le prompt initial
SALES_DESCRIPTION = """
#sales = This tag is used when a user asks questions about Sales Performance. 
Maximizing Sales is the ultimate goal of any commercial production organization so understanding Sales Performance is very important.
The data includes information on Sales in various currencies and "Trade Levels" (eg Manufacturer, Wholesaler, Public/pharmacy prices) 
and is split by dimensions such as Country, Corporation, Anatomical Therapy Clase, Disease, Product, Form, Strength, Pack.
"""

SALES_EXAMPLES = """
Examples of #sales questions:
1. What is the trend of our product performance over the last quarter?
2. How does our revenue compare across different countries?
3. Which anatomical therapy class is generating the most revenue?
4. What are the top-performing products in the public/pharmacy price segment?
5. How do our sales figures vary by product form and strength?
6. What is the market share of our corporation in the European region?
7. How do sales figures differ between wholesalers and manufacturers?
8. What is the average price point for our products in the US market?
9. Which disease category is driving the highest sales for our company?
10. How have our sales figures changed year-over-year in the UK?
"""

CHANNEL_DYNAMICS_DESCRIPTION = """
#Channel_Dynamics = This tag is used when a user asks questions about any kind of Spend, 
Marketing Spend, Promoter/detractors, Mentions etc. 
The data provides information on how products are marketed and percieved.
"""

CHANNEL_DYNAMICS_EXAMPLES = """
Examples of #Channel_Dynamics questions:
1. What is the current sentiment around my brand in social media discussions?
2. How does our marketing spend compare to industry benchmarks?
3. What are the key factors influencing customer perceptions of our products?
4. How many positive reviews have we received in the last quarter?
5. What channels are driving the most engagement for our marketing campaigns?
6. How frequently are our products mentioned in online forums?
7. What is the average customer rating for our latest product launch?
8. How do our promotional efforts impact customer loyalty?
9. What trends are emerging in consumer feedback about our brand?
10. How does our brand's visibility compare to that of our main competitors?
"""

def classify_intent(question, verbose=True):
    """
    Fonction principale qui prend une question en entrée et renvoie l'intention classifiée
    
    Args:
        question (str): La question de l'utilisateur
        verbose (bool): Afficher les détails d'exécution
        
    Returns:
        str: L'intention classifiée (#sales, #Channel_Dynamics, ou #somewhereelse)
    """
    if verbose:
        print(f"Classifying intent for question: {question}")
    
    # Définition des agents
    question_analyzer = Agent(
        role="Question Analyzer",
        goal="Extract the most relevant information from user questions to determine their intent",
        backstory="""You are an expert in natural language processing and intent classification.
        Your specialty is understanding the nuances of business questions and extracting key elements
        that reveal the true intent behind them.""",
        verbose=True,
        allow_delegation=False,
        llm=llm
    )

    tag_expert = Agent(
        role="Tag Description Expert",
        goal="Analyze tag descriptions to understand their scope and relevant dimensions",
        backstory="""You have extensive knowledge of business intelligence taxonomy and categorization.
        You excel at understanding tag descriptions and identifying the key dimensions, data points,
        and business goals that each tag encompasses.""",
        verbose=True,
        allow_delegation=False,
        llm=llm
    )

    correlation_analyst = Agent(
        role="Correlation Analyst",
        goal="Determine the best matching hashtag for user questions by analyzing correlations",
        backstory="""You are a data correlation specialist with a strong background in semantic analysis.
        Your expertise lies in finding patterns and connections between user questions and predefined
        taxonomies to ensure accurate classification.""",
        verbose=True,
        allow_delegation=False,
        llm=llm
    )

    # Définition des tâches
    analyze_question_task = Task(
        description=f"""
        Step 1: Read the user question carefully and extract the most relevant information.
        
        Question to analyze: {question}
        
        Focus on:
        - What type of data the user is asking about
        - What dimensions they're interested in
        - What business goal they're trying to achieve
        
        Provide a detailed analysis of the question highlighting these elements.
        """,
        agent=question_analyzer,
        expected_output="A comprehensive analysis of the user question with key elements highlighted"
    )

    analyze_tags_task = Task(
        description=f"""
        Step 2: Analyze the tag descriptions thoroughly:
        
        {SALES_DESCRIPTION}
        {SALES_EXAMPLES}
        
        {CHANNEL_DYNAMICS_DESCRIPTION}
        {CHANNEL_DYNAMICS_EXAMPLES}
        
        Extract and list:
        - The main business goals associated with each tag
        - The key data points covered by each tag
        - The dimensions that each tag encompasses
        
        Provide a detailed comparison of the two tags based on your analysis.
        """,
        agent=tag_expert,
        expected_output="A detailed analysis of both tag descriptions with key elements highlighted and compared"
    )

    determine_correlation_task = Task(
        description=f"""
        Step 3: Based on the analyses from previous steps, determine which tag best matches the user question:
        
        User Question: {question}
        
        Tag Descriptions:
        {SALES_DESCRIPTION}
        {CHANNEL_DYNAMICS_DESCRIPTION}
        
        Compare the elements of the question with the characteristics of each tag.
        Look for strong correlations between:
        - The question's focus and the tag's scope
        - The dimensions mentioned in the question and those covered by the tag
        - The implicit business goal of the question and the goals associated with the tag
        
        Classify the question as either:
        - #sales
        - #Channel_Dynamics
        - #somewhereelse (if it doesn't clearly match either of the first two)
        
        Provide a confidence score (0-100%) and explain your reasoning.
        """,
        agent=correlation_analyst,
        expected_output="A classification decision with confidence score and detailed reasoning"
    )

    # Création du crew
    intent_classification_crew = Crew(
        agents=[question_analyzer, tag_expert, correlation_analyst],
        tasks=[analyze_question_task, analyze_tags_task, determine_correlation_task],
        verbose=True,
        process=Process.sequential
    )
    
    # Exécution du crew
    result = intent_classification_crew.kickoff()
    
    # Extraction de la classification à partir du résultat
    final_result = result.raw
    
    # Analyse de la réponse pour extraire le tag
    if "#sales" in final_result.lower():
        return "#sales"
    elif "#channel_dynamics" in final_result.lower():
        return "#Channel_Dynamics"
    else:
        return "#somewhereelse"

# Exemple d'utilisation
if __name__ == "__main__":
    # Import nécessaire pour pandas
    import pandas as pd
    from IPython.display import display
    
    # Exemples de questions à tester
    questions = [
        "What are the top three competitors in my market and their marketing strategies",
        "How can I improve my product's features to better meet customer needs",
        "What pricing strategy would maximize my profit margins",
        "What distribution channels should I consider for my product",
        "How do our sales figures compare to those of our main competitors?",
        "How does the performance of our products differ by pack size?",
        "How does the sales performance of our new products compare to established ones?",
        "How many mentions is my product getting?",
        "What are my competitors spending on marketing?"
    ]
    
    # Collecter les résultats
    results = []
    for question in questions:
        intent = classify_intent(question, verbose=False)
        results.append({"Question": question, "Intent": intent})
        print(f"Question: {question}")
        print(f"Intent: {intent}")
        print("-" * 80)
    
    # Créer un DataFrame pandas
    df_results = pd.DataFrame(results)
    
    # Afficher le DataFrame avec un style sombre (comme dans la capture d'écran)
    def highlight_intent(val):
        if val == "#sales":
            return 'background-color: #e8f5e9; color: #2e7d32; font-weight: bold'
        elif val == "#Channel_Dynamics":
            return 'background-color: #e3f2fd; color: #1565c0; font-weight: bold'
        else:
            return 'background-color: #fff8e1; color: #f9a825; font-weight: bold'
    
    # Style pour tout le DataFrame (fond sombre)
    def style_df(df):
        # Style pour tout le DataFrame
        styles = [
            dict(selector="th", props=[("background-color", "#333"), 
                                       ("color", "white"),
                                       ("font-weight", "bold"),
                                       ("text-align", "center")]),
            dict(selector="td", props=[("text-align", "right"),
                                      ("padding", "10px 15px")]),
            dict(selector="tr:nth-child(even)", props=[("background-color", "#2a2a2a")]),
            dict(selector="tr:nth-child(odd)", props=[("background-color", "#222")]),
            dict(selector="", props=[("background-color", "#1a1a1a"),
                                     ("color", "white"),
                                     ("border", "none")])
        ]
        return styles
    
    try:
        # Essayer d'appliquer le style complet
        styled_df = df_results.style.set_table_styles(style_df(df_results)).applymap(highlight_intent, subset=['Intent'])
        display(styled_df)
    except:
        # Version simple si le style complet échoue
        try:
            styled_df = df_results.style.applymap(highlight_intent, subset=['Intent'])
            display(styled_df)
        except:
            # Affichage brut en dernier recours
            print("\nTableau pandas (non stylisé):")
            print(df_results)

Overriding of current TracerProvider is not allowed


LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m[92mQuestion Analyzer[00m
[95m## Task:[00m [92m
        Step 1: Read the user question carefully and extract the most relevant information.
        
        Question to analyze: What are the top three competitors in my market and their marketing strategies
        
        Focus on:
        - What type of data the user is asking about
        - What dimensions they're interested in
        - What business goal they're trying to achieve
        
        Provide a detailed analysis of the question highlighting these elements.
        [00m


[1m[95m# Agent:[00m [1m[92mQuestion Analyzer[00m
[95m## Final Answer:[00m [92m
The user's question pertains to competitive analysis within their specific market. They are interested in identifying the top three competitors and gaining insights into their

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The best matching tag for the user question "What are the top three competitors in my market and their marketing strategies" is #Channel_Dynamics. This decision is based on the correlation between the user's question about competitors' marketing strategies and the #Channel_Dynamics tag's focus on marketing spend, brand perception, and product marketing. The confidence score for this classification is 90% due to the strong alignment between the question's focus on marketing strategies and the tag's scope.[00m


Question: What are the top three competitors in my market and their marketing strategies
Intent: #Channel_Dynamics
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m[92mQues

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The best matching tag for the user question "How can I improve my product's features to better meet customer needs" is #Channel_Dynamics. I am 85% confident in this classification. The reasoning behind this is that the user's question is more about understanding customer needs and improving product perception, which aligns with the #Channel_Dynamics tag's focus on product perception and visibility. The #sales tag, on the other hand, is more focused on sales performance and data, which doesn't directly relate to the user's question.[00m


Question: How can I improve my product's features to better meet customer needs
Intent: #sales
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The user's question "What pricing strategy would maximize my profit margins" is best matched with the #sales tag. I am 85% confident in this classification. The question is directly related to sales performance and pricing strategies, which are key elements of the #sales tag. While the #Channel_Dynamics tag could potentially provide insights into how pricing strategies affect brand perception, the user's question does not explicitly mention these elements, making the #sales tag a stronger match.[00m


Question: What pricing strategy would maximize my profit margins
Intent: #sales
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m[92mQuestion Analyzer[00m
[95m## Task:[00m [92m


Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The best matching hashtag for the user question "What distribution channels should I consider for my product" is #Channel_Dynamics. I am 85% confident in this classification. The question is more aligned with the scope of the #Channel_Dynamics tag, as it pertains to marketing and product placement strategies, rather than sales performance or data. The user is seeking advice on distribution channels, which is a key aspect of marketing and falls under the purview of the #Channel_Dynamics tag.[00m


Question: What distribution channels should I consider for my product
Intent: #Channel_Dynamics
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m[92mQuestion Analyzer[00m
[95m## Task:

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The best matching hashtag for the user question "How do our sales figures compare to those of our main competitors?" is #sales. I am 90% confident in this classification. The question is directly asking about sales figures, which falls under the scope of the #sales tag. The tag's focus on understanding and maximizing sales performance, as well as its coverage of data points such as sales in various currencies and trade levels, aligns well with the user's question. The question does not mention any elements related to the #Channel_Dynamics tag, such as marketing spend or brand perception. Therefore, the #sales tag is the most appropriate classification.[00m


Question: How do our sales figures compare to those of our main competitors?
Intent: #sales
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created wi

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The best matching tag for the user question "How does the performance of our products differ by pack size?" is #sales. I am 90% confident in this classification. The question is focused on product performance and specifically mentions a dimension (pack size) that is covered under the #sales tag. The question does not align with the key elements of the #Channel_Dynamics tag, which are more focused on marketing efforts and customer perception.[00m


Question: How does the performance of our products differ by pack size?
Intent: #sales
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m[92mQuestion Analyzer[00m
[95m## Task:[00m [92m
        Step 1: Read the user question carefully

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The best matching tag for the user question "How does the sales performance of our new products compare to established ones?" is #sales. I am 95% confident in this classification. The question is directly asking about sales performance, which falls under the scope of the #sales tag. The question does not mention any elements related to the #Channel_Dynamics tag, such as marketing efforts or customer perceptions. Therefore, the #sales tag is the most appropriate.[00m


Question: How does the sales performance of our new products compare to established ones?
Intent: #sales
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m[92mQuestion Analyzer[00m
[95m## Task:[00m [92m
        S

Overriding of current TracerProvider is not allowed




[1m[95m# Agent:[00m [1m[92mCorrelation Analyst[00m
[95m## Final Answer:[00m [92m
The best matching tag for the user question "How many mentions is my product getting?" is #Channel_Dynamics. The confidence score for this classification is 90%. The reasoning behind this decision is that the user's question is focused on brand visibility and customer perception, which is covered by the #Channel_Dynamics tag. The question does not directly relate to sales performance, which is the focus of the #sales tag. Therefore, the #Channel_Dynamics tag is a better match for this question.[00m


Question: How many mentions is my product getting?
Intent: #sales
--------------------------------------------------------------------------------
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
LLM created with extracted parameters; model='gpt-4'
[1m[95m# Agent:[00m [1m[92mQuestion Analyzer[00m
[95m## Task:[00m [92m
        Step 1:

  styled_df = df_results.style.set_table_styles(style_df(df_results)).applymap(highlight_intent, subset=['Intent'])


Unnamed: 0,Question,Intent
0,What are the top three competitors in my market and their marketing strategies,#Channel_Dynamics
1,How can I improve my product's features to better meet customer needs,#sales
2,What pricing strategy would maximize my profit margins,#sales
3,What distribution channels should I consider for my product,#Channel_Dynamics
4,How do our sales figures compare to those of our main competitors?,#sales
5,How does the performance of our products differ by pack size?,#sales
6,How does the sales performance of our new products compare to established ones?,#sales
7,How many mentions is my product getting?,#sales
8,What are my competitors spending on marketing?,#Channel_Dynamics
