In [1]:
import os
import sys
import logging
from pathlib import Path

# Add the project root to the Python path so that `src.*` imports resolve.
# This assumes the kernel's working directory is <project_root>/notebooks,
# i.e. the parent of the current directory is the repository root.
project_root = Path.cwd().parent
# Guard the append: without it every re-run of this cell adds another
# duplicate entry to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Route INFO-level records from the notebook and the imported modules to the cell output.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

print(f"Project root: {project_root}")
print(f"Current working directory: {os.getcwd()}")

Project root: /home/vlad/dev/data-analyser
Current working directory: /home/vlad/dev/data-analyser/notebooks


In [2]:
from src.agent.agent import DataAnalysisAgent
from src.clients.db_client import DatabaseClient
from src.clients.jira_client import JiraClient
from src.models.schemas import ValidationResult, BusinessInsight, AgentState, JiraTicket
from dotenv import load_dotenv

# Load variables from a .env file into the process environment (python-dotenv),
# presumably so the os.environ lookups for the Jira settings in the next cell
# can find them. The call's return value is what the cell displays.
load_dotenv()

True

In [3]:
# Jira connection settings, looked up in the process environment.
# Each name is None when the corresponding variable is not set.
(
    JIRA_BASE_URL,
    JIRA_USER_EMAIL,
    JIRA_API_TOKEN,
    JIRA_PROJECT_KEY,
) = map(
    os.environ.get,
    ('JIRA_BASE_URL', 'JIRA_USER_EMAIL', 'JIRA_API_TOKEN', 'JIRA_PROJECT_KEY'),
)

In [4]:
# SQLite database location, given relative to the notebook's working directory.
DB_PATH = os.path.expanduser("../data/porsche_analytics.db")
sqlite_connection_string = "sqlite:///" + DB_PATH

# External clients: Jira (credentials come from the environment) and the database.
jira = JiraClient(
    base_url=JIRA_BASE_URL,
    email=JIRA_USER_EMAIL,
    api_token=JIRA_API_TOKEN,
)
sqlite_client = DatabaseClient(sqlite_connection_string)

# The agent reads its settings from <project_root>/config/config.yaml and
# runs its queries through the SQLite client created above.
agent = DataAnalysisAgent(
    agent_config=str(project_root.joinpath("config", "config.yaml")),
    db_client=sqlite_client,
    max_retries=3,
)

In [5]:
# TODO: in the production pipeline, pull tickets from Jira instead of using this list.

# Sample tickets for manual testing.
# NOTE: this assignment replaces whatever JIRA_PROJECT_KEY was read from the
# environment earlier in the notebook.
JIRA_PROJECT_KEY = "KAN"

# Every ticket carries the same keys ('project', 'summary', 'description',
# 'issuetype') so any entry can be fed to the cells that index those keys.
tickets = [
    {
        'project': JIRA_PROJECT_KEY,
        'summary': 'Car Models Analysis',
        'description': 'How many unqiue car models we have per car category? Sort the results in descending order!',
        'issuetype': 'Task',
    },
    {
        'project': JIRA_PROJECT_KEY,
        'summary': 'Dealership Performance by Region Analysis',
        'description': 'Analyze the average dealership rating and sales capacity by region. Which regions have the highest performing dealerships? Sort the results by average rating in descending order.',
        'issuetype': 'Task',
    },
    {
        'project': JIRA_PROJECT_KEY,
        'summary': 'Service Cost Analysis by Model and Service Type',
        'description': 'Analyze the average service costs by model and service type. Identify which models have higher maintenance costs and which service types contribute most to overall service revenue.',
        'issuetype': 'Task',
    },
    # Negative case: an irrelevant task whose question does not match the
    # database schema. The original 'id' key is kept; 'project' and 'issuetype'
    # are added so this entry has the same shape as the others.
    {
        'id': 'DA-101',
        'project': JIRA_PROJECT_KEY,
        'summary': 'Total Sales Overview',
        'description': 'What is the average user basket size?',
        'issuetype': 'Task',
    },
]

Functions

In [8]:
def set_task_from_ticket(state: AgentState) -> AgentState:
    """Copy the ticket description into the state's ``current_task`` field.

    Returns a copy of ``state`` (via ``model_copy``) with ``current_task`` set
    to ``state.ticket.description``.
    """
    source_ticket = state.ticket
    logger.info(f"Extracting task from ticket {source_ticket.ticket_id}")
    return state.model_copy(update={"current_task": source_ticket.description})


generate_sql_fn = agent.sql_generation_tool.generate_query
def generate_sql(state: AgentState) -> AgentState:
    """Ask the SQL generation tool for a query that answers ``state.current_task``.

    On success the returned copy of ``state`` holds the query in ``sql_query``
    and has ``error_message`` reset to None. If anything raises, the exception
    text is logged and stored in ``error_message`` instead.
    """
    logger.info("Generating SQL for the current task")
    try:
        return state.model_copy(
            update={
                "sql_query": generate_sql_fn(task_description=state.current_task),
                "error_message": None,
            }
        )
    except Exception as exc:
        failure = str(exc)
        logger.error(f"Error generating SQL: {failure}")
        return state.model_copy(update={"error_message": failure})


validate_sql_fn = agent.sql_validation_tool.validate_sql
def validate_sql(state: AgentState) -> AgentState:
    """Run the SQL validation tool on ``state.sql_query``.

    When there is no query to check, an invalid ``ValidationResult`` with a
    single explanatory error is recorded instead of calling the tool. Either
    way the outcome is stored in ``validation_result`` on a copy of ``state``.
    """
    logger.info("Validating SQL query")
    if state.sql_query:
        outcome = validate_sql_fn(sql_query=state.sql_query)
    else:
        outcome = ValidationResult(is_valid=False, errors=["No SQL query to validate"])
    return state.model_copy(update={"validation_result": outcome})


execute_query_fn = sqlite_client.execute_query
def execute_query(state: AgentState) -> AgentState:
    """Run ``state.sql_query`` through the database client.

    On success the returned copy of ``state`` carries the result in
    ``query_result`` and has ``error_message`` reset to None. If anything
    raises, the exception text is logged and stored in ``error_message``.
    """
    logger.info("Executing SQL query")
    try:
        return state.model_copy(
            update={
                "query_result": execute_query_fn(state.sql_query),
                "error_message": None,
            }
        )
    except Exception as exc:
        failure = str(exc)
        logger.error(f"Error executing query: {failure}")
        return state.model_copy(update={"error_message": failure})


generate_insights_fn = agent.sql_insight_tool.generate_insights
def generate_insights(state: AgentState) -> AgentState:
    """Turn the query result into business insights.

    If ``state.query_result`` is falsy, a placeholder ``BusinessInsight``
    explaining that no results are available is stored. Otherwise the insight
    tool is called with the current task and the query result. The outcome is
    written to ``business_insight`` on a copy of ``state``.
    """
    logger.info("Generating business insights")
    if state.query_result:
        insight = generate_insights_fn(
            task_description=state.current_task,
            query_result=state.query_result,
        )
    else:
        insight = BusinessInsight(
            summary="Unable to generate insights - no query results available.",
            key_points=["Query execution failed."],
        )
    return state.model_copy(update={"business_insight": insight})


def update_jira_ticket(state: AgentState, ticket_status: str = 'В работе') -> AgentState:
    """Transition the Jira ticket and post the analysis outcome as one comment.

    Args:
        state: Agent state. ``state.ticket.ticket_id`` identifies the issue and
            ``state.business_insight`` is the text to post.
        ticket_status: Name of the workflow status to transition the issue to.
            The default is the status name this function previously
            hard-coded, so callers that omit the argument see no change.
            Earlier versions ignored this argument entirely.

    Returns:
        The same ``state`` object, unmodified.
    """
    ticket_id = state.ticket.ticket_id
    logger.info(f"Updating JIRA ticket {ticket_id}")

    # Ticket status update: honour the caller-supplied status name.
    jira.transition_issue(issue_key=ticket_id, status_name=ticket_status)

    # Post exactly one comment: the insight when there is one, otherwise an
    # apology. Previously the missing-insight path posted the apology and then
    # fell through to post a second comment whose body was None.
    if state.business_insight:
        comment = state.business_insight
    else:
        comment = "Sorry, I cannot generate insights for this task."
    jira.add_comment(issue=ticket_id, comment=comment)
    return state

In [8]:
# Sample ticket under test: the first entry of the hard-coded list
ticket = tickets[0]

# Wrap the raw dict in a JiraTicket; the project key is used as the ticket id here
jira_ticket = JiraTicket(
    ticket_id=ticket['project'],
    summary=ticket['summary'],
    description=ticket['description'],
    status="OPEN",
    assignee='bot',
)

# Fresh agent state: only the ticket is populated, every other field is passed as None
agent_state = AgentState(
    ticket=jira_ticket,
    **dict.fromkeys(
        (
            "current_task",
            "sql_query",
            "validation_result",
            "query_result",
            "business_insight",
            "error_message",
        )
    ),
)

# Task extraction: show the task before and after it is read from the ticket
print(f'\nInitial Task: \n {agent_state.current_task}')
agent_state = set_task_from_ticket(agent_state)
print(f'\nTask after Ticket Extraction: \n {agent_state.current_task}')

2025-07-04 09:26:31,676 - __main__ - INFO - Extracting task from ticket KAN



Initial Task: 
 None

Task after Ticket Extraction: 
 How many unqiue car models we have per car category? Sort the results in descending order!


In [9]:
# Step 2: have the agent write SQL for the current task and show the query
agent_state = generate_sql(agent_state)
print(f'\nSQL Query Generated: \n {agent_state.sql_query}')

2025-07-04 09:26:33,528 - __main__ - INFO - Generating SQL for the current task
2025-07-04 09:26:33,531 - src.tools.sql_tool - INFO - Generating SQL query for task: How many unqiue car models we have per car category? Sort the results in descending order!
2025-07-04 09:26:36,663 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



SQL Query Generated: 
 SELECT segment, COUNT(DISTINCT model_id) AS unique_models
FROM models
GROUP BY segment
ORDER BY unique_models DESC;


In [10]:
# Step 3: validate the generated query and show the validation result
agent_state = validate_sql(agent_state)
print(f'\nValidation Results: \n {agent_state.validation_result}')

2025-07-04 09:26:38,346 - __main__ - INFO - Validating SQL query



Validation Results: 


In [14]:
from typing import Literal

def should_retry_query(state: AgentState, max_retries: int = 3) -> Literal["retry", "failed", "continue"]:
    """Check if we should retry generating SQL or move on.

    Args:
        state: Agent state; ``validation_result`` and ``retry_count`` are read.
        max_retries: Retry budget. ``retry_count`` must be strictly below this
            value for another attempt to be allowed.

    Returns:
        "continue" when the validation result exists and is valid, "retry"
        when it is not and ``state.retry_count < max_retries``, otherwise
        "failed".
    """
    if state.validation_result and state.validation_result.is_valid:
        return "continue"
    return "retry" if state.retry_count < max_retries else "failed"

should_retry_query(agent_state)

'continue'

In [45]:
# Step 4: run the validated query against the database and show the result
agent_state = execute_query(agent_state)
print(f'Query Results: \n {agent_state.query_result}')

2025-07-03 14:00:20,523 - __main__ - INFO - Executing SQL query
2025-07-03 14:00:20,530 - src.clients.db_client - INFO - Query executed successfully. Returned 7 rows.


Query Results: 
 data=[{'segment': 'Sports Car', 'unique_models': 5}, {'segment': 'SUV', 'unique_models': 3}, {'segment': 'Wagon', 'unique_models': 1}, {'segment': 'Supercar', 'unique_models': 1}, {'segment': 'Sedan', 'unique_models': 1}, {'segment': 'Luxury', 'unique_models': 1}, {'segment': 'Hypercar', 'unique_models': 1}] row_count=7 column_names=['segment', 'unique_models'] execution_time_ms=3.1719207763671875


In [None]:
# TODO: validate the query results at this point
# (open question: decide whether this step is needed at all)

In [50]:
# Step 5: turn the query result into business insights and show them
agent_state = generate_insights(agent_state)
print(f'\nBusiness Insights: \n {agent_state.business_insight}')

2025-07-03 14:09:30,855 - __main__ - INFO - Generating business insights
2025-07-03 14:09:30,858 - src.tools.insight_tool - INFO - Generating insights for task: How many unqiue car models we have per car category? Sort the results in descending order!
2025-07-03 14:09:41,503 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Business Insights: 
 Summary: The data analysis reveals the number of unique car models per car category. The 'Sports Car' category has the highest number of unique models with 5, followed by 'SUV' with 3. All other categories have only one unique model.

Key Points:
- The 'Sports Car' category has the most diverse range of models, indicating a potentially competitive market segment.
- The 'SUV' category is the second most diverse, suggesting a moderate level of competition.
- The 'Wagon', 'Supercar', 'Sedan', 'Luxury', and 'Hypercar' categories each have only one unique model, indicating a lack of diversity in these segments.

Recommendations:
- Consider expanding the range of models in the 'Wagon', 'Supercar', 'Sedan', 'Luxury', and 'Hypercar' categories to increase competitiveness and customer choice.
- Investigate the reasons behind the high diversity in the 'Sports Car' category. This could provide insights into consumer preferences and successful strategies.
- Maintain a close w

In [54]:
# Fetch issue KAN-9 from Jira (live API call) and pass it through
# extract_issue_details; the result is indexed by 'ticket_id' in the next cell.
issue = jira.get_issue(issue='KAN-9')
issue_details = jira.extract_issue_details(issue)

In [58]:
# Change the ticket status. The status name is passed exactly as this Jira
# instance spells it (Russian for "In progress").
jira.transition_issue(issue_key=issue_details['ticket_id'], status_name='В работе')

# Post the generated business insight as a comment on the same ticket.
# The cell output is the value returned by add_comment.
comment = agent_state.business_insight
jira.add_comment(issue=issue_details['ticket_id'], comment=comment)

{'self': 'https://vladislavphysique.atlassian.net/rest/api/2/issue/10019/comment/10000',
 'id': '10000',
 'author': {'self': 'https://vladislavphysique.atlassian.net/rest/api/2/user?accountId=70121%3Af86554c4-819a-470f-9606-bf3e8b1202e6',
  'accountId': '70121:f86554c4-819a-470f-9606-bf3e8b1202e6',
  'emailAddress': 'vladislavphysique@gmail.com',
  'avatarUrls': {'48x48': 'https://secure.gravatar.com/avatar/3a2cef07b580b80deb205a56efe02d14?d=https%3A%2F%2Favatar-management--avatars.us-west-2.prod.public.atl-paas.net%2Finitials%2FVR-3.png',
   '24x24': 'https://secure.gravatar.com/avatar/3a2cef07b580b80deb205a56efe02d14?d=https%3A%2F%2Favatar-management--avatars.us-west-2.prod.public.atl-paas.net%2Finitials%2FVR-3.png',
   '16x16': 'https://secure.gravatar.com/avatar/3a2cef07b580b80deb205a56efe02d14?d=https%3A%2F%2Favatar-management--avatars.us-west-2.prod.public.atl-paas.net%2Finitials%2FVR-3.png',
   '32x32': 'https://secure.gravatar.com/avatar/3a2cef07b580b80deb205a56efe02d14?d=https

In [9]:
from typing import List

def get_active_issue_ids_from_jira():
    """Return the keys of active Jira issues that still need a bot response.

    An issue qualifies when it has no comments at all, or when the author of
    its last comment is not the bot account (``jira.account_id``).
    """
    def _needs_processing(issue_key):
        # TODO: improve in the future (one comments request per issue -> slow)
        comments = jira.get_comments(issue_key=issue_key)['comments']
        if not comments:
            # nothing posted yet -> the ticket has to be processed
            return True
        # rule: a last comment written by the bot means the ticket is already handled
        return comments[-1]['author']['accountId'] != jira.account_id

    # keys of the issues returned by the client's active-issues query
    candidate_keys = [entry['key'] for entry in jira.get_active_issues()]
    return [key for key in candidate_keys if _needs_processing(key)]


def get_jira_tickets(issue_ids: List[str]):
    """Fetch the given Jira issues and return their extracted details.

    Args:
        issue_ids: Issue keys to fetch. Earlier versions ignored this argument
            and queried Jira again for the active issues, which doubled the
            API traffic and made the parameter meaningless.

    Returns:
        A list with one ``jira.extract_issue_details`` result per key, in the
        order of ``issue_ids``. An empty input yields an empty list.
    """
    return [
        jira.extract_issue_details(jira.get_issue(issue=issue_id))
        for issue_id in issue_ids
    ]

In [10]:
# Collect the keys of the tickets that still need a bot response, then load
# the details of the tickets to process (live Jira API calls).
issue_ids = get_active_issue_ids_from_jira()
jira_tickets = get_jira_tickets(issue_ids=issue_ids)

### Agent Workflow

In [11]:
# Pick one fetched ticket for the walkthrough. The index is hard-coded, so this
# cell needs at least three tickets in `jira_tickets`.
raw_ticket = jira_tickets[2]

# Convert the raw dict into a JiraTicket. The dict gets its own name so that
# `jira_ticket` only ever refers to the model, never to the raw dict.
jira_ticket = JiraTicket(
    ticket_id=raw_ticket['ticket_id'],
    summary=raw_ticket['summary'],
    description=raw_ticket['description'],
    status=raw_ticket['status'],
)


In [13]:
# Define the initial agent state from the ticket alone; no other field is
# passed to the constructor.
initial_state = AgentState(ticket=jira_ticket)

# Execute the workflow end to end.
# NOTE(review): the recorded output of the following cell shows `final_state`
# as a plain dict rather than an AgentState. Confirm that downstream code
# expects that.
final_state = agent.workflow.invoke(initial_state)

2025-07-04 11:04:04,491 - src.agent.workflow - INFO - Extracting task from ticket KAN-9
2025-07-04 11:04:04,493 - src.agent.workflow - INFO - Generating SQL for the current task
2025-07-04 11:04:04,495 - src.tools.sql_tool - INFO - Generating SQL query for task: How many unqiue car models we have per car category? Sort the results in descending order!
2025-07-04 11:04:07,005 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-04 11:04:07,024 - src.agent.workflow - INFO - Validating SQL query
2025-07-04 11:04:07,036 - src.agent.workflow - INFO - Executing SQL query
2025-07-04 11:04:07,042 - src.clients.db_client - INFO - Query executed successfully. Returned 7 rows.
2025-07-04 11:04:07,044 - src.agent.workflow - INFO - Generating business insights
2025-07-04 11:04:07,045 - src.tools.insight_tool - INFO - Generating insights for task: How many unqiue car models we have per car category? Sort the results in descending order!
2025-07-04 

In [14]:
# Display the workflow result (rendered below as a dict keyed by state field).
final_state

{'ticket': JiraTicket(ticket_id='KAN-9', summary='Car Models Analysis', description='How many unqiue car models we have per car category? Sort the results in descending order!', status='К выполнению', assignee=None),
 'current_task': 'How many unqiue car models we have per car category? Sort the results in descending order!',
 'sql_query': 'SELECT segment, COUNT(DISTINCT model_id) AS unique_models\nFROM models\nGROUP BY segment\nORDER BY unique_models DESC;',
 'query_result': QueryResult(data=[{'segment': 'Sports Car', 'unique_models': 5}, {'segment': 'SUV', 'unique_models': 3}, {'segment': 'Wagon', 'unique_models': 1}, {'segment': 'Supercar', 'unique_models': 1}, {'segment': 'Sedan', 'unique_models': 1}, {'segment': 'Luxury', 'unique_models': 1}, {'segment': 'Hypercar', 'unique_models': 1}], row_count=7, column_names=['segment', 'unique_models'], execution_time_ms=3.5958290100097656),
 'business_insight': "Summary: The data analysis reveals that the 'Sports Car' category has the highe

In [13]:
# Sanity check: show which ticket the following cells operate on.
jira_ticket.ticket_id

'KAN-9'

In [14]:
# Sanity check: run the ticket through JiraTicket validation and display the result.
JiraTicket.model_validate(jira_ticket)

JiraTicket(ticket_id='KAN-9', summary='Car Models Analysis', description='How many unqiue car models we have per car category? Sort the results in descending order!', status='К выполнению', assignee=None)

In [9]:
# NOTE(review): the recorded run of this cell ends with
# "AttributeError: 'dict' object has no attribute 'business_insight'".
# The workflow result displayed earlier in this notebook is a plain dict, so
# process_ticket presumably reads attributes from that dict. Verify in
# src/agent/agent.py before relying on this call.
agent.process_ticket(ticket=jira_ticket)

2025-07-04 10:59:09,934 - src.agent.agent - INFO - Processing ticket KAN-9
2025-07-04 10:59:09,940 - src.agent.workflow - INFO - Extracting task from ticket KAN-9
2025-07-04 10:59:09,942 - src.agent.workflow - INFO - Generating SQL for the current task
2025-07-04 10:59:09,944 - src.tools.sql_tool - INFO - Generating SQL query for task: How many unqiue car models we have per car category? Sort the results in descending order!
2025-07-04 10:59:12,322 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-04 10:59:12,335 - src.agent.workflow - INFO - Validating SQL query
2025-07-04 10:59:12,350 - src.agent.workflow - INFO - Executing SQL query
2025-07-04 10:59:12,356 - src.clients.db_client - INFO - Query executed successfully. Returned 7 rows.
2025-07-04 10:59:12,360 - src.agent.workflow - INFO - Generating business insights
2025-07-04 10:59:12,361 - src.tools.insight_tool - INFO - Generating insights for task: How many unqiue car models 

AttributeError: 'dict' object has no attribute 'business_insight'