<a href="https://colab.research.google.com/github/Rajan232/TeamCentelon_GovHack24/blob/master/ai-models/Executive_Summary_AI_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import os
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS

# Load environment variables from a local .env file (if one exists) so the
# os.getenv() lookups below can see them.
load_dotenv()

# Initialize AzureOpenAI API
# Credentials are taken from the environment so real secrets do not have to be
# written into the notebook. The placeholder strings are only used as a
# fallback when the corresponding variable is not set.
api_key = os.getenv("AZURE_OPENAI_API_KEY", "your_api_key")
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", 'your_azure_endpoint')
api_version = os.getenv("OPENAI_API_VERSION", 'your_api_version')

# Specify embedding model details
# Used by create_vector_store() to embed the insight text.
embed_model = AzureOpenAIEmbeddings(
    deployment="use_your_deployed_model_name",
    model="use_your_Model_name",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

# Specify LLM model details
# Used by main() as the chat model behind the RetrievalQA chain.
llm = AzureChatOpenAI(
    deployment="use_your_deployed_model_name",
    model="use_your_Model_name",
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
)

def load_and_process_data(data_dir="."):
    """Load the four crash CSV files into DataFrames.

    Args:
        data_dir: Directory that contains ACCIDENT.csv, ACCIDENT_LOCATION.csv,
            PERSON.csv and VEHICLE.csv. Defaults to the current working
            directory, which is where the files were previously looked up.

    Returns:
        A dict mapping each dataset name ('ACCIDENT', 'ACCIDENT_LOCATION',
        'PERSON', 'VEHICLE') to the DataFrame read from ``<name>.csv``.

    Raises:
        Whatever ``pd.read_csv`` raises for a missing or unreadable file; no
        error is caught here.
    """
    dataset_names = ('ACCIDENT', 'ACCIDENT_LOCATION', 'PERSON', 'VEHICLE')
    # Each dataset lives in a file named after its key, so derive the path
    # instead of repeating the name twice per entry.
    return {
        name: pd.read_csv(os.path.join(data_dir, f"{name}.csv"))
        for name in dataset_names
    }

def extract_key_stats(datasets, invalid_speed_zone_codes=(777, 888, 999)):
    """Extract key statistics from the datasets.

    Args:
        datasets: Dict with 'ACCIDENT', 'ACCIDENT_LOCATION', 'PERSON' and
            'VEHICLE' DataFrames, as returned by load_and_process_data().
        invalid_speed_zone_codes: SPEED_ZONE values that are category codes
            rather than real speed limits and are therefore excluded from the
            speed-zone mean and standard deviation. The default assumes the
            Victorian crash-data coding (777/888/999 for other / off-road /
            unknown) -- TODO confirm against the dataset's data dictionary.
            Pass an empty tuple to include every row.

    Returns:
        A dict of scalar statistics plus three normalised value-count
        distributions (road type, injury level, vehicle type).
    """
    # Named key_stats (not `stats`) so the file-level `from scipy import stats`
    # is not shadowed inside this function.
    key_stats = {}

    accidents = datasets['ACCIDENT']
    key_stats['total_accidents'] = len(accidents)
    key_stats['avg_severity'] = accidents['SEVERITY'].mean()
    # Share of accidents with at least one person killed.
    key_stats['fatal_accident_rate'] = (accidents['NO_PERSONS_KILLED'] > 0).mean()
    key_stats['day_of_week_mean'] = accidents['DAY_OF_WEEK'].mean()

    # Drop sentinel codes before averaging; including them inflates the mean
    # far beyond any real speed limit.
    speed_zone = accidents['SPEED_ZONE']
    valid_speed_zone = speed_zone[~speed_zone.isin(list(invalid_speed_zone_codes))]
    key_stats['avg_speed_zone'] = valid_speed_zone.mean()
    key_stats['speed_zone_std'] = valid_speed_zone.std()

    key_stats['avg_vehicles_per_accident'] = accidents['NO_OF_VEHICLES'].mean()
    key_stats['avg_persons_per_accident'] = accidents['NO_PERSONS'].mean()

    # Distributions are stored as {category: fraction of rows}.
    locations = datasets['ACCIDENT_LOCATION']
    key_stats['road_type_distribution'] = locations['ROAD_TYPE'].value_counts(normalize=True).to_dict()

    persons = datasets['PERSON']
    key_stats['injury_distribution'] = persons['INJ_LEVEL'].value_counts(normalize=True).to_dict()

    vehicles = datasets['VEHICLE']
    key_stats['vehicle_type_distribution'] = vehicles['VEHICLE_TYPE'].value_counts(normalize=True).to_dict()

    return key_stats

def generate_insights(stats):
    """Turn the statistics dict into a list of one-sentence insight strings.

    Args:
        stats: Dict produced by extract_key_stats(); must contain the scalar
            keys and the three ``*_distribution`` dicts read below.

    Returns:
        A list of ten strings, one per statistic, in a fixed order.

    Raises:
        KeyError: If an expected key is missing from ``stats``.
    """
    def _most_common(distribution):
        # max() raises ValueError on an empty mapping (e.g. an empty dataset),
        # so fall back to a placeholder instead of crashing.
        if not distribution:
            return "N/A"
        return max(distribution, key=distribution.get)

    # NOTE(review): the "slightly more frequent mid-week" and "high
    # variability" wording is fixed text; it is not derived from the values.
    insights = [
        f"Total of {stats['total_accidents']} accidents recorded.",
        f"Average accident severity is {stats['avg_severity']:.2f} on a scale of 1-4 (1 being most severe).",
        f"{stats['fatal_accident_rate']:.2%} of accidents involve fatalities.",
        f"Accidents are slightly more frequent mid-week (mean day: {stats['day_of_week_mean']:.2f}).",
        f"Average speed zone for accidents is {stats['avg_speed_zone']:.2f} km/h, with high variability (std: {stats['speed_zone_std']:.2f}).",
        f"On average, {stats['avg_vehicles_per_accident']:.2f} vehicles are involved per accident.",
        f"An average of {stats['avg_persons_per_accident']:.2f} persons are involved per accident.",
        f"Most common road type for accidents: {_most_common(stats['road_type_distribution'])}",
        f"Most common injury level: {_most_common(stats['injury_distribution'])}",
        f"Most common vehicle type in accidents: {_most_common(stats['vehicle_type_distribution'])}",
    ]
    return insights

def create_vector_store(insights):
    """Build a FAISS index holding the insights as one combined document.

    Args:
        insights: Iterable of insight strings.

    Returns:
        The FAISS vector store created from the single space-joined text,
        embedded with the module-level ``embed_model``.
    """
    # All insights are merged into one text, so the store contains exactly
    # one document.
    combined_text = " ".join(insights)
    documents = [combined_text]
    return FAISS.from_texts(texts=documents, embedding=embed_model)

def generate_prompt():
    """Build the PromptTemplate used to request the executive brief.

    Returns:
        A PromptTemplate with two input variables: ``context`` (the data
        insights) and ``question`` (the user query).
    """
    # The template text is sent to the model verbatim, so its wording and
    # whitespace are kept exactly as they are.
    brief_template = """You are an AI assistant tasked with creating an executive brief on road safety in Victoria, Australia.
    Use the following data insights to create a concise, actionable brief for high-level decision-makers:

    {context}

    Your brief should include:
    1. A succinct overview of the current road safety situation (2-3 sentences)
    2. 4-5 key insights drawn from the data, focusing on the most critical findings
    3. 3-4 actionable, high-impact recommendations for improving road safety
    4. 1-2 areas that require immediate attention or further investigation

    The brief should be clear, impactful, and focused on informing policy decisions and safety initiatives.
    Consider how this information can be used to predict, prevent, and respond to road accidents.
    Limit your response to about 500 words.

    Question: {question}
    Your response:"""

    placeholders = ["context", "question"]
    return PromptTemplate(input_variables=placeholders, template=brief_template)

def main():
    """Run the full pipeline and print the generated executive brief.

    Loads the CSV datasets, derives statistics and insight sentences, indexes
    the insights in a vector store, and asks the LLM (through a RetrievalQA
    chain) to write the brief, which is printed to stdout.
    """
    # Load and process data
    datasets = load_and_process_data()

    # Extract key statistics
    stats = extract_key_stats(datasets)

    # Generate insights
    insights = generate_insights(stats)

    # Create vector store
    vector_store = create_vector_store(insights)

    # Create prompt template
    prompt = generate_prompt()

    # Initialize QA chain
    # "stuff" places the retrieved documents directly into the prompt's
    # {context} slot.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt}
    )

    # Generate executive brief
    query = "Generate an executive brief on road safety based on the provided insights."
    # Use .invoke() rather than calling the chain object directly; the direct
    # call form is deprecated in LangChain and emits a deprecation warning.
    result = qa_chain.invoke({"query": query})

    print("Executive Brief:")
    print(result['result'])

if __name__ == "__main__":
    main()

  warn_deprecated(


Executive Brief:
Executive Brief: Road Safety in Victoria, Australia

Overview:
Victoria, Australia has recorded a total of 171,099 accidents, with an average accident severity of 2.60 on a scale of 1-4. The average speed zone for accidents is 133.02 km/h, and 1.64% of accidents involve fatalities. Accidents are slightly more frequent mid-week, and the most common road type for accidents is "ROAD."

Key Insights:
1. High average speed zone: The average speed zone for accidents is 133.02 km/h, indicating a high average speed of vehicles involved in accidents. This poses a significant risk to road safety and requires immediate attention.

2. Average accident severity: With an average severity of 2.60 on a scale of 1-4, there is a need to focus on reducing the severity of accidents and improving overall road safety measures.

3. Involvement of multiple vehicles and persons: On average, 1.82 vehicles and 2.34 persons are involved per accident, suggesting a need for strategies to address mu