In [20]:
#%pip install -r requirements.txt

In [21]:
# Specify number of use cases to process or use "All" for the entire file
num_to_process = "3" #input("Enter the number of use cases to process (or 'All' for all use cases): ").strip()

# Set the input file path
# input_file = "artifact/ai_uc_inventory-dhs.xlsx" 
input_file = "artifact/2024_consolidated_ai_inventory_raw.xlsx"

# # OpenAI API 
# api_url = "https://api.openai.com/v1"
# # load api key from .env_val file...syntax [OPENAI_API_KEY="your_api_key"]
# from dotenv import load_dotenv
# import os
# load_dotenv('.env_vals')
# str_api_key = os.getenv('OPENAI_API_KEY')
# model_name="gpt-4"
# # Set the output file paths
# output_file_json = "artifact/ai_use_case_analysis.openai.json"
# output_file_jsonl = "artifact/ai_use_case_analysis.openai.jsonl"
# output_file_json = "artifact/global_ai_use_case_analysis.openai.json"
# output_file_jsonl = "artifact/global_ai_use_case_analysis.openai.jsonl"

# Ollama API
api_url = "http://192.168.244.61:5500/v1"
str_api_key="ollama"
model_name="wizard-vicuna-uncensored:30b"
# Set the output file paths
# output_file_json = "artifact/dhs_ai_use_case_analysis.local.json"
# output_file_jsonl = "artifact/dhs_ai_use_case_analysis.local.jsonl"
output_file_json = "artifact/global_ai_use_case_analysis.local.json"
output_file_jsonl = "artifact/global_ai_use_case_analysis.local.jsonl"

# Validate the API key
print(str_api_key)

ollama


In [22]:
import pandas as pd
import openai
from openai import Client
import json

# Initialize the OpenAI API client
client = Client(api_key=str_api_key,base_url=api_url)

# Load the input XLSX dataset
all_data = pd.read_excel(input_file)

# Determine the data to process
if num_to_process.lower() == "all":
    data_to_process = all_data
else:
    try:
        num_to_process = int(num_to_process)
        data_to_process = all_data.head(num_to_process)
    except ValueError:
        print("Invalid input. Please enter a number or 'All'.")
        exit()

# List to hold the results
results = []

# Iterate through the selected use cases
for index, row in data_to_process.iterrows():
    usecase_id = row.get("Use Case ID", "Not provided")
    use_case_name = row.get("Use Case Name", f"Use Case {index + 1}")
    
    if input_file == "artifact/2024_consolidated_ai_inventory_raw.xlsx":

        agency = row.get("Agency", "Not provided")
        bureau_dept = row.get("Bureau", "Not provided")
        benefit_statement = row.get("What is the intended purpose and expected benefits of the AI?", "Not provided")
        system_outputs = row.get("Describe the AI system’s outputs.", "Not provided")

    else: # For the DHS AI Use Case Analysis file structure

        agency = row.get("Agency", "Not provided")
        bureau_dept = row.get("Bureau / Department", "Not provided")
        benefit_statement = row.get("Summary of Use Case", "Not provided") + " " + row.get("What is the intended purpose and expected benefits of the AI?", "Not provided")
        system_outputs = row.get("System Outputs", "Not provided")

    # Prepare the use case details
    use_case_details = f"""
- Use Case ID: {usecase_id}
- Agency: {agency}
- Bureau / Department: {bureau_dept}
- Benefit Statement: {benefit_statement}
- System Outputs: {system_outputs}
"""
 
    # Construct the prompt
    prompt = f"""
As a helpful assistant, please analyze the following AI use case <use_case>:

<use_case>
**Details:**
{use_case_details}
</use_case>

It will be very helpful if you will address these areas concisely:

### **A. Named Entity Relationships**
Extract entities from the Purpose Statement, Benefit Statement, and System Outputs:
1. **PERSON**: Individuals or roles.
2. **ORGANIZATION**: Agencies or entities.
3. **LOCATION**: Places mentioned.
4. **DATE**: Specific timeframes.

### **B. Dependency Parsing and Analysis**
1. Identify grammatical relationships and dependency labels (e.g., subject, object).
2. Highlight key sentence structures reflecting goals or constraints.
3. Note ambiguities and incomplete dependencies.

### **C. Functional and Non-Functional Requirements**
From the use case and analyses:
- **Functional Requirements**: Actions the system must perform (e.g., data processing).
- **Non-Functional Requirements**: Constraints like performance or scalability.
Structure:
- **ID**: Unique identifier.
- **Type**: Functional or Non-Functional.
- **Description**: Requirement summary.
- **Rationale**: Why it matters.
- **Dependencies**: Links to related components.

### **D. Value Categorization**
Classify as one:
1. **Efficiency Amplifier**: Use cases that streamline existing processes, reduce resource consumption, or improve speed and accuracy within existing workflows.
2. **Capability Enhancer**: Use cases that introduce new capabilities, expand operational boundaries, or enable tasks previously constrained by resource or technical limitations.
3. **Breakthrough Enabler**: Use cases that achieve novel outcomes, redefine operational paradigms, or transcend traditional limitations entirely.

Justify based on purpose, outputs, and context.

### **E. Operational Impact and Transformation**
1. **Operational Impact**: Improvements in efficiency or capability.
2. **Transformation**: Changes to workflows or overcoming limitations.

### **F. Value Metrics**
Measure value through:
- Operational metrics (e.g., time saved).
- Organizational benefits (e.g., cost reduction).
- Societal impacts (e.g., improved security).

Provide responses with clarity and focus, ensuring the analysis aligns with the data available in the AI use case. Use explicit references to information from the Purpose Statement, Benefit Statement, and System Outputs to ground conclusions. Where data is missing or ambiguous, make reasonable inferences based on established patterns in similar use cases, and flag areas requiring further clarification for completeness.
"""

    try:
        # Submit the prompt to OpenAI API using the ChatCompletion interface
        response = client.chat.completions.create(
            model=model_name,  #"gpt-4",  # Use "gpt-3.5-turbo" if preferred
            messages=[
                {"role": "system", "content": "You are an AI assistant who is an expert at analyzing proposals for AI use cases."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=6000
        )

        # Extract the response text
        response_text = response.choices[0].message.content.strip()

        # Append the result to the list
        results.append({
            "Use Case ID": usecase_id,
            "Agency": agency,
            "Bureau / Department": bureau_dept,
            "Use Case Name": use_case_name,
            "Benefit Statement": benefit_statement,
            "System Outputs": system_outputs,
            # "Prompt": prompt, # Commented out to avoid saving the prompt in the JSON file
            "Response": response_text
        })

        # Write the result to a JSON Lines file
        with open(output_file_jsonl, mode="a", encoding="utf-8") as file:
            json.dump({
                "Use Case ID": usecase_id,
                "Agency": agency,
                "Bureau / Department": bureau_dept,
                "Use Case Name": use_case_name,
                "Benefit Statement": benefit_statement,
                "System Outputs": system_outputs,
                # "Prompt": prompt, # Commented out to avoid saving the prompt in the JSON file
                "Response": response_text
            }, file)
            file.write("\n")

    except Exception as e:
        print(f"Error processing prompt for use case '{use_case_name}': {e}")
        print(f"Detailed error: {str(e)}")
        print(f"API Base URL: {client.base_url}")

# Write the results to a JSON file
with open(output_file_json, mode="a", encoding="utf-8") as file:
    json.dump(results, file, indent=4)

print(f"Analysis completed. Results saved to {output_file_json} and {output_file_jsonl}")


Analysis completed. Results saved to artifact/global_ai_use_case_analysis.local.json and artifact/global_ai_use_case_analysis.local.jsonl
