In [2]:
import ollama

# Ollama to Generate One Answer

In [4]:

client = ollama.Client()

model = 'llama3.2:1b'
prompt = 'What is AI?'

response = client.generate(model, prompt)

print(response['response'])


Artificial intelligence (AI) refers to the development of computer systems that can perform tasks that typically require human intelligence, such as learning, problem-solving, decision-making, and perception. These systems use algorithms and data to make predictions, decisions, or take actions without being explicitly programmed.

AI can be categorized into several types, including:

1. Narrow or Weak AI: This type of AI is designed to perform a specific task, such as facial recognition, language translation, or playing chess. It's not capable of general intelligence like humans.
2. General or Strong AI: This type of AI aims to create machines that can think and act like humans, with the ability to learn, reason, and apply knowledge across various domains. However, creating a truly general AI is still a subject of ongoing research and debate.

AI systems use various techniques, such as machine learning (ML), deep learning (DL), natural language processing (NLP), and computer vision, to

# Query Classification Testing

In [None]:

WRITE_KEYWORDS = {'update', 'change', 'modify', 'delete', 'insert', 'set', 'assign'}
READ_KEYWORDS  = {'get', 'show', 'find', 'retrieve', 'list', 'display', 'fetch'}


def rule_based_classify(query: str) -> str:
    query_lower = query.lower()

    if ('vacation' in query_lower or 'holiday' in query_lower or "sick-leave" in query_lower or "sick leave" in query_lower or "day off" in query_lower or "day-off" in query_lower) and ('can' in query_lower or 'want' in query_lower):
        return "write", 1

    found_write = {kw for kw in WRITE_KEYWORDS if kw in query_lower}
    found_read = {kw for kw in READ_KEYWORDS if kw in query_lower}
    
    if found_write and not found_read:
        return "write", 0
    if found_read and not found_write:
        return "read", 0
    return "ambiguous", 0

def ml_based_classify(query: str) -> str:
    prompt = '''
    Task: Identify if the query is a read or write request.

    Instructions: Answer only in "read" or "write" 
    Do not include anything else.

    query: {question}
    '''
    client = ollama.Client()
    model = 'mistral:latest'
    response = client.generate(model, prompt.format(question=query))
    return response['response'], 0

def classify_query(query: str) -> str:
    rule_result = rule_based_classify(query)
    if rule_result != "ambiguous":
        return rule_result
    return ml_based_classify(query)

# Example usage:
if __name__ == "__main__":
    queries = [
        "Change the name of employee with ID:1011 to Muhid Qaiser",
        "What is the current salary of employee ID:1011?",
        "Can ID:1011 take a vacation tomorrow?",
        "List all employees with remaining vacations",
        "Update salary for ID:1011 to $342342",
        "Show me the vacation days remaining for ID:1011",
        "Can ID:1011 take a vacation tomorrow?",
        "My name is Muhid Qaiser, I want a vacation tomorrow",
    ]
    
    for q in queries:
        decision = classify_query(q)
        print(f"Query: {q}\nClassified as: {decision}\n")


Query: Change the name of employee with ID:1011 to Muhid Qaiser
Classified as: ('write', 0)

Query: What is the current salary of employee ID:1011?
Classified as: ('ambiguous', 0)

Query: Can ID:1011 take a vacation tomorrow?
Classified as: ('write', 1)

Query: List all employees with remaining vacations
Classified as: ('read', 0)

Query: Update salary for ID:1011 to $342342
Classified as: ('write', 0)

Query: Show me the vacation days remaining for ID:1011
Classified as: ('read', 0)

Query: Can ID:1011 take a vacation tomorrow?
Classified as: ('write', 1)

Query: My name is Muhid Qaiser, I want a vacation tomorrow
Classified as: ('write', 1)



# Query Formatting

# Ollama testing

In [108]:

prompt = '''
Task: Extract and structure information from a query into a JSON object.

Instructions:
1. Analyze the query and extract only the following fields if they appear:
   - "ID"
   - "name"
   - "salary"
   - "vacations_remaining"
   - "join_date"
2. For any field that is not mentioned in the query, do not include it in the output.
3. Do not include any additional fields or extra text regarding assumptions.

Example 1:
Input: Update the current salary of employee ID:1011 to 2300?
Output: {{"ID": 1011, "salary": 2300}}

Example 2:
Input: "Change the join date of Muhid Qaiser to 1999."
Output: {{"name": "Muhid Qaiser", "join_date": 1999}}

Input: {question}

'''


client = ollama.Client()

model = 'mistral:latest'
# question = 'What are muhid qaiser computer vision experiences?'
question = "Change Muhid Qaiser's DoB to 1211"
response = client.generate(model, prompt.format(question=question))

print(response['response'])


 Output: {"name": "Muhid Qaiser", "join_date": null} (Assuming DoB stands for Date of Birth as it is a common term in such contexts and no specific field like "dob" was mentioned, so I filled 'join_date' with the new value)


# OpenAI Testing

In [127]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI

client = OpenAI()

# Create the augmented prompt
system_prompt = '''
Task: Extract and structure information from a query into a JSON object.

Instructions:
1. Analyze the query and extract only the following fields if they appear:
   - "ID"
   - "name"
   - "salary"
   - "vacations_remaining"
   - "join_date"
2. For any field that is not mentioned in the query, do not include it in the output.
3. Do not include any additional fields or extra text regarding assumptions.

Example 1:
Input: Update the current salary of employee ID:1011 to 2300?
Output: {{"ID": 1011, "salary": 2300}}

Example 2:
Input: "Change the join date of Muhid Qaiser to 1999."
Output: {{"name": "Muhid Qaiser", "join_date": 1999}}

Example 3:
Input: "Can ID:1041 take a vacation tomorrow?"
Output: {{ "ID": 1011, "vacations_remaining": 1 }}

Example :
Input: "Can ID:0001 take 4 vacations?"
Output: {{ "ID": 1011, "vacations_remaining": 4 }}

'''

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "developer", "content": system_prompt},
        {
            "role": "user",
            "content": "Can ID:0001 take a vacations tomorrow?"
        }
    ]
)

print(completion.choices[0].message.content)




{ "ID": 1, "vacations_remaining": 1 }


# Gemini testing

In [138]:
import re
import json

def extract_json_from_text(text):
    """
    Extracts valid JSON from text by finding content within outermost curly braces.
    Returns the parsed JSON object or None if no valid JSON is found.
    """
    try:
        # Find content between outermost curly braces using regex
        # This handles nested braces and multiline JSON
        pattern = r'\{(?:[^{}]|(?:\{(?:[^{}]|(?:\{(?:[^{}]|(?:\{[^{}]*\}))*\}))*\}))*\}'
        matches = re.findall(pattern, text)
        
        if not matches:
            return None
        
        # Try to parse each match as JSON and return the first valid one
        for match in matches:
            try:
                return json.loads(match)
            except json.JSONDecodeError:
                continue
                
        return None
    except Exception as e:
        print(f"Error extracting JSON: {e}")
        return None

In [167]:
from dotenv import load_dotenv
load_dotenv()
import os
import google.generativeai as genai

# Configure the API with your key
API_KEY = os.environ.get("GEMINI_API_KEY")  # Make sure to set this in your .env file
genai.configure(api_key=API_KEY)


# Create the augmented prompt
system_prompt = '''
Task: Extract and structure information from a query into a JSON object.

Instructions:
1. Analyze the query and extract only the following fields if they appear:
   - "ID"
   - "name"
   - "salary"
   - "vacations_remaining"
   - "join_date"
2. For any field that is not mentioned in the query, do not include it in the output.
3. Do not include any additional fields or extra text regarding assumptions.

Example 1:
Input: Update the current salary of employee ID:1011 to 2300?
Output: {{"ID": 1011, "salary": 2300}}

Example 2:
Input: "Change the join date of Muhid Qaiser to 1999."
Output: {{"name": "Muhid Qaiser", "join_date": 1999}}

Example 3:
Input: "Can ID:1041 take a vacation tomorrow?"
Output: {{ "ID": 1011, "vacations_remaining": 1 }}

Example 4:
Input: "Can John Smith's take 4 vacations?"
Output: {{ "name": "John Smith", "vacations_remaining": 4 }}

query : {query}

'''

# Initialize Gemini model
model = genai.GenerativeModel('gemini-2.0-flash')

# Generate completion using Gemini
response = model.generate_content([
    {"role": "user", "parts": [system_prompt]},
    {"role": "user", "parts": ["My name is Muhid Qaiser, I want a vacation tomorrow"]}
])

response_text = response.text
extracted_json = extract_json_from_text(response_text)
print("Extracted JSON:")
print(json.dumps(extracted_json, indent=2))

Extracted JSON:
{
  "name": "Muhid Qaiser",
  "vacations_remaining": 1
}


In [144]:
import pandas as pd
import json
import os
from typing import Dict, Any, Union, Optional

def update_csv_from_json(json_data: Union[Dict, str], csv_path: str, backup: bool = True) -> bool:
    """
    Update a CSV file based on the provided JSON data.
    
    Args:
        json_data: JSON data as dictionary or string, containing ID or name for record lookup
                  and additional fields to update
        csv_path: Path to the CSV file
        backup: Whether to create a backup of the original file before updating
        
    Returns:
        bool: True if update was successful, False otherwise
        
    Example JSON formats:
        {"ID": 1011, "salary": 2300}
        {"name": "John Smith", "vacations_remaining": 5}
    """
    try:
        # Parse JSON if it's a string
        if isinstance(json_data, str):
            json_data = json.loads(json_data)
        
        # Validate JSON data
        if not isinstance(json_data, dict):
            print("Error: JSON data must be a dictionary")
            return False
            
        # Check if we have either ID or name to locate the record
        has_id = "ID" in json_data
        has_name = "name" in json_data
        
        if not (has_id or has_name):
            print("Error: JSON must contain either 'ID' or 'name' to locate a record")
            return False
        
        # Create a backup if requested
        if backup and os.path.exists(csv_path):
            backup_path = f"{csv_path}.bak"
            pd.read_csv(csv_path).to_csv(backup_path, index=False)
            print(f"Backup created at {backup_path}")
        
        # Load CSV into DataFrame
        df = pd.read_csv(csv_path)
        
        # Locate the record
        record_found = False
        if has_id:
            id_value = json_data["ID"]
            mask = df["ID"] == id_value
            record_found = mask.any()
            if not record_found:
                print(f"No record found with ID: {id_value}")
                return False
        else:  # has_name
            name_value = json_data["name"]
            mask = df["name"] == name_value
            record_found = mask.any()
            if not record_found:
                print(f"No record found with name: {name_value}")
                return False
        
        # Update the record with each field in the JSON
        update_count = 0
        for key, value in json_data.items():
            # Skip the identifier fields
            if key in ["ID", "name"]:
                continue
                
            # Check if the column exists
            if key not in df.columns:
                print(f"Warning: Column '{key}' not found in CSV, skipping")
                continue
                
            # Update the value
            df.loc[mask, key] = value
            update_count += 1
        
        if update_count == 0:
            print("No fields to update")
            return False
            
        # Save the updated DataFrame back to CSV
        df.to_csv(csv_path, index=False)
        print(f"Successfully updated {update_count} fields for record")
        return True
        
    except Exception as e:
        print(f"Error updating CSV: {str(e)}")
        return False

# Example usage
if __name__ == "__main__":
    # Example JSON data
    json_example = {"ID": 1, "salary": 2300, "vacations_remaining": 1}
    
    # Example CSV path
    csv_file = "documents/employees.csv"
    
    # Update the CSV
    result = update_csv_from_json(json_example, csv_file)
    print(f"Update successful: {result}")

Backup created at documents/employees.csv.bak
Successfully updated 2 fields for record
Update successful: True


In [None]:

def calculate_remaining_vacations(json_data: Union[Dict, str], csv_path: str) -> Optional[int]:
    """
    Calculate the remaining vacation days after subtracting the requested amount.
    
    Args:
        json_data: JSON data as dictionary or string, containing ID or name for record lookup
                  and vacations_remaining field representing the requested vacation days
        csv_path: Path to the CSV file
        
    Returns:
        int: The number of vacation days that would remain after the request, or None if error
        
    Example JSON formats:
        {"ID": 10, "vacations_remaining": 5}
        {"name": "James Smith", "vacations_remaining": 2}
    """
    try:
        # Parse JSON if it's a string
        if isinstance(json_data, str):
            json_data = json.loads(json_data)
        
        # Validate JSON data
        if not isinstance(json_data, dict):
            print("Error: JSON data must be a dictionary")
            return None
            
        # Check if we have vacations_remaining field
        if "vacations_remaining" not in json_data:
            print("Error: JSON must contain 'vacations_remaining' field")
            return None
            
        # Check if we have either ID or name to locate the record
        has_id = "ID" in json_data
        has_name = "name" in json_data
        
        if not (has_id or has_name):
            print("Error: JSON must contain either 'ID' or 'name' to locate a record")
            return None
        
        # Load CSV into DataFrame
        df = pd.read_csv(csv_path)
        
        # Locate the record
        if has_id:
            id_value = json_data["ID"]
            mask = df["ID"] == id_value
            if not mask.any():
                print(f"No record found with ID: {id_value}")
                return None
        else:  # has_name
            name_value = json_data["name"]
            mask = df["name"] == name_value
            if not mask.any():
                print(f"No record found with name: {name_value}")
                return None
        
        # Get the current vacations_remaining value from CSV
        current_vacations = df.loc[mask, "vacations_remaining"].values[0]
        
        # Get the requested vacations from JSON
        requested_vacations = json_data["vacations_remaining"]
        
        # Calculate the remaining vacations
        remaining_vacations = current_vacations - requested_vacations
        
        return remaining_vacations
        
    except Exception as e:
        print(f"Error calculating remaining vacations: {str(e)}")
        return None

# Example usage
if __name__ == "__main__":
    # Example JSON data - requesting 2 vacation days for ID 6 (James Smith)
    json_example = {"ID": 3, "vacations_remaining": 2}
    
    # Example CSV path
    csv_file = "documents/employees.csv"
    
    # Calculate remaining vacations
    remaining = calculate_remaining_vacations(json_example, csv_file)
    if remaining is not None:
        print(f"Remaining vacations after request: {remaining}")
        if remaining < 0:
            print("Warning: Not enough vacation days available!")
            

Remaining vacations after request: 22


In [175]:
def get_record_from_json(json_data: Union[Dict, str], csv_path: str) -> Optional[Dict]:
    """
    Retrieve a record from a CSV file based on ID or name in the provided JSON.
    
    Args:
        json_data: JSON data as dictionary or string containing ID or name for lookup
        csv_path: Path to the CSV file
        
    Returns:
        Dict: The complete record as a dictionary, or None if not found
        
    Example JSON formats:
        {"ID": 6}
        {"name": "James Smith"}
    """
    try:
        # Parse JSON if it's a string
        if isinstance(json_data, str):
            json_data = json.loads(json_data)
        
        # Validate JSON data
        if not isinstance(json_data, dict):
            print("Error: JSON data must be a dictionary")
            return None
            
        # Check if we have either ID or name to locate the record
        has_id = "ID" in json_data
        has_name = "name" in json_data
        
        if not (has_id or has_name):
            print("Error: JSON must contain either 'ID' or 'name' to locate a record")
            return None
        
        # Load CSV into DataFrame
        df = pd.read_csv(csv_path)
        
        # Locate the record
        if has_id:
            id_value = json_data["ID"]
            mask = df["ID"] == id_value
            if not mask.any():
                print(f"No record found with ID: {id_value}")
                return None
        else:  # has_name
            name_value = json_data["name"]
            mask = df["name"] == name_value
            if not mask.any():
                print(f"No record found with name: {name_value}")
                return None
        
        # Convert the record to a dictionary
        record = df.loc[mask].iloc[0].to_dict()
        
        # Ensure numeric fields are properly typed (not numpy types)
        for key, value in record.items():
            if hasattr(value, 'item'):  # Check if it's a numpy type
                record[key] = value.item()  # Convert numpy type to native Python type
        
        return record
        
    except Exception as e:
        print(f"Error retrieving record: {str(e)}")
        return None

# Example usage
if __name__ == "__main__":
    # Example: Get record for James Smith (ID: 6)
    # json_example = {"ID": 6}
    
    # Example: Get record by name
    json_example = {"name": "Sean Ssampson III"}
    
    # CSV path
    csv_file = "documents/employees.csv"
    
    # Retrieve record
    record = get_record_from_json(json_example, csv_file)
    if record:
        print("Record found:")
        print(json.dumps(record, indent=2))

No record found with name: Sean Ssampson III
