In [None]:
import sys
import pandas as pd
import random
import os
from sklearn.metrics.pairwise import cosine_similarity
from pydantic import BaseModel
from typing import List
import numpy as np
! pip show anthropic
! pip show openai

In [None]:
import anthropic
import openai
from openai import OpenAI
import os

# Placeholder credentials. They are used only as a fallback when the matching
# environment variable is unset or empty; prefer exporting ANTHROPIC_API_KEY /
# OPENAI_API_KEY over writing real keys into the notebook.
ANTHROPIC_API_KEY="API_KEY"
OPENAI_API_KEY="API_KEY"

# Set your own proxy address and port to allow access
proxy_url = 'http://----'
proxy_port = 'xxxx'

# Export the proxy through the process environment for both HTTP and HTTPS.
proxy_address = f'{proxy_url}:{proxy_port}'
os.environ['http_proxy'] = proxy_address
os.environ['https_proxy'] = proxy_address

# Resolve the effective keys: the environment wins, the placeholder is the fallback.
# (Previously these lookups were computed but never passed to the clients.)
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") or ANTHROPIC_API_KEY
openai_api_key = os.getenv("OPENAI_API_KEY") or OPENAI_API_KEY

# API clients shared by the helper functions below
client_claude = anthropic.Anthropic(api_key=anthropic_api_key)
client_emb = OpenAI(api_key=openai_api_key)
client = OpenAI(api_key=openai_api_key)

def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text`` produced by ``model``.

    ``text`` is sent as a one-element batch through the module-level
    ``client_emb`` client; the ``embedding`` attribute of the first item in
    the response's ``data`` list is returned.
    """
    response = client_emb.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding

# Define chatbot
def _chat_with_claude(model, prompt, system_prompt, max_tokens, temperature):
    """Send one user message to a Claude model and return the reply text.

    Shared implementation behind the ``chat_with_claude_*`` wrappers, which
    differ only in the model id and default token limit they pass in.

    Args:
        model: Model id forwarded to ``client_claude.messages.create``.
        prompt: Content of the single ``user`` message.
        system_prompt: Value forwarded as the ``system`` argument.
        max_tokens: Value forwarded as ``max_tokens``.
        temperature: Value forwarded as ``temperature``.

    Returns:
        The ``text`` attribute of the first item in ``response.content``.
    """
    response = client_claude.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        system=system_prompt,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text


def chat_with_claude_sonnet(prompt, system_prompt, max_tokens=4096, temperature=0.5):
    """Ask ``claude-3-sonnet-20240229`` a single question and return its reply text.

    ``max_tokens`` and ``temperature`` default to the values this notebook
    has always used, so existing two-argument calls behave as before.
    """
    return _chat_with_claude(
        "claude-3-sonnet-20240229", prompt, system_prompt, max_tokens, temperature
    )


def chat_with_claude_opus(prompt, system_prompt, max_tokens=4000, temperature=0.5):
    """Ask ``claude-3-opus-20240229`` a single question and return its reply text.

    ``max_tokens`` and ``temperature`` default to the values this notebook
    has always used, so existing two-argument calls behave as before.
    """
    return _chat_with_claude(
        "claude-3-opus-20240229", prompt, system_prompt, max_tokens, temperature
    )


def chat_with_claude_haiku(prompt, system_prompt, max_tokens=4000, temperature=0.5):
    """Ask ``claude-3-haiku-20240307`` a single question and return its reply text.

    ``max_tokens`` and ``temperature`` default to the values this notebook
    has always used, so existing two-argument calls behave as before.
    """
    return _chat_with_claude(
        "claude-3-haiku-20240307", prompt, system_prompt, max_tokens, temperature
    )

def chat_with_openai(prompt, system_prompt, model="gpt-3.5-turbo-0125"):
    """Ask an OpenAI chat model a single question and return its reply text.

    Args:
        prompt: Content of the ``user`` message.
        system_prompt: Content of the ``system`` message, sent first.
        model: Chat model id to test. Defaults to the model this notebook
            previously hard-coded, so existing two-argument calls are unchanged.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    completion = client.chat.completions.create(model=model, messages=messages)

    return completion.choices[0].message.content

In [None]:
# Load the precomputed vector set from disk (optional)
# `json` is imported here because no other cell imports it; without this the
# cell fails with NameError on a fresh kernel.
import json

with open('Path/ab_vector.json', 'r', encoding='utf-8') as file:
    sentence_vector = json.load(file)

# Before running the RAG test below, use Colab to write the vector search results into the problem_vector_return.txt file in the expected format (question sections separated by a line of 50 or more dashes).

In [None]:
# RAG+LLM
import re
import time

# Read the retrieval dump produced by the vector search step.
# NOTE: the file name previously ended with a stray space inside the string
# literal, which names a different (normally nonexistent) file on POSIX systems.
with open("Path/problem_vector_return.txt", "r", encoding='utf-8') as file:
    content = file.read()

# Sections are delimited by a run of 50 or more dashes; trim each section and
# drop the ones that are empty after trimming.
sections = (part.strip() for part in re.split(r'-{50,}', content))
questions = [section for section in sections if section]

# Instructions for the RAG run. This text is passed as the system prompt
# (second argument) to the LLM call later in this cell, while each question
# section read from the retrieval file is passed as the user message.
prompt = """
In the following content, Query: is followed by the question to be answered, and Knowledge: and Reference: are followed by relevant reference information and literature. Please answer according to the following requirements:

1. Answer the question in Query accurately and do not deviate from the topic.
2. Use [1], [2], [3] and other annotations to cite information in Knowledge.
3. If there is insufficient information, you can make cautious guesses, but you must clearly mark them.
4. Stay objective and neutral.
5. Give an answer of appropriate length depending on the complexity of the question.
6. Use clear and professional language and explain professional terms when necessary.
7. List "Reference:" after the answer in the format: [number] literature title, journal name, PMCID/PMID/URL.

Answer the question directly without any extra words, and do not use "Query:" and "Knowledge:" in your answer. You can include a "Reference:" section in your answer.
"""

# Answer every question section with the LLM and store the results.
# Each section gets up to `max_retries` attempts with exponential backoff
# (1s, 2s, ...) between failed attempts.
with open("Path/Question_openai_test.txt", "w", encoding='utf-8') as output_file:
    # Maximum number of attempts per question section
    max_retries = 3

    for i, question_part in enumerate(questions, 1):
        for retry_count in range(1, max_retries + 1):
            try:
                # Get the answer using LLM
                refined_answer = chat_with_claude_opus(question_part, prompt)

                # Write the question part and refined answer to the file
                output_file.write(f"Question Part {i}:\n{question_part}\n")
                output_file.write(f"### Response:\n{refined_answer}\n\n")

                print(f"Resolved issues {i}/{len(questions)}")
                break
            except Exception as e:
                print(f"Request failed, error message: {str(e)}")
                print(f"number of retries: {retry_count}/{max_retries}")

                if retry_count == max_retries:
                    print(f"The maximum number of retries has been reached, skipping the question {i}")
                    # Record the failure so the output file still has one entry
                    # per question section (same marker text as the LLM-only run)
                    # instead of silently dropping the question.
                    output_file.write(f"Question Part {i}:\n{question_part}\n")
                    output_file.write("### Response:\n Handling failure\n\n")
                else:
                    # Exponential backoff: 1s after the first failure, 2s after the second
                    wait_time = 2 ** (retry_count - 1)
                    print(f"Wait {wait_time} seconds before trying again...")
                    time.sleep(wait_time)

print("Done")

In [None]:
# LLM only: tests how well the model answers the questions on its own, without retrieved context
import time

# Instructions for the LLM-only run. This text is passed as the system prompt
# (second argument) to the LLM call later in this cell, while each non-empty
# line of the question file is passed as the user message.
prompt1 = """
You are an AI assistant tasked with answering questions accurately and concisely. Please follow these guidelines:

1. Answer the question directly and accurately, staying on topic.
2. Use clear, professional language. Explain technical terms if necessary.
3. If information is insufficient, you may make cautious inferences, but clearly label them as such.
4. Maintain objectivity and neutrality.
5. Adjust the length of your answer based on the complexity of the question.
6. Use [1], [2], [3], etc. to cite sources of information in your answer.
7. After your answer, list "References:" in the format: [number] Author(s). Title. Journal, Year, Volume(Issue):Pages. DOI/URL

Answer the question directly without any additional preamble or conclusion. Your response may include a "References:" section.
"""

with open("Path/Question.txt", "r", encoding='utf-8') as input_file, \
     open("Path/Question_model_test.txt", "w", encoding='utf-8') as output_file:

    # One question per input line; blank lines are skipped but still counted in `i`.
    for i, question in enumerate(input_file, 1):
        question = question.strip()
        if not question:
            continue

        # Up to `max_retries` attempts per question, backing off 1s, 2s, ... between them.
        max_retries = 3
        for retry_count in range(1, max_retries + 1):
            try:
                # Ask the LLM; swap in chat_with_openai(question, prompt1) to test OpenAI models
                refined_answer = chat_with_claude_opus(question, prompt1)
            except Exception as e:
                print(f"Request failed, error message: {str(e)}")
                print(f"number of retries: {retry_count}/{max_retries}")

                if retry_count < max_retries:
                    wait_time = 2 ** (retry_count - 1)
                    print(f"Waiting {wait_time} seconds before trying again...")
                    time.sleep(wait_time)
                    continue

                # Final attempt failed: record a failure entry for this question.
                print(f"Maximum number of retries reached, skipping question{i}")
                try:
                    output_file.write(f"User:\n{question}\n\n")
                    output_file.write("### Response:\n Handling failure\n")
                    output_file.write("-" * 50 + "\n\n")
                finally:
                    pass
            else:
                # Success path: write the record, then stop retrying. Errors raised
                # while writing are handled exactly like request errors below.
                try:
                    output_file.write(f"User:\n{question}\n\n")
                    output_file.write(f"### Response:\n{refined_answer}\n\n")
                    output_file.write("-" * 50 + "\n\n")

                    print(f"Resolved issue {i}")
                    break
                except Exception as e:
                    print(f"Request failed, error message: {str(e)}")
                    print(f"number of retries: {retry_count}/{max_retries}")

                    if retry_count == max_retries:
                        print(f"Maximum number of retries reached, skipping question{i}")
                        output_file.write(f"User:\n{question}\n\n")
                        output_file.write("### Response:\n Handling failure\n")
                        output_file.write("-" * 50 + "\n\n")
                    else:
                        wait_time = 2 ** (retry_count - 1)
                        print(f"Waiting {wait_time} seconds before trying again...")
                        time.sleep(wait_time)

print("Done")