In [1]:
import openai
import os
from openai import OpenAI
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

In [None]:
import openai
import json
import argparse
import os
import re
import requests
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from openai import OpenAI

def query_openai(prompt, model, temperature, mode):
    """
    Sends `prompt` to the chat backend selected by `mode` and returns the raw response.

    Modes handled here:
      "deepseek"            OpenAI-compatible client on the Hugging Face inference
                            proxy; sends `model`, `temperature`, max_tokens=150.
      "deepseek-r1-azure"   Azure AI Inference client with the fixed endpoint and
                            model name set below (`model` is ignored).
      "deepseek-r1-nvidia"  NVIDIA endpoint with the fixed model
                            "deepseek-ai/deepseek-r1" (`model` is ignored).
      "deepseek-r1-api"     api.deepseek.com with the fixed model
                            "deepseek-reasoner" (`model` is ignored).
      "hf_endpoint"         Delegates to query_hf_api(prompt, temperature).
      "o1"                  `openai` module client with `model`; no sampling
                            options are sent and the system prompt allows 500 tokens.
      anything else         `openai` module client with `model`, `temperature`,
                            max_tokens=150.

    API keys are read from the environment (HF_TOKEN, API_KEY, NVIDIA_API_KEY,
    DEEPSEEK_API_KEY); when a variable is unset the original placeholder literal
    is used, so behaviour without those variables is unchanged.

    Returns:
        The backend's response object, or None when any exception is raised
        (the error is printed, not re-raised).
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant. Limit your response to 130 tokens."},
        {"role": "user", "content": prompt},
    ]
    # Options shared by the "deepseek" proxy path and the default OpenAI path.
    capped_sampling = dict(
        temperature=temperature,
        max_tokens=150,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stream=False,
    )
    try:
        if mode == "deepseek":
            client = OpenAI(
                base_url="https://huggingface.co/api/inference-proxy/together",
                api_key=os.environ.get("HF_TOKEN", ""),
            )
            return client.chat.completions.create(
                model=model,
                messages=chat_messages,
                **capped_sampling
            )

        if mode == "deepseek-r1-azure":
            os.environ['ENDPOINT'] = 'https://aistudioaiservices133462205434.services.ai.azure.com/models'
            # setdefault, not assignment: a key already exported by the user
            # must not be replaced by the empty placeholder on every call.
            os.environ.setdefault('API_KEY', '')
            os.environ['MODEL'] = 'DeepSeek-R1'

            client = ChatCompletionsClient(
                endpoint=os.environ["ENDPOINT"],
                credential=AzureKeyCredential(os.environ["API_KEY"]),
            )
            return client.complete(
                messages=[
                    SystemMessage(content="You are a helpful assistant. Limit your response to 130 tokens."),
                    UserMessage(content=prompt),
                ],
                temperature=temperature,
                top_p=1.0,
                model=os.environ["MODEL"]
            )

        if mode == "deepseek-r1-nvidia":
            client = OpenAI(
                base_url="https://integrate.api.nvidia.com/v1",
                api_key=os.environ.get("NVIDIA_API_KEY", "nvapi-"),
            )
            return client.chat.completions.create(
                model="deepseek-ai/deepseek-r1",
                messages=chat_messages,
                temperature=temperature,
                top_p=1,
                stream=False
            )

        if mode == "deepseek-r1-api":
            client = OpenAI(
                api_key=os.environ.get("DEEPSEEK_API_KEY", "sk-"),
                base_url="https://api.deepseek.com",
            )
            return client.chat.completions.create(
                model="deepseek-reasoner",
                messages=chat_messages,
                temperature=temperature,
                top_p=1,
                stream=False
            )

        if mode == "hf_endpoint":
            return query_hf_api(prompt, temperature)

        if mode == "o1":
            # Only model and messages are sent on this path; temperature and
            # max_tokens are deliberately left out, as in the original call.
            return openai.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant. Limit your response to 500 tokens."},
                    {"role": "user", "content": prompt},
                ])

        # Default: module-level OpenAI client (reads OPENAI_API_KEY itself).
        return openai.chat.completions.create(
            model=model,
            messages=chat_messages,
            **capped_sampling
        )
    except Exception as e:
        print(f"An error occurred during API query: {e}")
    return None

def query_hf_api(prompt, temperature):
    """
    Queries the dedicated Hugging Face inference endpoint through its
    OpenAI-compatible chat API.

    Args:
        prompt: Text sent as the user message.
        temperature: Accepted for signature compatibility with the caller.
            NOTE(review): it is not forwarded; the request sends
            temperature=None, exactly as before. Confirm this is intended.

    Returns:
        The chat-completion response object, or None if the request raised
        (the error is printed).
    """
    try:
        # OpenAI client pointed at the endpoint's server URL. The token is read
        # from HF_TOKEN; the empty string keeps the old behaviour when unset.
        client = OpenAI(
            base_url="https://ue4xb2y39sfo803n.us-east-1.aws.endpoints.huggingface.cloud/v1/",
            api_key=os.environ.get("HF_TOKEN", ""),
        )

        response = client.chat.completions.create(
            model="tgi",
            messages=[
                {"role": "system", "content": "You are a helpful assistant. Limit your response to 130 tokens."},
                {"role": "user", "content": prompt},
            ],
            top_p=None,
            temperature=None,
            max_tokens=1000,
            stream=False,
            seed=None,
            stop=None,
            frequency_penalty=None,
            presence_penalty=None
        )

        return response
    except Exception as e:
        # The request goes through the OpenAI client, not `requests`, so the
        # previous `requests.exceptions.RequestException` handler never matched.
        print(f"An error occurred while querying the Hugging Face API: {e}")
    return None
def extract_response_content(response):
    """
    Extracts the text of the first choice from a chat-completion response.

    Args:
        response: Object exposing `choices[0].message.content`, or a falsy
            value (for example None from a failed query).

    Returns:
        The message content, or None when `response` is falsy or does not have
        the expected shape (including an empty `choices` list). Shape errors
        are printed together with the full response.
    """
    if not response:
        return None
    try:
        return response.choices[0].message.content
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # IndexError covers a response whose `choices` list is empty.
        print(f"Error extracting response content: {e}")
        print("Full response:", response)
    return None

def read_prompt_from_file(file_path):
    """
    Returns the full text of the file at `file_path`.

    A missing file, or any other error while opening or reading, is reported
    with a printed message and results in None.
    """
    contents = None
    try:
        with open(file_path, "r") as prompt_file:
            contents = prompt_file.read()
    except FileNotFoundError:
        print(f"Error: The file {file_path} does not exist.")
    except Exception as e:
        print(f"An error occurred while reading the file: {e}")
    return contents

def save_responses_to_json(responses, folder_path, model, temperature, trial):
    """
    Writes `responses` as one indented JSON file inside `folder_path`.

    The file name encodes the model (with '/' replaced by '_'), the
    temperature, a second-resolution timestamp and the trial number. A failure
    to write is printed, not raised.
    """
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    safe_model = model.replace('/', '_')
    output_file_path = os.path.join(
        folder_path,
        f"output_responses_{safe_model}_temp{temperature}_{stamp}_trial_{trial}.json",
    )
    try:
        with open(output_file_path, 'w') as combined:
            json.dump(responses, combined, indent=4)
    except Exception as e:
        print(f"Failed to save the file: {e}")
    else:
        print(f"------------Responses saved to {output_file_path}---------")
def save_response_to_json(response_content, folder_path, model, trial, filename):
    """
    Stores one response as `{filename: response_content}` in a JSON file next
    to the prompt, unless that file already exists.

    The target name is derived from `filename` ('.txt' becomes '_response'),
    the model (with '/' replaced by '_') and the trial number. An existing file
    is left untouched; a write failure is printed, not raised.
    """
    stem = filename.replace('.txt', '_response')
    safe_model = model.replace('/', '_')
    output_file_path = os.path.join(folder_path, f"{stem}_{safe_model}_trial_{trial}.json")

    # Guard first: never overwrite a response that is already on disk.
    if os.path.exists(output_file_path):
        print(f"Skipping {filename}: Response file already exists.")
        return

    try:
        with open(output_file_path, 'w') as target:
            json.dump({filename: response_content}, target, indent=4)
        print(f"Response saved to {output_file_path}")
    except Exception as e:
        print(f"Failed to save the file: {e}")

def process_file(filename, folder_path, model, temperature, trial, mode):
    """
    Runs one prompt file through the model unless its response file exists.

    Returns a `(filename, content)` tuple. `content` is None when the response
    file is already present, the prompt is empty or unreadable, or no content
    could be extracted from the model's reply.
    """
    response_name = "{}_{}_trial_{}.json".format(
        filename.replace('.txt', '_response'), model.replace('/', '_'), trial
    )
    output_file_path = os.path.join(folder_path, response_name)

    # Skip before reading the prompt or calling any backend.
    if os.path.exists(output_file_path):
        print(f"Skipping {filename}: Response already exists in {output_file_path}")
        return filename, None

    prompt_text = read_prompt_from_file(os.path.join(folder_path, filename))
    if not prompt_text:
        return filename, None

    print(f"Processing file: {filename}")
    content = extract_response_content(query_openai(prompt_text, model, temperature, mode))
    if not content:
        return filename, None

    save_response_to_json(content, folder_path, model, trial, filename)
    return filename, content

def process_folder_parallel(folder_path, model, temperature, trial, mode, max_workers=10):
    """
    Runs process_file on every prompt file in `folder_path` using a thread pool.

    Only names that are exactly `output_sample_<digits>.txt` are processed.
    The whole name must match, so leftovers such as `output_sample_3.txt.bak`
    are ignored; this agrees with the anchored pattern that
    combine_all_responses uses to decide which responses it expects.

    Args:
        folder_path: Directory that holds the prompt files.
        model, temperature, trial, mode: Passed through to process_file.
        max_workers: Size of the thread pool.

    Raises:
        Whatever a worker raised; `future.result()` re-raises it here.
    """
    sample_name = re.compile(r'output_sample_(\d+)\.txt')
    files = [f for f in os.listdir(folder_path) if sample_name.fullmatch(f)]
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submission order is the reverse of the directory listing, as before.
        futures = [
            executor.submit(process_file, file, folder_path, model, temperature, trial, mode)
            for file in files[::-1]
        ]
        for future in as_completed(futures):
            future.result()

def combine_all_responses(folder_path, model, temperature, trial):
    """
    Merges the per-sample response JSONs of one folder into a single file.

    Every `output_sample_{n}.txt` in `folder_path` must have a matching
    `output_sample_{n}_response_{model}_trial_{trial}.json` (with '/' in the
    model replaced by '_'); otherwise a message is printed and nothing is
    written. The files are merged in ascending sample number, so the combined
    JSON has the same key order on every run (the previous set-based iteration
    depended on string hash order). A response file that cannot be read or
    merged is reported and skipped.

    The merged dict is handed to save_responses_to_json.
    """
    pattern = re.compile(r"^output_sample_(\d+)\.txt$")  # Matches files like output_sample_1.txt
    suffix = f"_response_{model.replace('/', '_')}_trial_{trial}.json"

    # Snapshot of the folder, used for both discovery and the existence check.
    files_in_folder = set(os.listdir(folder_path))

    # Prompt files, ordered numerically (name as tie-breaker, e.g. "01" vs "1").
    txt_files = sorted(
        (name for name in files_in_folder if pattern.match(name)),
        key=lambda name: (int(pattern.match(name).group(1)), name),
    )
    json_files = [name.replace(".txt", suffix) for name in txt_files]

    # Check if all expected JSON files exist
    if not set(json_files).issubset(files_in_folder):
        print(">>>>>>>>>>>>>>>Not all .txt files have their corresponding .json files. Aborting combination.<<<<<<<<<<<<<<<")
        return

    responses = {}
    for json_file in json_files:
        json_path = os.path.join(folder_path, json_file)
        try:
            with open(json_path, 'r') as f:
                data = json.load(f)
                responses.update(data)
        except Exception as e:
            print(f"Error reading {json_file}: {e}")

    save_responses_to_json(responses, folder_path, model, temperature, trial)


In [11]:
# Use the key already exported in the environment when there is one; the
# placeholder is only installed when OPENAI_API_KEY is unset, so running this
# cell can no longer replace a real key with the redacted stub.
os.environ.setdefault('OPENAI_API_KEY', 'sk-proj-')

In [24]:
# Driver: for every (CWE, experiment) pair, query the model on the "before"
# and "after" function sources and merge the per-sample responses.
cwe_list = ["cwe-476"]
experiment_list = ["exp_16"]

for cwe in cwe_list:
    for exp in experiment_list:
        folder_path_before = f"/Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/{cwe}/{exp}/func_src_before"
        folder_path_after = f"/Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/{cwe}/{exp}/func_src_after"
        # Alternatives that were tried:
        #model = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
        #model = "o1"
        model = "deepseek-ai/DeepSeek-R1"
        temperature = 0.6
        trial = 1

        # Backend mode and thread count per model; unknown models use the
        # default OpenAI path with 10 workers.
        mode, mw = {
            "deepseek-ai/DeepSeek-R1": ("deepseek-r1-nvidia", 1),
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": ("hf_endpoint", 4),
            "o1": ("o1", 3),
        }.get(model, ("openai", 10))

        # Same pipeline for both variants, "before" first.
        for folder in (folder_path_before, folder_path_after):
            print(folder, model, temperature, trial, mode, mw)
            process_folder_parallel(folder, model, temperature, trial, mode, max_workers=mw)
            combine_all_responses(folder, model, temperature, trial)

/Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_before deepseek-ai/DeepSeek-R1 0.6 1 deepseek-r1-nvidia 1
Skipping output_sample_16.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_before/output_sample_16_response_deepseek-ai_DeepSeek-R1_trial_1.json
Skipping output_sample_17.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_before/output_sample_17_response_deepseek-ai_DeepSeek-R1_trial_1.json
Skipping output_sample_15.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_before/output_sample_15_response_deepseek-ai_DeepSeek-R1_trial_1.json
Skipping output_sample_29.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_before/output_sample_29_response_deepseek-ai_DeepSeek-R1_trial_1.json


KeyboardInterrupt: 

Skipping output_sample_7.txt: Response file already exists.
Processing file: output_sample_5.txt
Skipping output_sample_5.txt: Response file already exists.
Skipping output_sample_58.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_after/output_sample_58_response_deepseek-ai_DeepSeek-R1_trial_1.json
Skipping output_sample_64.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_after/output_sample_64_response_deepseek-ai_DeepSeek-R1_trial_1.json
Skipping output_sample_70.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_after/output_sample_70_response_deepseek-ai_DeepSeek-R1_trial_1.json
Skipping output_sample_65.txt: Response already exists in /Users/ic/Desktop/LLM-as-Static-Proxy-Test/experiments_latest/cwe-476/exp_16/func_src_after/output_sample_65_response_deepseek-ai_DeepSeek-R1_tria

In [None]:
# Single-folder variant of the experiment driver.
# NOTE(review): `folder_path` is not assigned in any visible cell; `model`,
# `temperature` and `trial` come from the driver cell above. Define
# `folder_path` before running this cell on a fresh kernel.
mw = 10
mode = "openai"
if model == "deepseek-ai/DeepSeek-R1":
    # "deepseek-r1" is not a mode query_openai recognises, so it silently fell
    # through to the default OpenAI client. Use the NVIDIA-hosted R1 mode that
    # the main driver cell selects for this model.
    # NOTE(review): the main driver uses mw = 1 for this backend - confirm
    # that 5 workers is acceptable here.
    mode = "deepseek-r1-nvidia"
    mw = 5
elif model == "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B":
    mode = "hf_endpoint"
    mw = 4
elif model == "o1":
    mode = "o1"
    mw = 3


print(folder_path, model, temperature, trial, mode, mw)
process_folder_parallel(folder_path, model, temperature, trial, mode, max_workers=mw)
combine_all_responses(folder_path, model, temperature, trial)

In [None]:
import os

from openai import OpenAI

# Smoke test for the NVIDIA-hosted DeepSeek-R1 endpoint.
# The key is read from NVIDIA_API_KEY so it does not have to be pasted into
# the notebook; the placeholder is used only when the variable is unset.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get("NVIDIA_API_KEY", "nvapi-"),
)

completion = client.chat.completions.create(
    model="deepseek-ai/deepseek-r1",
    messages=[{"role": "user", "content": "Which number is larger, 9.11 or 9.8?"}],
    temperature=0.6,
    top_p=0.7,
    max_tokens=4096,
    stream=False
)
print(completion.choices[0].message.content)

In [None]:
# Print the reply text of the most recent `completion` again.
first_choice = completion.choices[0]
print(first_choice.message.content)

# Other

In [None]:
# If necessary, install the openai Python library by running
# pip install openai

import os

from openai import OpenAI

# Smoke test for the dedicated Hugging Face endpoint.
# The token is read from HF_TOKEN so it does not have to be pasted into the
# notebook; the empty string is used only when the variable is unset.
client = OpenAI(
    base_url="https://ue4xb2y39sfo803n.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.environ.get("HF_TOKEN", ""),
)

chat_completion = client.chat.completions.create(
    model="tgi",
    messages=[
        {
            "role": "user",
            "content": "What is deep learning?"
        }
    ],
    top_p=None,
    temperature=None,
    max_tokens=150,
    stream=False,
    seed=None,
    stop=None,
    frequency_penalty=None,
    presence_penalty=None
)
print(chat_completion.choices[0].message.content)
# Streaming variant (only meaningful with stream=True in the call above):
#for message in chat_completion:
#	print(message.choices[0].delta.content, end="")

In [None]:
# The original line was the argument list of a print() call pasted without the
# call itself, which is a SyntaxError. The request above uses stream=False, so
# there are no `delta` chunks to iterate; print the complete message instead.
# NOTE(review): relies on `chat_completion` from the previous cell.
print(chat_completion.choices[0].message.content, end="")

In [None]:
def query_openai(prompt, model, temperature, mode):
    """
    Sends `prompt` as a chat completion and returns the raw response.

    With mode == "deepseek" the request goes through an OpenAI-compatible
    client pointed at the Hugging Face inference proxy; for every other mode
    the module-level `openai` client is used. Any exception is printed and
    turned into a None return value.

    NOTE(review): this redefines the multi-backend `query_openai` declared
    earlier in the notebook. Running this cell replaces that version, so modes
    other than "deepseek" all use the default OpenAI client afterwards.
    """
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant. You must LIMIT YOUR RESPONSE TO 130 TOKENS.",
    }
    user_message = {"role": "user", "content": prompt}

    try:
        default_client = openai
        client = (
            OpenAI(
                base_url="https://huggingface.co/api/inference-proxy/together",
                api_key=""
            )
            if mode == "deepseek"
            else default_client
        )
        return client.chat.completions.create(
            model=model,
            messages=[system_message, user_message],
            temperature=temperature,
            max_tokens=130,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stream=False
        )
    except Exception as e:
        print(f"An error occurred during API query: {e}")
    return None


def extract_response_content(response):
    """
    Extracts the text of the first choice from a chat-completion response.

    Args:
        response: Object exposing `choices[0].message.content`, or a falsy
            value such as the None that query_openai returns after a failure.

    Returns:
        The message content, or None when `response` is falsy or does not have
        the expected shape (including an empty `choices` list). Shape errors
        are printed together with the full response.
    """
    # A failed query yields None; without this guard the attribute access
    # below raised an uncaught AttributeError.
    if not response:
        return None
    try:
        return response.choices[0].message.content
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        print(f"Error extracting response content: {e}")
        print("Full response:", response)
    return None


In [None]:
# One-off check of the "deepseek" mode with a trivial question.
model, mode, temperature = "deepseek-ai/DeepSeek-R1", "deepseek", 0.7
response = query_openai("What is the capital of France?", model, temperature, mode)
content = extract_response_content(response)

In [None]:
# Display the text extracted from the test query in the previous cell.
content

In [None]:
import os

from openai import OpenAI

# Smoke test for DeepSeek-R1 through the Hugging Face inference proxy.
# The token is read from HF_TOKEN so it does not have to be pasted into the
# notebook; the empty string is used only when the variable is unset.
client = OpenAI(
    base_url="https://huggingface.co/api/inference-proxy/together",
    api_key=os.environ.get("HF_TOKEN", ""),
)

messages = [
    {
        "role": "user",
        "content": "What is the capital of France?"
    }
]

completion = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1",
    messages=messages,
    max_tokens=500
)

# Prints the whole message object, not only its text.
print(completion.choices[0].message)

In [None]:
# Display only the text of the first choice of the most recent `completion`.
completion.choices[0].message.content