In [None]:
import requests
import os
import json
import pandas as pd
# Chat-completions endpoint of the model server running on this machine
url = "http://127.0.0.1:5000/v1/chat/completions"

# The request body is sent as JSON
headers = {
    "Content-Type": "application/json"
}

# NOTE(review): `history` is only ever reset in the visible code, never read — confirm it is still needed
history = []
# Folder holding the input CSV files
path = "./Textchunks/"
# Create the output folder if it doesn't exist; exist_ok avoids the race
# between a separate existence check and the creation itself
output_folder = "SupplierResults"
os.makedirs(output_folder, exist_ok=True)


def _parse_supplier_entries(message):
    """Decode the model reply into a list of supplier dicts ([] if unusable)."""
    try:
        parsed = json.loads(message)
    except (TypeError, ValueError):
        # TypeError: reply was not text (e.g. None); ValueError: not valid JSON
        return []
    if isinstance(parsed, dict):
        # A single object instead of a list still describes one supplier
        parsed = [parsed]
    if not isinstance(parsed, list):
        return []
    return [entry for entry in parsed if isinstance(entry, dict)]


def _to_probability(value):
    """Convert a reported probability to float, using 0.0 when it is unusable."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def process_csv(file_path):
    """Identify suppliers in every text chunk of a CSV file and save them.

    Each non-empty entry of the CSV's ``Text`` column is sent to the
    chat-completions endpoint at the module-level ``url``. The reply is
    expected to be a JSON list of objects with the keys ``supplier_name``
    and ``is_supplier_probability``; replies that cannot be parsed that way
    are ignored. For every supplier name the highest probability seen in
    the file is kept, and the result is written to
    ``<output_folder>/<csv file name>-supplier_results.txt``.

    Args:
        file_path: Path of the CSV file; it must contain a ``Text`` column.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    # Read the CSV file
    df = pd.read_csv(file_path)

    # Empty cells are read as NaN; drop them instead of sending "nan" to the model
    text_list = df['Text'].dropna().tolist()

    # Maps supplier name -> highest probability reported for it
    supplier_dict = {}

    # Loop through each text entry
    for text in text_list:
        # Format the prompt with the text
        prompt = f'Please identify the suppliers in the following text and give the identification results. The identification results are returned in json list format.The supplier name finally returned can be multiple, only one, or none at all. For example[{{"supplier_name":"XXXX", "is_supplier_probability":"98.12"}},{{"supplier_name":"XXXX", "is_supplier_probability":"50.12"}}]/[{{"supplier_name":"XXXX", "is_supplier_probability":"98.12"}}]/：\n\ntext：{text}'
        # Create the data payload for the API request.
        # "mode" is given once: a repeated key in a dict literal keeps only
        # its last value, so "instruct" is what was effectively sent.
        data = {
            "messages": [{"role": "user", "content": prompt}],
            "mode": "instruct",
            "instruction_template": "Alpaca",
        }

        # Send the API request.
        # NOTE(review): verify=False disables TLS certificate checks; it has
        # no effect on the plain-HTTP default URL but would for an HTTPS one.
        response = requests.post(url, headers=headers, json=data, verify=False)
        # Fail with a clear HTTP error instead of a KeyError on the body below
        response.raise_for_status()
        assistant_message = response.json()['choices'][0]['message']['content']

        # Parse the response to extract supplier information
        supplier_info = _parse_supplier_entries(assistant_message)
        print(supplier_info)

        # Update the supplier dictionary
        for info in supplier_info:
            supplier_name = info.get('supplier_name', '')
            if not isinstance(supplier_name, str) or not supplier_name:
                # Nothing meaningful to record without a supplier name
                continue
            probability = _to_probability(info.get('is_supplier_probability'))

            # Keep only the highest probability seen for each supplier
            known = supplier_dict.get(supplier_name)
            if known is None or probability > known:
                supplier_dict[supplier_name] = probability

    # Save the supplier dictionary to a file
    file_name = os.path.basename(file_path)
    output_file = os.path.join(output_folder, file_name + "-supplier_results.txt")
    # Explicit UTF-8 so non-ASCII supplier names are written on every platform
    with open(output_file, 'w', encoding='utf-8') as file:
        for supplier, probability in supplier_dict.items():
            file.write(f"Supplier: {supplier}, Probability: {probability}\n")
    print("write done")

In [None]:

def list_files_recursive(directory):
    """Return the paths of all files in *directory* and its subfolders.

    Every path is the walked folder joined with the file name, in the order
    ``os.walk`` yields them.
    """
    collected = []
    for folder, _subdirs, names in os.walk(directory):
        collected.extend(os.path.join(folder, name) for name in names)
    return collected
# Collect every file below the input path and run supplier extraction on the CSV ones
print(path)
file_list = list_files_recursive(path)
print(file_list)
for file in file_list:
    # Anything that is not a CSV file is left alone
    if not file.endswith(".csv"):
        continue
    process_csv(file)

    # Reset history once this CSV file has been handled
    history = []