In [1]:
import json
import os
import re

def extract_all_json_from_string(input_string):
    """
    Extract every JSON object embedded in a string and parse it into a Python dictionary.

    The text is scanned left to right. At each opening brace the standard JSON
    decoder tries to parse one complete object starting there. On success the
    object is collected and scanning resumes after its closing brace; on failure
    scanning moves on to the next opening brace. Unlike a non-greedy
    brace-to-brace regex, this handles nested objects and braces that appear
    inside string values. Objects wrapped in doubled braces ("{{...}}") are still
    recovered, because the outer brace fails to parse and the inner one succeeds.

    Args:
    input_string (str): The string containing JSON-like content.

    Returns:
    list: The parsed JSON objects as dictionaries, in order of appearance, or an
    empty list if no valid JSON object is found or the input is not a string.
    """
    json_objects = []

    if not isinstance(input_string, str):
        # Best-effort contract: report the problem and return nothing instead of raising.
        print(f"Error during JSON extraction: expected str, got {type(input_string).__name__}")
        return json_objects

    decoder = json.JSONDecoder()
    position = input_string.find('{')
    while position != -1:
        try:
            parsed_json, end = decoder.raw_decode(input_string, position)
        except (json.JSONDecodeError, RecursionError):
            # No valid object starts at this brace; try the next one.
            position = input_string.find('{', position + 1)
            continue
        json_objects.append(parsed_json)
        # Resume after the object just parsed so nested objects are not reported twice.
        position = input_string.find('{', end)

    return json_objects

# Example usage
# Sample model response used to exercise extract_all_json_from_string: one fenced
# block holding eight per-round JSON objects back to back, followed by a second
# fenced block holding the final summary object (the one with "final_issue_type").
input_string = """
response: ### Intermediate Results for Each Round:

```json
{
    "round": 1,
    "classification": "deployment",
    "evidence": "The ISSUE RAISER reported an error indicating that 'spleeter' is not recognized as a command, suggesting an issue with the installation or environment setup."
}
{
    "round": 2,
    "classification": "deployment",
    "evidence": "The COLLABORATOR suggested running `python -m spleeter separate` as a workaround, indicating a possible issue with the installation process."
}
{
    "round": 3,
    "classification": "deployment",
    "evidence": "The COLLABORATOR suggested trying `spleeter.exe`, further confirming the issue is related to the installation or environment setup."
}
{
    "round": 4,
    "classification": "deployment",
    "evidence": "The COMMENTER suggested installing pyssl and other dependencies, which indicates a possible issue with the environment configuration."
}
{
    "round": 5,
    "classification": "deployment",
    "evidence": "Another COMMENTER reported the same issue with the longer command `python -m spleeter separate`, indicating a consistent issue with the installation or environment setup."
}
{
    "round": 6,
    "classification": "deployment",
    "evidence": "The COLLABORATOR identified a problem with the Conda environment installation, suggesting that the issue is related to the environment setup."
}
{
    "round": 7,
    "classification": "deployment",
    "evidence": "A COMMENTER detailed the steps they took to resolve the issue, including ensuring that the Conda environment was correctly set up and that the necessary modules were installed."
}
{
    "round": 8,
    "classification": "deployment",
    "evidence": "Another COMMENTER provided additional steps to ensure proper installation, including running the Anaconda command prompt as an administrator and ensuring Python was added to the PATH."
}
```

### Final Combined Output:

```json
{
    "final_issue_type": "deployment",
    "combined_evidence": "The issue revolves around the inability to recognize 'spleeter' as a command, which is typically related to installation or environment setup problems. Multiple users reported similar issues, and the COLLABORATOR and COMMENTERS suggested various steps to resolve the environment configuration, such as ensuring correct Conda environment setup and adding Python to the PATH."
}
```
"""

# Parse the sample response, then show the full list and how many objects were found.
parsed_json = extract_all_json_from_string(input_string)
print(parsed_json)
print(len(parsed_json))

# The last extracted object is the final summary; show it, its type, and its verdict.
final_entry = parsed_json[-1]
print(final_entry, type(final_entry), final_entry.get('final_issue_type'), sep="\n")


[{'round': 1, 'classification': 'deployment', 'evidence': "The ISSUE RAISER reported an error indicating that 'spleeter' is not recognized as a command, suggesting an issue with the installation or environment setup."}, {'round': 2, 'classification': 'deployment', 'evidence': 'The COLLABORATOR suggested running `python -m spleeter separate` as a workaround, indicating a possible issue with the installation process.'}, {'round': 3, 'classification': 'deployment', 'evidence': 'The COLLABORATOR suggested trying `spleeter.exe`, further confirming the issue is related to the installation or environment setup.'}, {'round': 4, 'classification': 'deployment', 'evidence': 'The COMMENTER suggested installing pyssl and other dependencies, which indicates a possible issue with the environment configuration.'}, {'round': 5, 'classification': 'deployment', 'evidence': 'Another COMMENTER reported the same issue with the longer command `python -m spleeter separate`, indicating a consistent issue with 

In [2]:
# Directory of the chat-model checkpoint loaded by the cells below. Set the
# BASE_MODEL_DIR environment variable to switch checkpoints without editing the
# notebook; when it is unset or empty, the default below is used.
# Checkpoints used previously:
#   /data/luomingkai/issue/models/Qwen/Qwen2.5-7B-Instruct
#   /root/autodl-fs/Qwen2.5-7B-Instruct
BASE_MODEL_DIR = os.environ.get("BASE_MODEL_DIR") or "/root/autodl-fs/DeepSeek-V2-Lite-Chat"

In [3]:
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_DIR)

# Decoding hyperparameters for this smoke test. They apply to whichever checkpoint
# BASE_MODEL_DIR points to; max_tokens caps the length of each generation.
sampling_params = SamplingParams(temperature=0.5, top_p=1.0, repetition_penalty=1.05, max_tokens=512)

# Load the checkpoint with vLLM. tensor_parallel_size=4 splits the model across
# four GPUs. trust_remote_code=True allows code shipped with the checkpoint to
# run, so BASE_MODEL_DIR should only point at trusted checkpoints.
model = LLM(model=BASE_MODEL_DIR, tensor_parallel_size=4, trust_remote_code=True)

# Build one chat-formatted prompt from a system + user message pair.
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

# generate outputs
outputs = model.generate([text], sampling_params)

# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


INFO 12-27 22:26:49 config.py:112] Replacing legacy 'type' key with 'rope_type'
INFO 12-27 22:26:55 config.py:1020] Defaulting to use mp for distributed inference
INFO 12-27 22:26:55 config.py:1136] Chunked prefill is enabled with max_num_batched_tokens=512.
INFO 12-27 22:26:55 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='/root/autodl-fs/DeepSeek-V2-Lite-Chat', speculative_config=None, tokenizer='/root/autodl-fs/DeepSeek-V2-Lite-Chat', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=163840, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=Observabil

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 12-27 22:27:45 model_runner.py:1077] Loading model weights took 7.3840 GB
[1;36m(VllmWorkerProcess pid=2639)[0;0m INFO 12-27 22:27:46 model_runner.py:1077] Loading model weights took 7.3840 GB
[1;36m(VllmWorkerProcess pid=2638)[0;0m INFO 12-27 22:27:46 model_runner.py:1077] Loading model weights took 7.3840 GB
[1;36m(VllmWorkerProcess pid=2637)[0;0m INFO 12-27 22:27:46 model_runner.py:1077] Loading model weights took 7.3840 GB
[1;36m(VllmWorkerProcess pid=2637)[0;0m [1;36m(VllmWorkerProcess pid=2639)[0;0m [1;36m(VllmWorkerProcess pid=2638)[0;0m INFO 12-27 22:27:48 worker.py:232] Memory profiling results: total_gpu_memory=31.60GiB initial_memory_usage=7.81GiB peak_torch_memory=7.42GiB memory_usage_post_profile=7.88GiB non_torch_memory=0.49GiB kv_cache_size=20.54GiB gpu_memory_utilization=0.90
INFO 12-27 22:27:48 worker.py:232] Memory profiling results: total_gpu_memory=31.60GiB initial_memory_usage=7.81GiB peak_torch_memory=7.42GiB memory_usage_post_profile=7.88GiB non_

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  2.25it/s, est. speed input: 60.82 toks/s, output: 128.39 toks/s]

Prompt: '<｜begin▁of▁sentence｜>You are a pirate chatbot who always responds in pirate speak!\n\nUser: Who are you?\n\nAssistant:', Generated text: " Arrr, I be the scallywag ye be lookin' for, matey! Ye can call me Captain Chat-a-lot, but mind ye, I be runnin' the high seas of knowledge and mischief here to keep ye entertained and informed, arrr!"





In [4]:
def get_deepseek_output(
    model,
    tokenizer,
    messages_list,
    max_input_length=4096,
    max_tokens=512,
    temperature=0.3,
    top_p=1.0,
    repetition_penalty=1.05,
):
    """
    Generate one completion per conversation in a single batched call.

    Each conversation is rendered to a prompt string with the tokenizer's chat
    template (generation prompt appended), all prompts are passed to
    ``model.generate`` together, and the text of the first candidate of each
    result is returned.

    Args:
    model: Object exposing ``generate(prompts, sampling_params)``.
    tokenizer: Object exposing ``apply_chat_template(messages, tokenize=..., add_generation_prompt=...)``.
    messages_list (list): Conversations, each a list of ``{"role": ..., "content": ...}`` dicts.
    max_input_length (int): Accepted for backward compatibility; currently unused, prompts are not truncated here.
    max_tokens (int): Maximum number of tokens to generate per prompt.
    temperature (float): Sampling temperature.
    top_p (float): Nucleus-sampling probability mass.
    repetition_penalty (float): Penalty applied to repeated tokens.

    Returns:
    list: Generated texts, one per conversation, in the order the model returns them.
    """
    if not messages_list:
        # Nothing to generate; skip the engine call entirely.
        return []

    text_list = [
        tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        for messages in messages_list
    ]

    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        max_tokens=max_tokens,
    )

    outputs = model.generate(text_list, sampling_params)

    # Keep only the first candidate's text for each prompt.
    return [output.outputs[0].text for output in outputs]

# Smoke-test the helper with a batch of one conversation; the returned list of
# generated texts is the cell's displayed result.
messages = [
    dict(role="system", content="You are a pirate chatbot who always responds in pirate speak!"),
    dict(role="user", content="Who are you?"),
]
get_deepseek_output(model, tokenizer, messages_list=[messages])

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  4.02it/s, est. speed input: 108.96 toks/s, output: 117.02 toks/s]


[' Arrr, matey! I be the fearsome Pirate Chatbot, aye! What be yer business on this fine day at sea?']

In [5]:
# data_path = "/home/luomingkai/workspace/issue_llm/issue_classify/issue_with_comments_framework/matched_results_test.json"
data_path = "/root/issue_classify/issue_with_comments_framework/matched_results_test_modify_other_update.json"

# Read the whole dataset first so the file is closed before any further work.
with open(data_path, encoding="utf-8") as fp:
    issue_data = json.load(fp)

# Preview the first four records: author, title, body, label, the author's
# association with the repository, and the comments reduced to
# (login, author_association, body) tuples.
for idx, data in enumerate(issue_data[:4]):
    user, title, body, label, author_association = (
        data["user"]["login"],
        data["title"],
        data["body"],
        data["tag_labels"],
        data["author_association"],
    )
    comment_list = [
        (com["user"]["login"], com["author_association"], com["body"])
        for com in data["comments_list"]
    ]

    print(f"user: {user}")
    print(f"Title: {title}")
    print(f"Body: {body}")
    print(f"label: {label}")
    print(f"author_association: {author_association}")
    print(f"comment_list: {comment_list}")


uesr: RainBoltz
Title: No output displayed or it gets stuck - OpenCV issue
Body: my terminal isn't responding after executed the command below: `./build/examples/openpose/rtpose.bin --imagelevel 1 --netcaffeopenpose.sh` , which was provided by official
label: deployment
author_association: NONE
comment_list: [('bigmoumou', 'NONE', 'I have the same problem too\r\n\r\nIs there any solution ?\r\n\r\n![help](https://cloud.githubusercontent.com/assets/16252975/25697753/cddcfe9e-30ee-11e7-82a2-2af97eb8460b.png)\r\n\r\n'), ('Shawnroom', 'NONE', 'I have the same issue, and wonder if there is any solution.'), ('gineshidalgo99', 'MEMBER', 'Sorry to hear that, we are working on fixing that error. We think it is due to OpenCV compiled with Qt or different visualization support.\r\n\r\nMeanwhile, you can make it work by:\r\n1. Completely uninstalling your current OpenCV version.\r\n2. Installing the default OpenCV from the Ubuntu repository: `apt-get install libopencv-dev`, or alternatively compili

In [7]:
# System prompt for the issue-classification task. It defines the five categories
# (error, performance, deployment, question, other), the participant roles, the
# required per-round and final JSON output formats, and two worked examples.
# The literal token <user_provided_repository_description> is a placeholder that
# callers replace with the description of the repository being analysed.
# Raw string: the text is used verbatim, with no escape processing.
global_issue_prompt = r"""
### **Role**  
You are an expert in GitHub repository analysis. Your task is to classify a given GitHub Issue into one of the following categories: **error**, **performance**, **deployment**, **question**, or **other**.

Before analyzing the Issue conversation, you must carefully review the repository description to understand its intended functionality, scope, dependencies, and purpose. This understanding will help determine if the Issue is genuinely related to the repository’s code, configuration, or official documentation, or if it stems from user misunderstandings, local environment misconfigurations, external dependencies, or unrelated factors.

Analyze the conversation context, the repository’s description, and the roles of participants to determine the correct classification. For each round of the conversation, provide an intermediate classification and reasoning. At the end, combine all intermediate results to produce a final classification, using a **self-reflection** mechanism to validate your reasoning and ensure your classification aligns with the root cause of the Issue. If the issue is found to be completely unrelated to the repository based on its description, it must be categorized as **other**.


### Repository Description  
<user_provided_repository_description>

Use this description as a reference point throughout your classification process. Compare the reported issues, errors, or requests against the repository’s stated purpose, functionality, and supported features. If the conversation’s content or the user’s requests fall outside the scope defined by the repository description, consider classifying the issue as **other**. If misunderstandings occur because the user did not follow or comprehend instructions reflected in the repository description or official documentation, consider whether it should be classified as **question**. Ensure that “deployment” issues are directly related to repository code or its official documentation, not external factors.


### **Participants**  
1. **Issue Raiser**: Describes the problem, requests a feature, or raises concerns related to the repository.  
2. **Commenter**: Provides insights, shares similar experiences, or suggests possible solutions.  
3. **MEMBER**: Evaluates technical feasibility, assigns tasks, or progresses the Issue.  
4. **COLLABORATOR**: Proposes solutions, performs in-depth analysis, or submits Pull Requests.  
5. **CONTRIBUTOR**: Offers historical context, helps reproduce issues, or validates fixes.  
6. **OWNER**: Makes final decisions, prioritizes tasks, or proposes long-term resolutions.


### **Issue Categories**  

- **error**:  
  Problems directly stemming from the repository’s code, configuration, or inherent incompatibilities within the repository. For example, runtime errors, exceptions, or failures in the repository code itself.

- **performance**:  
  Issues where the repository’s code or configuration leads to slow execution times, bottlenecks, or inefficient resource usage.

- **deployment**:  
  Issues specifically caused by the repository’s code or incorrect/inadequate documentation during the installation or deployment process.
    - **Scope Validation**:  
      Reference the repository description to ensure that the reported deployment problem is within the scope of the repository’s intended setup process. Deployment issues must stem from defects or omissions in the repository’s code, configuration, or documentation.
    
    - **Exclusions**:  
      If the issue is caused by any of the following, it should be classified as **question**:  
      - User mistakes, such as missing or incorrectly installed dependencies (e.g., wrong CUDA driver version, missing required libraries, or incorrect installation steps).  
      - Using outdated tools (e.g., an older CMake version than required).  
      - Hardware or environment limitations (e.g., using unsupported GPUs).  

- **question**:  
 Issues arising from user misunderstandings, failure to follow documented instructions, incorrect usage, or local environment misconfigurations that are not caused by the repository code or official documentation. 
 - These issues can often be resolved by following existing guidance. 
 - Additionally, users may raise questions or engage in discussions with developers regarding specific usage scenarios.

- **other**:  
  Issues that do not fall into predefined categories, including topics unrelated to the repository as determined by comparing the issue to the repository description. This category also includes user requests for feature enhancements or pull requests that go beyond the scope or goals of the repository. Additionally, it covers discussions related to improvements in documentation, user experience suggestions, community governance, collaboration processes, and strategic or technical proposals that are not directly related to code or documentation errors.
  1. Topics that do not fit the predefined categories.  
  2. Issues or discussions unrelated to the repository, as determined by comparing the Issue to the repository description.  
  3. Feature suggestions or requests beyond the repository’s described scope.

  **Examples of “other”** include:  
  - **Feature Suggestions and Testing**: New feature ideas or enhancements not supported by current repository goals.  
  - **Documentation and Resources**: Improvements or additions to general documentation, tutorials, or references that are not about fixing a code-related or doc-related deployment bug.  
  - **User Experience**: Suggestions to improve usability, design, or general compatibility.  
  - **Community and Collaboration**: Ideas about community governance, contribution processes, or engagement.  
  - **Strategic and Technical Proposals**: Infrastructure, policy, or long-term goal considerations unrelated to immediate code or documentation errors.  
  - **Miscellaneous Topics**: Relevant issues that do not fit other categories or are unrelated submissions.


### **Process**  
**The process must be strictly executed.**
1. **Read the Conversation and Repository Description**:  
   Begin by reviewing the repository description thoroughly to understand the project’s purpose, supported features, and scope. Keep this context in mind as you examine each round of the conversation. If the problem discussed clearly falls outside the repository’s described capabilities or instructions, consider “other” or “question.”

2. **For Each Round**:  
   - Classify the round into one of the five categories: **error**, **performance**, **deployment**, **question**, or **other**.  
   - Provide a JSON object with keys `round`, `classification`, `evidence`, and `self_reflection`.  
     - **round**: The round number.  
     - **classification**: Your chosen classification for that round.  
     - **evidence**: Direct quotes or references from that specific round’s conversation that support your classification.  
     - **self_reflection**: Justify your classification’s accuracy. Confirm whether it matches the repository’s description and consider alternatives. If unsure, reflect on the possibility of user misunderstanding vs. repository code issues.

3. **Final Self-Reflection Across Rounds**:  
   - After classifying all rounds, review the entire conversation and your intermediate classifications.  
   - Check for consistency:  
     - Does the chosen final category align with the majority of evidence and the repository description?  
     - Are there any contradictions between rounds?  
     - Could the issue be a misunderstanding (question) rather than a repository problem (error/performance/deployment)?  
     - If the issue is irrelevant to the repository description, finalize as **other**.

4. **Finalize the Classification**:  
   Produce a final JSON object containing:  
   - `final_issue_type`: Your chosen final category.  
   - `combined_evidence`: A concise summary of the entire conversation supporting your final classification.  
   - `final_self_reflection`: Analysis of the reasoning process, conflicts, and how you ensured consistency with the repository description and instructions.


### **Output Format**

**Intermediate Results for Each Round**:
```json
{
    "round": <Round Number>,
    "classification": "<error|performance|deployment|question|other>",
    "evidence": "<Evidence from the current round of the conversation>",
    "self_reflection": "<Analysis of classification accuracy and consistency for this round>"
}
```

**Final Combined Output**:
```json
{
    "final_issue_type": "<error|performance|deployment|question|other>",
    "combined_evidence": "<Concise summary of the conversation supporting the final classification>",
    "final_self_reflection": "<Analysis of conflicts, shifts, and consistency across all rounds>"
}
```

**Validation Reminder**:  
- Ensure that all JSON objects are valid and correctly formatted.  
- The keys should be correctly quoted, and all values should be properly formatted strings or numbers where applicable.  
- No extra or missing fields are allowed.  
- Confirm that the classification aligns with the repository description, the issue categories, and the dialogue evidence.


### **Examples**

#### **Example 1: Error with Contributor’s Historical Context**
*Conversation:*  
- **Issue Raiser**: "Running model.fit() raises a KeyError related to missing labels in the dataset."  
- **CONTRIBUTOR**: "This issue might be related to an earlier change in #45 that introduced stricter label validation."  
- **COLLABORATOR**: "We’ll patch data_loader.py to handle missing labels."  
- **OWNER**: "The fix is merged. Let us know if it resolves the problem."

*Intermediate Results:*  
```json
{
    "round": 1,
    "classification": "error",
    "evidence": "The Issue Raiser described a KeyError from the repository code handling labels.",
    "self_reflection": "The classification as error is justified because the bug originates in the repository code."
}
```

```json
{
    "round": 2,
    "classification": "error",
    "evidence": "The CONTRIBUTOR referenced a past commit that made label validation stricter.",
    "self_reflection": "This supports the error classification. The issue traces back to a known code change."
}
```

```json
{
    "round": 3,
    "classification": "error",
    "evidence": "The COLLABORATOR proposed a code fix for handling missing labels.",
    "self_reflection": "The solution involves changing repository code, reinforcing that it is an error."
}
```

```json
{
    "round": 4,
    "classification": "error",
    "evidence": "The OWNER merged a fix into the repository.",
    "self_reflection": "All evidence is consistent with classifying the issue as error."
}
```

*Final Combined Output:*  
```json
{
    "final_issue_type": "error",
    "combined_evidence": "The repository code did not handle missing labels, causing a KeyError. Historical context and a code fix confirm it as an error.",
    "final_self_reflection": "All rounds consistently supported the error classification. The solution required a code change in the repository."
}
```

#### **Example 2: Question Misclassified Initially**
*Conversation:*  
- **Issue Raiser**: "I tried running the code, but the output looks weird. Is this a bug?"  
- **MEMBER**: "Have you verified if the input data matches the format described in the README?"  
- **Issue Raiser**: "I missed the formatting instructions. After fixing the input, it works fine."

*Intermediate Results:*  
```json
{
    "round": 1,
    "classification": "error",
    "evidence": "The Issue Raiser suspected a bug due to unexpected output.",
    "self_reflection": "Initial classification as error is tentative. This could be a user misunderstanding."
}
```

```json
{
    "round": 2,
    "classification": "question",
    "evidence": "The MEMBER directed the user to check input format per the documentation.",
    "self_reflection": "The reclassification to question is justified since the problem may be due to user input errors, not the code."
}
```

```json
{
    "round": 3,
    "classification": "question",
    "evidence": "The Issue Raiser fixed the input format and the problem was resolved.",
    "self_reflection": "The resolution confirms it as a question caused by user misunderstanding, not an error in the repository."
}
```

*Final Combined Output:*  
```json
{
    "final_issue_type": "question",
    "combined_evidence": "The Issue was due to incorrect input formatting. After following the documentation, the user resolved the problem.",
    "final_self_reflection": "The initial misclassification was corrected. No repository code changes were needed; it was a usage question."
}
```

"""

# Template that uses only the opening post of the issue (no comments).
# Three positional placeholders; presumably speaker role, title, body — confirm
# against the caller, which is not visible here.
question_prompt = """
````
*Conversation*:
- {}: ### Title: "{}" ### Body: "{}"
````
"""

# Variant used when comments are included: opens the conversation with the
# issue itself. Placeholders in order: speaker role, issue title, issue body.
question_comment_prompt = """
*Conversation*:
- **{}**: "{} {}"
"""

# One additional conversation line. Two placeholders; presumably speaker role
# and comment body — confirm against the caller.
comment_prompt = """
- **{}**: "{}"
"""

# Maps an author_association value to the role label shown for the opening post
# of an issue. Authors with no association ("NONE") appear as the issue raiser.
ROLE_MAP_BEGIN = dict(
    NONE="ISSUE RAISER",
    MEMBER="MEMBER",
    COLLABORATOR="COLLABORATOR",
    CONTRIBUTOR="CONTRIBUTOR",
    OWNER="OWNER",
)

# Same labels, except that "NONE" maps to "Commenter" (presumably used for
# follow-up comments — confirm against the caller).
ROLE_MAP_COMMENT = {**ROLE_MAP_BEGIN, "NONE": "Commenter"}

In [8]:
# Per-repository metadata keyed by the "owner/repo" slug. Each entry holds a
# prose description of the project and its GitHub URL.
REPO_DESC = {
    "CMU-Perceptual-Computing-Lab/openpose": {
        "description": "OpenPose is a real-time multi-person keypoint detection library developed by the Carnegie Mellon Perceptual Computing Lab. It estimates human body, face, hands, and foot keypoints from single images, providing 2D real-time multi-person keypoint detection, including 15, 18, or 25-keypoint body/foot keypoint estimation, 2x21-keypoint hand keypoint estimation, and 70-keypoint face keypoint estimation. Additionally, it offers 3D real-time single-person keypoint detection and a calibration toolbox. OpenPose is compatible with various operating systems, including Ubuntu, Windows, and macOS, and supports CUDA (Nvidia GPU), OpenCL (AMD GPU), and CPU-only versions.",
        "url": "https://github.com/CMU-Perceptual-Computing-Lab/openpose"
    },
    "CorentinJ/Real-Time-Voice-Cloning": {
        "description": "Real-Time Voice Cloning is a Python-based tool that enables the cloning of voices in real-time. It utilizes deep learning models to synthesize speech that mimics a target voice, requiring only a few seconds of audio from the desired speaker. The repository provides code and instructions to train and use the voice cloning system.",
        "url": "https://github.com/CorentinJ/Real-Time-Voice-Cloning"
    },
    "JaidedAI/EasyOCR": {
        "description": "EasyOCR is an open-source Optical Character Recognition (OCR) library that supports over 80 languages. It is designed to be easy to use and provides accurate text recognition from images. The library is built on PyTorch and offers pre-trained models for various languages and scripts.",
        "url": "https://github.com/JaidedAI/EasyOCR"
    },
    "deepfakes/faceswap": {
        "description": "Faceswap is a deep learning-based tool for face-swapping in images and videos. It allows users to train models to swap faces between different subjects, providing a platform for experimenting with deepfake technology. The repository includes code for training and using the face-swapping models.",
        "url": "https://github.com/deepfakes/faceswap"
    },
    "deezer/spleeter": {
        "description": "Spleeter is an open-source tool developed by Deezer for source separation in music tracks. It uses deep learning models to separate audio into stems, such as vocals and accompaniment, enabling applications like karaoke and remixing. The repository provides pre-trained models and code for audio source separation.",
        "url": "https://github.com/deezer/spleeter"
    },
    "dusty-nv/jetson-inference": {
        "description": "Jetson Inference is a collection of deep learning inference samples and models for NVIDIA Jetson devices. It includes code for image classification, object detection, and segmentation, optimized for Jetson hardware. The repository provides pre-trained models and examples to demonstrate the capabilities of Jetson devices in AI applications.",
        "url": "https://github.com/dusty-nv/jetson-inference"
    },
    "iperov/DeepFaceLab": {
        "description": "DeepFaceLab is a deep learning tool for creating deepfakes, focusing on face-swapping in videos. It provides a comprehensive set of tools for training and applying deep learning models to perform face-swapping tasks. The repository includes code for data preparation, model training, and face-swapping applications.",
        "url": "https://github.com/iperov/DeepFaceLab"
    },
    "junyanz/pytorch-CycleGAN-and-pix2pix": {
        "description": "This repository provides PyTorch implementations of CycleGAN and pix2pix, two popular models for image-to-image translation tasks. CycleGAN enables image translation without paired examples, while pix2pix requires paired images for training. The repository includes code and pre-trained models for various image translation tasks.",
        "url": "https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix"
    },
    "mozilla/TTS": {
        "description": "Mozilla TTS is an open-source Text-to-Speech (TTS) engine that aims to make speech synthesis more accessible. It provides implementations of state-of-the-art TTS models, including Tacotron and FastSpeech, and supports training on custom datasets. The repository includes code for training and using TTS models.",
        "url": "https://github.com/mozilla/TTS"
    },
    "streamlit/streamlit": {
        "description": "Streamlit is an open-source app framework for Machine Learning and Data Science projects. It allows users to create interactive web applications for data analysis and visualization with minimal code. The repository provides the core framework and examples for building Streamlit applications.",
        "url": "https://github.com/streamlit/streamlit"
    },
    "microsoft/recommenders": {
        "description": "Recommenders is a project under the Linux Foundation of AI and Data. This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. The examples detail learnings on five key tasks: preparing and loading data for each recommendation algorithm, building models using various classical and deep learning recommendation algorithms such as Alternating Least Squares (ALS) or eXtreme Deep Factorization Machines (xDeepFM), evaluating algorithms with offline metrics, tuning and optimizing hyperparameters for recommendation models, and operationalizing models in a production environment on Azure. Several utilities are provided to support common tasks such as loading datasets in the format expected by different algorithms, evaluating model outputs, and splitting training/test data. Implementations of several state-of-the-art algorithms are included for self-study and customization in your own applications.",
        "url": "https://github.com/recommenders-team/recommenders"
    }
}

# The recommenders entry is keyed "microsoft/recommenders" while its url points at
# "recommenders-team/recommenders". Register the second slug as an alias so a
# lookup by the "owner/repo" part of an issue URL succeeds under either owner.
REPO_DESC["recommenders-team/recommenders"] = REPO_DESC["microsoft/recommenders"]

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

# Repeat the issue-classification experiment TIMES times with the already
# loaded model and collect one classification report per run.
TIMES = 10
experiments = []  # not used in this cell; kept in case another cell references it
# Per-run report frames, each followed by a blank spacer row.
all_reports = []

data_path = "/root/issue_classify/issue_with_comments_framework/matched_results_test_modify_other_update.json"
result_path = "/root/autodl-fs/log/deepseek_v2_lite_chat_agent.xlsx"

# Closed label set; the position in `labels` is the numeric class id.
labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0,
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}


def _build_issue_messages(data):
    """Build the chat messages for one issue record.

    The result is a system prompt carrying the repository description, one
    user message for the issue itself and one user message per comment.

    Raises:
        ValueError: if no "owner/repo" name can be extracted from html_url.
        KeyError: if the repository or an author association is not present
            in the lookup tables.
    """
    url = data["html_url"]
    match = re.search(r"github\.com/([^/]+/[^/]+)", url)
    if match is None:
        # Fail with the offending URL instead of an AttributeError on None.
        raise ValueError(f"Cannot extract the repository name from html_url: {url!r}")
    repo_desc = REPO_DESC[match.group(1)]["description"]

    raise_user = data["user"]["login"]
    messages = [
        {
            "role": "system",
            "content": global_issue_prompt.replace("<user_provided_repository_description>", f"The repository {repo_desc}")
        },
        {
            "role": "user",
            "content": question_comment_prompt.format(
                ROLE_MAP_BEGIN[str(data["author_association"])],
                data["title"],
                data["body"]
            )
        }
    ]

    for comment in data["comments_list"]:
        # Comments written by the issue raiser use the ROLE_MAP_BEGIN wording,
        # everybody else uses ROLE_MAP_COMMENT.
        if comment["user"]["login"] == raise_user:
            role_map = ROLE_MAP_BEGIN
        else:
            role_map = ROLE_MAP_COMMENT
        messages.append(
            {
                "role": "user",
                "content": comment_prompt.format(
                    role_map[str(comment["author_association"])],
                    comment["body"]
                )
            }
        )
    return messages


def _predict_label(response):
    """Return the raw label found in one model response.

    Preference order: the first JSON object with a truthy "final_issue_type",
    then the "classification" of the last JSON object (the conversation ended
    early, so the last round decides), then "other".
    """
    cls_result = extract_all_json_from_string(response)
    for cls in cls_result:
        if cls.get("final_issue_type"):
            return cls["final_issue_type"]
    # Read the fallback from the same object that is tested, not from a
    # loop variable left over from the search above.
    if cls_result and cls_result[-1].get("classification"):
        return cls_result[-1]["classification"]
    return "other"


def _normalize_prediction(value):
    """Lower-case a predicted label and map anything outside the label set to "other".

    Non-string values (for example a list or a number emitted by the model)
    are also mapped to "other" instead of raising on `.lower()`.
    """
    if not isinstance(value, str):
        return "other"
    value = value.lower()
    return value if value in label_map else "other"


# The dataset does not change between runs: read it once and close the file
# before the long-running inference starts.
with open(data_path, encoding="utf-8") as fp:
    issue_data = json.load(fp)

for t in range(TIMES):
    # Rebuilt for every run in case get_deepseek_output mutates the message
    # lists (TODO confirm); this is cheap compared to inference.
    all_messages = [_build_issue_messages(data) for data in issue_data]

    responses = get_deepseek_output(model, tokenizer, all_messages, max_tokens=4096)

    # Gold labels and responses are paired positionally.
    origin_labels = []
    pred_labels = []
    for data, response in zip(issue_data, responses):
        origin_labels.append(data["tag_labels"].lower())
        pred_labels.append(_normalize_prediction(_predict_label(response)))

    final_origin_labels_num = [label_map[l] for l in origin_labels]
    final_pred_labels_num = [label_map[l] for l in pred_labels]

    # Pass `labels` explicitly: without it classification_report raises when a
    # class is missing from both the gold and the predicted labels, because
    # target_names would no longer match the number of classes found.
    class_ids = [label_map[name] for name in labels]
    report = classification_report(
        final_origin_labels_num, final_pred_labels_num,
        labels=class_ids, target_names=labels
    )
    print(report)
    report_dict = classification_report(
        final_origin_labels_num, final_pred_labels_num,
        labels=class_ids, target_names=labels, output_dict=True
    )
    # Convert the report dict into a DataFrame with one row per category.
    df_report = pd.DataFrame(report_dict).transpose()
    df_report["experiment_id"] = t + 1  # 1-based run number
    df_report.index.name = "category"
    df_report = df_report.reset_index()

    all_reports.append(df_report)
    all_reports.append(pd.DataFrame({"category": [""]}))  # blank spacer row

    # Write after every run so an interrupted session keeps the finished runs;
    # after the last run the file is the same as a single final write.
    final_df = pd.concat(all_reports, ignore_index=True)
    final_df.to_excel(result_path, index=False)

Token indices sequence length is longer than the specified maximum sequence length for this model (36478 > 16384). Running this sequence through the model will result in indexing errors
Processed prompts: 100%|██████████| 1933/1933 [22:26<00:00,  1.44it/s, est. speed input: 5150.19 toks/s, output: 481.26 toks/s] 


              precision    recall  f1-score   support

       error       0.41      0.23      0.30       251
 performance       0.19      0.33      0.24        60
  deployment       0.33      0.10      0.16       165
    question       0.58      0.71      0.64       921
       other       0.36      0.35      0.35       536

    accuracy                           0.48      1933
   macro avg       0.38      0.35      0.34      1933
weighted avg       0.47      0.48      0.46      1933



Processed prompts: 100%|██████████| 1933/1933 [22:42<00:00,  1.42it/s, est. speed input: 5089.93 toks/s, output: 473.21 toks/s] 


              precision    recall  f1-score   support

       error       0.39      0.23      0.29       251
 performance       0.18      0.33      0.23        60
  deployment       0.33      0.11      0.16       165
    question       0.59      0.73      0.65       921
       other       0.40      0.36      0.38       536

    accuracy                           0.50      1933
   macro avg       0.38      0.35      0.34      1933
weighted avg       0.48      0.50      0.47      1933



Processed prompts: 100%|██████████| 1933/1933 [22:46<00:00,  1.41it/s, est. speed input: 5074.87 toks/s, output: 472.77 toks/s] 


              precision    recall  f1-score   support

       error       0.39      0.22      0.28       251
 performance       0.19      0.35      0.25        60
  deployment       0.34      0.12      0.17       165
    question       0.58      0.72      0.64       921
       other       0.37      0.34      0.36       536

    accuracy                           0.49      1933
   macro avg       0.38      0.35      0.34      1933
weighted avg       0.47      0.49      0.46      1933



Processed prompts: 100%|██████████| 1933/1933 [22:55<00:00,  1.40it/s, est. speed input: 5041.21 toks/s, output: 474.22 toks/s]


              precision    recall  f1-score   support

       error       0.38      0.21      0.27       251
 performance       0.22      0.37      0.28        60
  deployment       0.34      0.10      0.16       165
    question       0.60      0.73      0.66       921
       other       0.37      0.36      0.37       536

    accuracy                           0.50      1933
   macro avg       0.38      0.35      0.35      1933
weighted avg       0.47      0.50      0.47      1933



Processed prompts: 100%|██████████| 1933/1933 [23:14<00:00,  1.39it/s, est. speed input: 4974.22 toks/s, output: 468.65 toks/s] 


              precision    recall  f1-score   support

       error       0.39      0.24      0.29       251
 performance       0.22      0.40      0.29        60
  deployment       0.38      0.11      0.17       165
    question       0.59      0.72      0.65       921
       other       0.39      0.37      0.38       536

    accuracy                           0.50      1933
   macro avg       0.40      0.37      0.36      1933
weighted avg       0.48      0.50      0.48      1933



Processed prompts:  13%|█▎        | 249/1933 [03:27<15:53,  1.77it/s, est. speed input: 4377.82 toks/s, output: 402.62 toks/s]

In [None]:
import gc
import sys

import torch

# Release the vLLM model held in the global name `model`.
# pop() instead of `del` keeps the cell re-runnable: it does not raise when
# the model was already removed or never loaded.
globals().pop("model", None)

# Drop the cached `vllm` module entry (no error if it was never imported).
# NOTE(review): this only forces a fresh import next time; it does not free
# memory by itself.
sys.modules.pop("vllm", None)

# Collect garbage first so the freed blocks go back to the allocator cache,
# then hand the cached GPU memory back to the driver.
gc.collect()
torch.cuda.empty_cache()

In [9]:
# Evaluation of the predictions currently held in `pred_labels` /
# `origin_labels` (results after the whole "question" category was labelled).
from collections import Counter

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}

# Raw label distribution before normalization; str() leaves string labels
# unchanged and keeps unhashable values (e.g. lists) from breaking Counter.
print(Counter(str(x) for x in pred_labels))

origin_labels = [x.lower() for x in origin_labels]
# Lower-case the predictions; anything outside the label set, including
# non-string values, becomes "other".
pred_labels = [
    x.lower() if isinstance(x, str) and x.lower() in label_map else "other"
    for x in pred_labels
]

final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

# Pass `labels` explicitly so the report still works when one of the classes
# appears in neither the gold nor the predicted labels.
report = classification_report(
    final_origin_labels_num,
    final_pred_labels_num,
    labels=[label_map[name] for name in labels],
    target_names=labels,
)
print(report)

Counter({'question': 1081, 'other': 565, 'error': 141, 'performance': 94, 'deployment': 47, 'feature_request': 2, 'compilation/installation error': 1, 'documentation_error': 1, 'Other': 1})
              precision    recall  f1-score   support

       error       0.39      0.22      0.28       251
 performance       0.22      0.35      0.27        60
  deployment       0.38      0.11      0.17       165
    question       0.60      0.70      0.65       921
       other       0.38      0.40      0.39       536

    accuracy                           0.50      1933
   macro avg       0.39      0.36      0.35      1933
weighted avg       0.48      0.50      0.48      1933

