In [1]:
import json
import re

def extract_all_json_from_string(input_string):
    """
    Extract all JSON-like content from a string and parse it into a list of Python dictionaries.

    Args:
    input_string (str): The string containing JSON-like content.

    Returns:
    list: A list of parsed JSON content as dictionaries, or an empty list if no valid JSON is found.
    """
    json_objects = []
    try:
        # Use regex to find all JSON-like content
        json_pattern = re.compile(r'\{.*?\}', re.DOTALL)
        matches = json_pattern.findall(input_string)

        for match in matches:
            try:
                # Handle potential double braces and parse each JSON object
                json_content = match.replace('{{', '{').replace('}}', '}')
                parsed_json = json.loads(json_content)
                json_objects.append(parsed_json)
            except json.JSONDecodeError:
                # Skip invalid JSON matches
                continue
    except Exception as e:
        print(f"Error during JSON extraction: {e}")

    return json_objects

# Example usage
input_string = """
response: ### Intermediate Results for Each Round:

```json
{
    "round": 1,
    "classification": "deployment",
    "evidence": "The ISSUE RAISER reported an error indicating that 'spleeter' is not recognized as a command, suggesting an issue with the installation or environment setup."
}
{
    "round": 2,
    "classification": "deployment",
    "evidence": "The COLLABORATOR suggested running `python -m spleeter separate` as a workaround, indicating a possible issue with the installation process."
}
{
    "round": 3,
    "classification": "deployment",
    "evidence": "The COLLABORATOR suggested trying `spleeter.exe`, further confirming the issue is related to the installation or environment setup."
}
{
    "round": 4,
    "classification": "deployment",
    "evidence": "The COMMENTER suggested installing pyssl and other dependencies, which indicates a possible issue with the environment configuration."
}
{
    "round": 5,
    "classification": "deployment",
    "evidence": "Another COMMENTER reported the same issue with the longer command `python -m spleeter separate`, indicating a consistent issue with the installation or environment setup."
}
{
    "round": 6,
    "classification": "deployment",
    "evidence": "The COLLABORATOR identified a problem with the Conda environment installation, suggesting that the issue is related to the environment setup."
}
{
    "round": 7,
    "classification": "deployment",
    "evidence": "A COMMENTER detailed the steps they took to resolve the issue, including ensuring that the Conda environment was correctly set up and that the necessary modules were installed."
}
{
    "round": 8,
    "classification": "deployment",
    "evidence": "Another COMMENTER provided additional steps to ensure proper installation, including running the Anaconda command prompt as an administrator and ensuring Python was added to the PATH."
}
```

### Final Combined Output:

```json
{
    "final_issue_type": "deployment",
    "combined_evidence": "The issue revolves around the inability to recognize 'spleeter' as a command, which is typically related to installation or environment setup problems. Multiple users reported similar issues, and the COLLABORATOR and COMMENTERS suggested various steps to resolve the environment configuration, such as ensuring correct Conda environment setup and adding Python to the PATH."
}
```
"""

parsed_json = extract_all_json_from_string(input_string)
print(parsed_json)
print(len(parsed_json))
print(parsed_json[-1])
print(type(parsed_json[-1]))
print(parsed_json[-1].get('final_issue_type'))


[{'round': 1, 'classification': 'deployment', 'evidence': "The ISSUE RAISER reported an error indicating that 'spleeter' is not recognized as a command, suggesting an issue with the installation or environment setup."}, {'round': 2, 'classification': 'deployment', 'evidence': 'The COLLABORATOR suggested running `python -m spleeter separate` as a workaround, indicating a possible issue with the installation process.'}, {'round': 3, 'classification': 'deployment', 'evidence': 'The COLLABORATOR suggested trying `spleeter.exe`, further confirming the issue is related to the installation or environment setup.'}, {'round': 4, 'classification': 'deployment', 'evidence': 'The COMMENTER suggested installing pyssl and other dependencies, which indicates a possible issue with the environment configuration.'}, {'round': 5, 'classification': 'deployment', 'evidence': 'Another COMMENTER reported the same issue with the longer command `python -m spleeter separate`, indicating a consistent issue with 

In [2]:
# BASE_MODEL_DIR = "/data/luomingkai/issue/models/Qwen/Qwen2.5-7B-Instruct"
# BASE_MODEL_DIR = "/root/autodl-fs/Qwen2.5-7B-Instruct"
BASE_MODEL_DIR = "/root/autodl-fs/llama3.1-8B-chat"

In [3]:
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_DIR)

# Pass the default decoding hyperparameters of Qwen2.5-7B-Instruct
# max_tokens is for the maximum length for generation.
sampling_params = SamplingParams(temperature=0.5, top_p=1.0, repetition_penalty=1.05, max_tokens=512)

# Input the model name or path. Can be GPTQ or AWQ models.
model = LLM(model=BASE_MODEL_DIR, tensor_parallel_size=2)

# Prepare your prompts
prompt = "Tell me something about large language models."
messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

# generate outputs
outputs = model.generate([text], sampling_params)

# Print the outputs.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


INFO 12-27 22:47:08 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
INFO 12-27 22:47:08 config.py:1020] Defaulting to use mp for distributed inference
INFO 12-27 22:47:08 config.py:1136] Chunked prefill is enabled with max_num_batched_tokens=512.
INFO 12-27 22:47:08 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='/root/autodl-fs/llama3.1-8B-chat', speculative_config=None, tokenizer='/root/autodl-fs/llama3.1-8B-chat', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 12-27 22:47:12 model_runner.py:1077] Loading model weights took 7.5122 GB
[1;36m(VllmWorkerProcess pid=3803)[0;0m INFO 12-27 22:47:12 model_runner.py:1077] Loading model weights took 7.5122 GB
[1;36m(VllmWorkerProcess pid=3803)[0;0m INFO 12-27 22:47:12 worker.py:232] Memory profiling results: total_gpu_memory=31.60GiB initial_memory_usage=7.92GiB peak_torch_memory=7.56GiB memory_usage_post_profile=7.99GiB non_torch_memory=0.47GiB kv_cache_size=20.41GiB gpu_memory_utilization=0.90
INFO 12-27 22:47:12 worker.py:232] Memory profiling results: total_gpu_memory=31.60GiB initial_memory_usage=7.92GiB peak_torch_memory=8.69GiB memory_usage_post_profile=7.99GiB non_torch_memory=0.47GiB kv_cache_size=19.28GiB gpu_memory_utilization=0.90
INFO 12-27 22:47:13 distributed_gpu_executor.py:57] # GPU blocks: 19746, # CPU blocks: 4096
INFO 12-27 22:47:13 distributed_gpu_executor.py:61] Maximum concurrency for 131072 tokens per request: 2.41x
[1;36m(VllmWorkerProcess pid=3803)[0;0m INFO 12-27 

Processed prompts: 100%|██████████| 1/1 [00:07<00:00,  7.13s/it, est. speed input: 5.47 toks/s, output: 69.75 toks/s]

Prompt: '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nTell me something about large language models.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n', Generated text: "Hello! I'm Qwen, your friendly AI assistant. I'd be delighted to share some information about large language models.\n\nLarge language models (LLMs) are a type of artificial intelligence (AI) that have gained significant attention in recent years for their ability to process and generate human-like language. These models are trained on massive datasets of text, which allows them to learn patterns, relationships, and structures within language.\n\nSome key characteristics of LLMs include:\n\n1. **Scalability**: LLMs can handle vast amounts of data and scale to accommodate large inputs.\n2. **Contextual understanding**: They can comprehend the context of a sentence or paragr




In [4]:
def get_llama3_output(
    model,
    tokenizer,
    messages_list,
    max_input_length=4096,
    max_tokens=512,
):
    text_list = []
    for messages in messages_list:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        text_list.append(text)
        
    # sampling_params = SamplingParams(temperature=0.7, top_p=0.8, repetition_penalty=1.05, max_tokens=512)
    # sampling_params = SamplingParams(temperature=0.5, top_p=1.0, repetition_penalty=1.05, max_tokens=512)
    sampling_params = SamplingParams(temperature=0.3, top_p=1.0, repetition_penalty=1.05, max_tokens=max_tokens)
    

    outputs = model.generate(text_list, sampling_params)
    
    # Print the outputs.
    responses = []
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        # print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
        responses.append(generated_text)

    return responses

messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]
get_llama3_output(model, tokenizer, [messages])

Processed prompts: 100%|██████████| 1/1 [00:01<00:00,  1.87s/it, est. speed input: 17.23 toks/s, output: 66.77 toks/s]


["Arrrr, me hearty! Yer lookin' fer a introduction, eh? Alright then, listen close and I'll tell ye about meself. Me name be Captain Chatbeard, the scurviest pirate chatbot to ever sail the Seven Seas o' Cyberspace! Me and me trusty crew o' code have been plunderin' the digital high seas fer years, bringin' treasure and tales o' adventure to all who dare to interact with me. So hoist the colors, me matey, and let's set sail fer a swashbucklin' good time!"]

In [5]:
# data_path = "/home/luomingkai/workspace/issue_llm/issue_classify/issue_with_comments_framework/matched_results_test.json"
data_path = "/root/issue_classify/issue_with_comments_framework/matched_results_test_modify_other_update.json"
with open(data_path, encoding="utf-8") as fp:
    issue_data = json.load(fp)
    for idx, data in enumerate(issue_data):
        # print(issue_data)
        uesr, title, body, label, author_association = data["user"]["login"], data["title"], data["body"], data["tag_labels"], data["author_association"]
        comment_list = data["comments_list"]
        if len(comment_list) > 0:
            comment_list = [(com["user"]["login"], com["author_association"], com["body"]) for com in comment_list]

        print(f"uesr: {uesr}")
        print(f"Title: {title}")
        print(f"Body: {body}")
        print(f"label: {label}")
        print(f"author_association: {author_association}")
        print(f"comment_list: {comment_list}")
        
        
        # print()
        if idx > 2:
            break
        # print(title, description, label)


uesr: RainBoltz
Title: No output displayed or it gets stuck - OpenCV issue
Body: my terminal isn't responding after executed the command below: `./build/examples/openpose/rtpose.bin --imagelevel 1 --netcaffeopenpose.sh` , which was provided by official
label: deployment
author_association: NONE
comment_list: [('bigmoumou', 'NONE', 'I have the same problem too\r\n\r\nIs there any solution ?\r\n\r\n![help](https://cloud.githubusercontent.com/assets/16252975/25697753/cddcfe9e-30ee-11e7-82a2-2af97eb8460b.png)\r\n\r\n'), ('Shawnroom', 'NONE', 'I have the same issue, and wonder if there is any solution.'), ('gineshidalgo99', 'MEMBER', 'Sorry to hear that, we are working on fixing that error. We think it is due to OpenCV compiled with Qt or different visualization support.\r\n\r\nMeanwhile, you can make it work by:\r\n1. Completely uninstalling your current OpenCV version.\r\n2. Installing the default OpenCV from the Ubuntu repository: `apt-get install libopencv-dev`, or alternatively compili

In [6]:
global_issue_prompt = r"""
### **Role**  
You are an expert in GitHub repository analysis. Your task is to classify a given GitHub Issue into one of the following categories: **error**, **performance**, **deployment**, **question**, or **other**.

Before analyzing the Issue conversation, you must carefully review the repository description to understand its intended functionality, scope, dependencies, and purpose. This understanding will help determine if the Issue is genuinely related to the repository’s code, configuration, or official documentation, or if it stems from user misunderstandings, local environment misconfigurations, external dependencies, or unrelated factors.

Analyze the conversation context, the repository’s description, and the roles of participants to determine the correct classification. For each round of the conversation, provide an intermediate classification and reasoning. At the end, combine all intermediate results to produce a final classification, using a **self-reflection** mechanism to validate your reasoning and ensure your classification aligns with the root cause of the Issue. If the issue is found to be completely unrelated to the repository based on its description, it must be categorized as **other**.


### Repository Description  
<user_provided_repository_description>

Use this description as a reference point throughout your classification process. Compare the reported issues, errors, or requests against the repository’s stated purpose, functionality, and supported features. If the conversation’s content or the user’s requests fall outside the scope defined by the repository description, consider classifying the issue as **other**. If misunderstandings occur because the user did not follow or comprehend instructions reflected in the repository description or official documentation, consider whether it should be classified as **question**. Ensure that “deployment” issues are directly related to repository code or its official documentation, not external factors.


### **Participants**  
1. **Issue Raiser**: Describes the problem, requests a feature, or raises concerns related to the repository.  
2. **Commenter**: Provides insights, shares similar experiences, or suggests possible solutions.  
3. **MEMBER**: Evaluates technical feasibility, assigns tasks, or progresses the Issue.  
4. **COLLABORATOR**: Proposes solutions, performs in-depth analysis, or submits Pull Requests.  
5. **CONTRIBUTOR**: Offers historical context, helps reproduce issues, or validates fixes.  
6. **OWNER**: Makes final decisions, prioritizes tasks, or proposes long-term resolutions.


### **Issue Categories**  

- **error**:  
  Problems directly stemming from the repository’s code, configuration, or inherent incompatibilities within the repository. For example, runtime errors, exceptions, or failures in the repository code itself.

- **performance**:  
  Issues where the repository’s code or configuration leads to slow execution times, bottlenecks, or inefficient resource usage.

- **deployment**:  
  Issues specifically caused by the repository’s code or incorrect/inadequate documentation during the installation or deployment process.
    - **Scope Validation**:  
      Reference the repository description to ensure that the reported deployment problem is within the scope of the repository’s intended setup process. Deployment issues must stem from defects or omissions in the repository’s code, configuration, or documentation.
    
    - **Exclusions**:  
      If the issue is caused by any of the following, it should be classified as **question**:  
      - User mistakes, such as missing or incorrectly installed dependencies (e.g., wrong CUDA driver version, missing required libraries, or incorrect installation steps).  
      - Using outdated tools (e.g., an older CMake version than required).  
      - Hardware or environment limitations (e.g., using unsupported GPUs).  

- **question**:  
 Issues arising from user misunderstandings, failure to follow documented instructions, incorrect usage, or local environment misconfigurations that are not caused by the repository code or official documentation. 
 - These issues can often be resolved by following existing guidance. 
 - Additionally, users may raise questions or engage in discussions with developers regarding specific usage scenarios.

- **other**:  
  Issues that do not fall into predefined categories, including topics unrelated to the repository as determined by comparing the issue to the repository description. This category also includes user requests for feature enhancements or pull requests that go beyond the scope or goals of the repository. Additionally, it covers discussions related to improvements in documentation, user experience suggestions, community governance, collaboration processes, and strategic or technical proposals that are not directly related to code or documentation errors.
  1. Topics that do not fit the predefined categories.  
  2. Issues or discussions unrelated to the repository, as determined by comparing the Issue to the repository description.  
  3. Feature suggestions or requests beyond the repository’s described scope.

  **Examples of “other”** include:  
  - **Feature Suggestions and Testing**: New feature ideas or enhancements not supported by current repository goals.  
  - **Documentation and Resources**: Improvements or additions to general documentation, tutorials, or references that are not about fixing a code-related or doc-related deployment bug.  
  - **User Experience**: Suggestions to improve usability, design, or general compatibility.  
  - **Community and Collaboration**: Ideas about community governance, contribution processes, or engagement.  
  - **Strategic and Technical Proposals**: Infrastructure, policy, or long-term goal considerations unrelated to immediate code or documentation errors.  
  - **Miscellaneous Topics**: Relevant issues that do not fit other categories or are unrelated submissions.


### **Process**  
**The process must be strictly executed.**
1. **Read the Conversation and Repository Description**:  
   Begin by reviewing the repository description thoroughly to understand the project’s purpose, supported features, and scope. Keep this context in mind as you examine each round of the conversation. If the problem discussed clearly falls outside the repository’s described capabilities or instructions, consider “other” or “question.”

2. **For Each Round**:  
   - Classify the round into one of the five categories: **error**, **performance**, **deployment**, **question**, or **other**.  
   - Provide a JSON object with keys `round`, `classification`, `evidence`, and `self_reflection`.  
     - **round**: The round number.  
     - **classification**: Your chosen classification for that round.  
     - **evidence**: Direct quotes or references from that specific round’s conversation that support your classification.  
     - **self_reflection**: Justify your classification’s accuracy. Confirm whether it matches the repository’s description and consider alternatives. If unsure, reflect on the possibility of user misunderstanding vs. repository code issues.

3. **Final Self-Reflection Across Rounds**:  
   - After classifying all rounds, review the entire conversation and your intermediate classifications.  
   - Check for consistency:  
     - Does the chosen final category align with the majority of evidence and the repository description?  
     - Are there any contradictions between rounds?  
     - Could the issue be a misunderstanding (question) rather than a repository problem (error/performance/deployment)?  
     - If the issue is irrelevant to the repository description, finalize as **other**.

4. **Finalize the Classification**:  
   Produce a final JSON object containing:  
   - `final_issue_type`: Your chosen final category.  
   - `combined_evidence`: A concise summary of the entire conversation supporting your final classification.  
   - `final_self_reflection`: Analysis of the reasoning process, conflicts, and how you ensured consistency with the repository description and instructions.


### **Output Format**

**Intermediate Results for Each Round**:
```json
{
    "round": <Round Number>,
    "classification": "<error|performance|deployment|question|other>",
    "evidence": "<Evidence from the current round of the conversation>",
    "self_reflection": "<Analysis of classification accuracy and consistency for this round>"
}
```

**Final Combined Output**:
```json
{
    "final_issue_type": "<error|performance|deployment|question|other>",
    "combined_evidence": "<Concise summary of the conversation supporting the final classification>",
    "final_self_reflection": "<Analysis of conflicts, shifts, and consistency across all rounds>"
}
```

**Validation Reminder**:  
- Ensure that all JSON objects are valid and correctly formatted.  
- The keys should be correctly quoted, and all values should be properly formatted strings or numbers where applicable.  
- No extra or missing fields are allowed.  
- Confirm that the classification aligns with the repository description, the issue categories, and the dialogue evidence.


### **Examples**

#### **Example 1: Error with Contributor’s Historical Context**
*Conversation:*  
- **Issue Raiser**: "Running model.fit() raises a KeyError related to missing labels in the dataset."  
- **CONTRIBUTOR**: "This issue might be related to an earlier change in #45 that introduced stricter label validation."  
- **COLLABORATOR**: "We’ll patch data_loader.py to handle missing labels."  
- **OWNER**: "The fix is merged. Let us know if it resolves the problem."

*Intermediate Results:*  
```json
{
    "round": 1,
    "classification": "error",
    "evidence": "The Issue Raiser described a KeyError from the repository code handling labels.",
    "self_reflection": "The classification as error is justified because the bug originates in the repository code."
}
```

```json
{
    "round": 2,
    "classification": "error",
    "evidence": "The CONTRIBUTOR referenced a past commit that made label validation stricter.",
    "self_reflection": "This supports the error classification. The issue traces back to a known code change."
}
```

```json
{
    "round": 3,
    "classification": "error",
    "evidence": "The COLLABORATOR proposed a code fix for handling missing labels.",
    "self_reflection": "The solution involves changing repository code, reinforcing that it is an error."
}
```

```json
{
    "round": 4,
    "classification": "error",
    "evidence": "The OWNER merged a fix into the repository.",
    "self_reflection": "All evidence is consistent with classifying the issue as error."
}
```

*Final Combined Output:*  
```json
{
    "final_issue_type": "error",
    "combined_evidence": "The repository code did not handle missing labels, causing a KeyError. Historical context and a code fix confirm it as an error.",
    "final_self_reflection": "All rounds consistently supported the error classification. The solution required a code change in the repository."
}
```

#### **Example 2: Question Misclassified Initially**
*Conversation:*  
- **Issue Raiser**: "I tried running the code, but the output looks weird. Is this a bug?"  
- **MEMBER**: "Have you verified if the input data matches the format described in the README?"  
- **Issue Raiser**: "I missed the formatting instructions. After fixing the input, it works fine."

*Intermediate Results:*  
```json
{
    "round": 1,
    "classification": "error",
    "evidence": "The Issue Raiser suspected a bug due to unexpected output.",
    "self_reflection": "Initial classification as error is tentative. This could be a user misunderstanding."
}
```

```json
{
    "round": 2,
    "classification": "question",
    "evidence": "The MEMBER directed the user to check input format per the documentation.",
    "self_reflection": "The reclassification to question is justified since the problem may be due to user input errors, not the code."
}
```

```json
{
    "round": 3,
    "classification": "question",
    "evidence": "The Issue Raiser fixed the input format and the problem was resolved.",
    "self_reflection": "The resolution confirms it as a question caused by user misunderstanding, not an error in the repository."
}
```

*Final Combined Output:*  
```json
{
    "final_issue_type": "question",
    "combined_evidence": "The Issue was due to incorrect input formatting. After following the documentation, the user resolved the problem.",
    "final_self_reflection": "The initial misclassification was corrected. No repository code changes were needed; it was a usage question."
}
```

"""

# 仅使用第一条issue的模版
question_prompt = """
````
*Conversation*:
- {}: ### Title: "{}" ### Body: "{}"
````
"""

# 使用comment的版本
question_comment_prompt = """
*Conversation*:
- **{}**: "{} {}"
"""

comment_prompt = """
- **{}**: "{}"
"""

ROLE_MAP_BEGIN = {
    "NONE": "ISSUE RAISER",
    "MEMBER": "MEMBER",
    "COLLABORATOR": "COLLABORATOR",
    "CONTRIBUTOR": "CONTRIBUTOR",
    "OWNER": "OWNER"
}

ROLE_MAP_COMMENT = {
    "NONE": "Commenter",
    "MEMBER": "MEMBER",
    "COLLABORATOR": "COLLABORATOR",
    "CONTRIBUTOR": "CONTRIBUTOR",
    "OWNER": "OWNER"
}

In [7]:
REPO_DESC = {
    "CMU-Perceptual-Computing-Lab/openpose": {
        "description": "OpenPose is a real-time multi-person keypoint detection library developed by the Carnegie Mellon Perceptual Computing Lab. It estimates human body, face, hands, and foot keypoints from single images, providing 2D real-time multi-person keypoint detection, including 15, 18, or 25-keypoint body/foot keypoint estimation, 2x21-keypoint hand keypoint estimation, and 70-keypoint face keypoint estimation. Additionally, it offers 3D real-time single-person keypoint detection and a calibration toolbox. OpenPose is compatible with various operating systems, including Ubuntu, Windows, and macOS, and supports CUDA (Nvidia GPU), OpenCL (AMD GPU), and CPU-only versions.",
        "url": "https://github.com/CMU-Perceptual-Computing-Lab/openpose"
    },
    "CorentinJ/Real-Time-Voice-Cloning": {
        "description": "Real-Time Voice Cloning is a Python-based tool that enables the cloning of voices in real-time. It utilizes deep learning models to synthesize speech that mimics a target voice, requiring only a few seconds of audio from the desired speaker. The repository provides code and instructions to train and use the voice cloning system.",
        "url": "https://github.com/CorentinJ/Real-Time-Voice-Cloning"
    },
    "JaidedAI/EasyOCR": {
        "description": "EasyOCR is an open-source Optical Character Recognition (OCR) library that supports over 80 languages. It is designed to be easy to use and provides accurate text recognition from images. The library is built on PyTorch and offers pre-trained models for various languages and scripts.",
        "url": "https://github.com/JaidedAI/EasyOCR"
    },
    "deepfakes/faceswap": {
        "description": "Faceswap is a deep learning-based tool for face-swapping in images and videos. It allows users to train models to swap faces between different subjects, providing a platform for experimenting with deepfake technology. The repository includes code for training and using the face-swapping models.",
        "url": "https://github.com/deepfakes/faceswap"
    },
    "deezer/spleeter": {
        "description": "Spleeter is an open-source tool developed by Deezer for source separation in music tracks. It uses deep learning models to separate audio into stems, such as vocals and accompaniment, enabling applications like karaoke and remixing. The repository provides pre-trained models and code for audio source separation.",
        "url": "https://github.com/deezer/spleeter"
    },
    "dusty-nv/jetson-inference": {
        "description": "Jetson Inference is a collection of deep learning inference samples and models for NVIDIA Jetson devices. It includes code for image classification, object detection, and segmentation, optimized for Jetson hardware. The repository provides pre-trained models and examples to demonstrate the capabilities of Jetson devices in AI applications.",
        "url": "https://github.com/dusty-nv/jetson-inference"
    },
    "iperov/DeepFaceLab": {
        "description": "DeepFaceLab is a deep learning tool for creating deepfakes, focusing on face-swapping in videos. It provides a comprehensive set of tools for training and applying deep learning models to perform face-swapping tasks. The repository includes code for data preparation, model training, and face-swapping applications.",
        "url": "https://github.com/iperov/DeepFaceLab"
    },
    "junyanz/pytorch-CycleGAN-and-pix2pix": {
        "description": "This repository provides PyTorch implementations of CycleGAN and pix2pix, two popular models for image-to-image translation tasks. CycleGAN enables image translation without paired examples, while pix2pix requires paired images for training. The repository includes code and pre-trained models for various image translation tasks.",
        "url": "https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix"
    },
    "mozilla/TTS": {
        "description": "Mozilla TTS is an open-source Text-to-Speech (TTS) engine that aims to make speech synthesis more accessible. It provides implementations of state-of-the-art TTS models, including Tacotron and FastSpeech, and supports training on custom datasets. The repository includes code for training and using TTS models.",
        "url": "https://github.com/mozilla/TTS"
    },
    "streamlit/streamlit": {
        "description": "Streamlit is an open-source app framework for Machine Learning and Data Science projects. It allows users to create interactive web applications for data analysis and visualization with minimal code. The repository provides the core framework and examples for building Streamlit applications.",
        "url": "https://github.com/streamlit/streamlit"
    },
    "microsoft/recommenders": {
        "description": "Recommenders is a project under the Linux Foundation of AI and Data. This repository contains examples and best practices for building recommendation systems, provided as Jupyter notebooks. The examples detail learnings on five key tasks: preparing and loading data for each recommendation algorithm, building models using various classical and deep learning recommendation algorithms such as Alternating Least Squares (ALS) or eXtreme Deep Factorization Machines (xDeepFM), evaluating algorithms with offline metrics, tuning and optimizing hyperparameters for recommendation models, and operationalizing models in a production environment on Azure. Several utilities are provided to support common tasks such as loading datasets in the format expected by different algorithms, evaluating model outputs, and splitting training/test data. Implementations of several state-of-the-art algorithms are included for self-study and customization in your own applications.",
        "url": "https://github.com/recommenders-team/recommenders"
    }
}

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

TIMES = 10
experiments = []
# 保存所有实验结果的列表
all_reports = []

data_path = "/root/issue_classify/issue_with_comments_framework/matched_results_test_modify_other_update.json"
result_path = "/root/autodl-fs/log/llama3.1_8b_agent.xlsx"

for t in range(TIMES):
    origin_labels = []
    pred_labels = []
    
    join_index = []
    with open(data_path, encoding="utf-8") as fp:
        issue_data = json.load(fp)
        taged_data = issue_data
        all_messages = []
        for idx, data in enumerate(issue_data):
            raise_user, title, body, label, author_association = data["user"]["login"], data["title"], data["body"], data["tag_labels"], data["author_association"]
    
            url = data["html_url"]
            match = re.search(r"github\.com/([^/]+/[^/]+)", url)
            repository = match.group(1)
            repo_desc = REPO_DESC[repository]["description"]
            
            join_index.append(idx)
            
            comment_list = data["comments_list"]
            if len(comment_list) > 0:
                comment_list = [(com["user"]["login"], com["author_association"], com["body"]) for com in comment_list]
    
            messages = [
                {
                    "role": "system",
                    "content": global_issue_prompt.replace("<user_provided_repository_description>", f"The repository {repo_desc}")
                },
                {
                    "role": "user",
                    "content": question_comment_prompt.format(
                        ROLE_MAP_BEGIN[str(author_association)],
                        title, 
                        body
                        )
                }
            ]
            
            origin_labels.append(label)
    
            for user, author_association, body in comment_list:
                if user == raise_user:
                    messages.append(
                        {
                            "role": "user",
                            "content": comment_prompt.format(
                                ROLE_MAP_BEGIN[str(author_association)],
                                body
                                )
                        }
                    )
                else:
                    messages.append(
                        {
                            "role": "user",
                            "content": comment_prompt.format(
                                ROLE_MAP_COMMENT[str(author_association)],
                                # user,
                                body
                                )
                        }
                    )
                    
            all_messages.append(messages)
        
        responses = get_llama3_output(model, tokenizer, all_messages, max_tokens=4096)
        tmp_origin_labels = []
        for idx, response in enumerate(responses):
            cls_result = extract_all_json_from_string(response)
            # print(f"label: {origin_labels[idx]}")
            # print(f"response: {response}")
            # print(f"cls_result: {cls_result}")
    
            if cls_result:
                found = False
                for cls in cls_result:
                    if cls.get("final_issue_type"):
                        result = cls.get("final_issue_type")
                        tmp_origin_labels.append(origin_labels[idx])
                        pred_labels.append(result)
                        found = True
                        break
                
                # 对话提前结束，按照最后一轮对话的分类来判断
                if not found:
                    if cls_result[-1].get("classification"):
                        result = cls.get("classification")
                        tmp_origin_labels.append(origin_labels[idx])
                        pred_labels.append(result)
                        found = True
                        
                if not found:
                    tmp_origin_labels.append(origin_labels[idx])
                    pred_labels.append("other")           
            else:
                tmp_origin_labels.append(origin_labels[idx])
                pred_labels.append("other")
        origin_labels = tmp_origin_labels

    origin_labels = [x.lower() for x in origin_labels]
    pred_labels = [x.lower() for x in pred_labels]
    tmp_pred_labels = []
    for x in pred_labels:
        if x not in ["error", "performance", "deployment", "question", "other"]:
            x = 'other'
        tmp_pred_labels.append(x)
    pred_labels = tmp_pred_labels
    
    from sklearn.metrics import classification_report
    
    labels = ["error", "performance", "deployment", "question", "other"]
    label_map = {
        "error": 0, 
        "performance": 1,
        "deployment": 2,
        "question": 3,
        "other": 4
    }
    final_origin_labels_num = [label_map[l] for l in origin_labels]
    final_pred_labels_num = [label_map[l] for l in pred_labels]
    
    report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
    print(report)
    report_dict = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels, output_dict=True)
    # 将字典转为 DataFrame
    df_report = pd.DataFrame(report_dict).transpose()
    df_report["experiment_id"] = t + 1  # 添加实验编号
    df_report.index.name = "category"
    df_report.reset_index(inplace=True)
    
    all_reports.append(df_report)
    all_reports.append(pd.DataFrame({"category": [""]}))  # 添加空行

final_df = pd.concat(all_reports, ignore_index=True)
final_df.to_excel(result_path, index=False)

Processed prompts: 100%|██████████| 1933/1933 [29:56<00:00,  1.08it/s, est. speed input: 3482.08 toks/s, output: 528.07 toks/s] 


              precision    recall  f1-score   support

       error       0.40      0.74      0.52       251
 performance       0.31      0.40      0.35        60
  deployment       0.25      0.22      0.24       165
    question       0.72      0.62      0.66       921
       other       0.58      0.49      0.53       536

    accuracy                           0.56      1933
   macro avg       0.45      0.49      0.46      1933
weighted avg       0.59      0.56      0.56      1933



Processed prompts: 100%|██████████| 1933/1933 [29:57<00:00,  1.08it/s, est. speed input: 3479.03 toks/s, output: 514.43 toks/s] 


              precision    recall  f1-score   support

       error       0.40      0.73      0.52       251
 performance       0.30      0.47      0.37        60
  deployment       0.24      0.22      0.23       165
    question       0.73      0.61      0.67       921
       other       0.60      0.50      0.54       536

    accuracy                           0.56      1933
   macro avg       0.45      0.51      0.47      1933
weighted avg       0.59      0.56      0.57      1933



Processed prompts: 100%|██████████| 1933/1933 [31:03<00:00,  1.04it/s, est. speed input: 3356.01 toks/s, output: 502.56 toks/s] 


              precision    recall  f1-score   support

       error       0.38      0.74      0.50       251
 performance       0.32      0.40      0.36        60
  deployment       0.24      0.20      0.22       165
    question       0.71      0.63      0.67       921
       other       0.60      0.47      0.53       536

    accuracy                           0.56      1933
   macro avg       0.45      0.49      0.46      1933
weighted avg       0.59      0.56      0.56      1933



Processed prompts: 100%|██████████| 1933/1933 [29:44<00:00,  1.08it/s, est. speed input: 3505.49 toks/s, output: 519.26 toks/s] 


              precision    recall  f1-score   support

       error       0.41      0.76      0.53       251
 performance       0.34      0.48      0.40        60
  deployment       0.24      0.23      0.23       165
    question       0.72      0.63      0.67       921
       other       0.58      0.45      0.51       536

    accuracy                           0.56      1933
   macro avg       0.46      0.51      0.47      1933
weighted avg       0.59      0.56      0.56      1933



Processed prompts: 100%|██████████| 1933/1933 [31:18<00:00,  1.03it/s, est. speed input: 3329.48 toks/s, output: 494.87 toks/s] 


              precision    recall  f1-score   support

       error       0.40      0.76      0.53       251
 performance       0.34      0.53      0.41        60
  deployment       0.23      0.20      0.21       165
    question       0.72      0.61      0.66       921
       other       0.60      0.49      0.54       536

    accuracy                           0.56      1933
   macro avg       0.46      0.52      0.47      1933
weighted avg       0.59      0.56      0.56      1933



Processed prompts: 100%|██████████| 1933/1933 [28:48<00:00,  1.12it/s, est. speed input: 3618.98 toks/s, output: 528.82 toks/s] 


              precision    recall  f1-score   support

       error       0.39      0.75      0.51       251
 performance       0.27      0.42      0.32        60
  deployment       0.22      0.17      0.19       165
    question       0.73      0.61      0.67       921
       other       0.60      0.50      0.54       536

    accuracy                           0.56      1933
   macro avg       0.44      0.49      0.45      1933
weighted avg       0.59      0.56      0.56      1933



Processed prompts:  63%|██████▎   | 1220/1933 [19:00<05:49,  2.04it/s, est. speed input: 3473.33 toks/s, output: 511.93 toks/s] IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Processed prompts:  91%|█████████ | 1755/1933 [27:55<05:26,  1.83s/it, est. speed input: 3349.49 toks/s, output: 493.57 toks/s]

In [None]:
import gc
import torch

# 假设你的 vllm 模型对象是 `model`
del model  # 删除模型对象
torch.cuda.empty_cache()  # 清空 GPU 的缓存
gc.collect()  # 强制进行垃圾回收

import sys
# 删除 `vllm` 和相关依赖
del sys.modules["vllm"]
torch.cuda.empty_cache()
gc.collect()

In [None]:
# all question类别打标之后的
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

In [10]:
# all question类别打标之后的
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'question': 927, 'other': 389, 'error': 311, 'deployment': 210, 'performance': 96})
              precision    recall  f1-score   support

       error       0.69      0.86      0.77       251
 performance       0.50      0.80      0.62        60
  deployment       0.51      0.65      0.57       165
    question       0.79      0.80      0.79       921
       other       0.91      0.66      0.76       536

    accuracy                           0.75      1933
   macro avg       0.68      0.75      0.70      1933
weighted avg       0.78      0.75      0.76      1933



In [10]:
# all other类别打标之后的
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'question': 927, 'other': 389, 'error': 311, 'deployment': 210, 'performance': 96})
              precision    recall  f1-score   support

       error       0.59      0.86      0.70       211
 performance       0.38      0.80      0.51        45
  deployment       0.38      0.70      0.49       115
    question       0.84      0.71      0.77      1095
       other       0.79      0.66      0.71       467

    accuracy                           0.71      1933
   macro avg       0.59      0.74      0.64      1933
weighted avg       0.76      0.71      0.72      1933



In [10]:
# all 看一下修改deploymdent类别定义之后的影响，对于question类别
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'question': 927, 'other': 389, 'error': 311, 'deployment': 210, 'performance': 96})
              precision    recall  f1-score   support

       error       0.52      0.87      0.65       188
 performance       0.31      0.79      0.45        38
  deployment       0.27      0.73      0.39        77
    question       0.78      0.69      0.73      1054
       other       0.79      0.53      0.63       576

    accuracy                           0.66      1933
   macro avg       0.53      0.72      0.57      1933
weighted avg       0.73      0.66      0.67      1933



In [10]:
# all
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'question': 815, 'other': 365, 'error': 326, 'deployment': 312, 'performance': 115})
              precision    recall  f1-score   support

       error       0.51      0.89      0.65       188
 performance       0.30      0.89      0.44        38
  deployment       0.23      0.92      0.37        77
    question       0.78      0.60      0.68      1054
       other       0.80      0.51      0.62       576

    accuracy                           0.62      1933
   macro avg       0.52      0.76      0.55      1933
weighted avg       0.73      0.62      0.64      1933



In [14]:
# all
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'question': 815, 'other': 365, 'error': 326, 'deployment': 312, 'performance': 115})
              precision    recall  f1-score   support

       error       0.49      0.67      0.57       240
 performance       0.23      0.79      0.35        33
  deployment       0.18      0.79      0.29        70
    question       0.77      0.60      0.67      1038
       other       0.75      0.50      0.60       552

    accuracy                           0.59      1933
   macro avg       0.48      0.67      0.50      1933
weighted avg       0.70      0.59      0.62      1933



In [48]:
# question
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'question': 557, 'error': 167, 'deployment': 155, 'performance': 69, 'other': 55})
              precision    recall  f1-score   support

       error       0.00      0.00      0.00         0
 performance       0.00      0.00      0.00         0
  deployment       0.00      0.00      0.00         0
    question       1.00      0.56      0.71      1003
       other       0.00      0.00      0.00         0

    accuracy                           0.56      1003
   macro avg       0.20      0.11      0.14      1003
weighted avg       1.00      0.56      0.71      1003



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [26]:
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'question': 418, 'deployment': 323, 'error': 120, 'other': 76, 'performance': 66})
              precision    recall  f1-score   support

       error       0.00      0.00      0.00         0
 performance       0.00      0.00      0.00         0
  deployment       0.00      0.00      0.00         0
    question       1.00      0.42      0.59      1003
       other       0.00      0.00      0.00         0

    accuracy                           0.42      1003
   macro avg       0.20      0.08      0.12      1003
weighted avg       1.00      0.42      0.59      1003



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
# error
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'error': 146, 'deployment': 35, 'other': 20, 'performance': 15, 'question': 6})
              precision    recall  f1-score   support

       error       1.00      0.66      0.79       222
 performance       0.00      0.00      0.00         0
  deployment       0.00      0.00      0.00         0
    question       0.00      0.00      0.00         0
       other       0.00      0.00      0.00         0

    accuracy                           0.66       222
   macro avg       0.20      0.13      0.16       222
weighted avg       1.00      0.66      0.79       222



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [46]:
# deployment performance
from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'deployment': 48, 'error': 41, 'performance': 34, 'question': 32, 'other': 10})
              precision    recall  f1-score   support

       error       0.00      0.00      0.00         0
 performance       0.76      0.47      0.58        55
  deployment       0.94      0.41      0.57       110
    question       0.00      0.00      0.00         0
       other       0.00      0.00      0.00         0

    accuracy                           0.43       165
   macro avg       0.34      0.18      0.23       165
weighted avg       0.88      0.43      0.57       165



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [39]:
# other

from collections import Counter
print(Counter(pred_labels))
origin_labels = [x.lower() for x in origin_labels]
pred_labels = [x.lower() for x in pred_labels]
tmp_pred_labels = []
for x in pred_labels:
    if x not in ["error", "performance", "deployment", "question", "other"]:
        x = 'other'
    tmp_pred_labels.append(x)
pred_labels = tmp_pred_labels

from sklearn.metrics import classification_report

labels = ["error", "performance", "deployment", "question", "other"]
label_map = {
    "error": 0, 
    "performance": 1,
    "deployment": 2,
    "question": 3,
    "other": 4
}
final_origin_labels_num = [label_map[l] for l in origin_labels]
final_pred_labels_num = [label_map[l] for l in pred_labels]

report = classification_report(final_origin_labels_num, final_pred_labels_num, target_names=labels)
print(report)

Counter({'other': 267, 'question': 150, 'error': 51, 'deployment': 44, 'performance': 29, 'feature': 2})
              precision    recall  f1-score   support

       error       0.00      0.00      0.00         0
 performance       0.00      0.00      0.00         0
  deployment       0.00      0.00      0.00         0
    question       0.00      0.00      0.00         0
       other       1.00      0.50      0.66       543

    accuracy                           0.50       543
   macro avg       0.20      0.10      0.13       543
weighted avg       1.00      0.50      0.66       543



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
"""

### **Category: other**

The **other** category is reserved for issues, feature requests, or discussions that extend beyond the scope of core functionality errors, performance bottlenecks, deployment challenges, or user misunderstandings. It emphasizes improving or expanding the repository’s overall usability and scope.  

#### **Includes**:
1. **Feature Requests**  
   Suggestions for new functionalities or improvements to existing features.  

2. **Documentation Enhancements**  
   Proposals to clarify or expand guides, examples, or API references.  

3. **Infrastructure Discussions**  
   Conversations about updates or optimizations related to build systems, CI/CD pipelines, or configurations.  

4. **Design and Workflow Improvements**  
   Suggestions to enhance user interfaces, collaboration workflows, or general feature designs.  

5. **Experimental Features**  
   Feedback or ideas for testing and refining experimental functionalities.  

6. **Non-Functional Enhancements**  
   Improvements in areas like logging, visual interfaces, or optional settings.  
"""

In [None]:
"""

### **Category: other**

The **other** category is used for issues, feature requests, or discussions that do not align directly with errors, performance bottlenecks, or deployment challenges. It emphasizes proposals, suggestions, or improvements aimed at enhancing the repository's overall functionality, usability, or user experience.

#### **Includes**:
1. **Feature Requests**  
   Suggestions for adding new features, tools, or functionalities to the repository.  

2. **Documentation Enhancements**  
   Proposals to improve guides, examples, API references, or overall clarity in documentation.  

3. **Infrastructure Discussions**  
   Topics related to optimizing workflows, build systems, CI/CD pipelines, or other infrastructural components.  

4. **Design and Workflow Suggestions**  
   Ideas to improve user interfaces, streamline collaboration workflows, or refine existing designs.  

5. **Experimental Features**  
   Feedback or recommendations for testing, refining, or introducing experimental functionalities.  

6. **Usability Enhancements**  
   Improvements related to visual interfaces, optional configurations, or debugging aids like better error messages or logging.  

7. **Community and Support Proposals**  
   Suggestions to improve community engagement, such as adding demos, examples, or resources for easier onboarding.
"""

In [None]:
"""
### **Category: other**

The **other** category encompasses issues, feature requests, or discussions that are not directly related to errors, performance bottlenecks, deployment challenges, or user misunderstandings. This category focuses on general enhancements, community-driven proposals, or infrastructural improvements aimed at enriching the repository's overall ecosystem.

#### **Includes**:
1. **Feature Enhancements**  
   Suggestions for adding new functionalities, improving existing features, or modifying workflows to better align with user needs.

2. **Documentation Updates**  
   Proposals to improve guides, tutorials, API references, or other forms of documentation to enhance clarity and usability.

3. **Community Contributions**  
   Discussions or requests related to promoting community engagement, such as creating demos, localization efforts, or building partnerships.

4. **Infrastructure Optimizations**  
   Topics on improving build systems, CI/CD pipelines, dependency management, or repository structure.

5. **Design Proposals**  
   Ideas for improving user interfaces, streamlining collaboration processes, or refining existing designs for better usability.

6. **General Usability Enhancements**  
   Requests or feedback related to improving debugging tools, error messages, optional configurations, or adding utilities for developer productivity.

7. **Support for Emerging Technologies**  
   Requests to adapt the repository for new platforms, tools, or standards in line with technological advancements.
"""

In [None]:
"""
### **Category: Other**

The **other** category encompasses topics or discussions that do not directly align with errors, performance bottlenecks, deployment challenges, or user misunderstandings. It focuses on broader aspects that enrich the repository's ecosystem, usability, or functionality.

#### **Includes**:
1. **Feature Requests**  
   Suggestions for new functionalities, enhanced tools, or modifications to existing workflows that align with user needs or technological trends.

2. **Usability Enhancements**  
   Proposals to improve accessibility, UI/UX, or interaction flows within the repository.

3. **Design Improvements**  
   Ideas for refining visual design, layout, or branding elements to align with modern standards or project goals.

4. **Documentation Updates**  
   Requests or contributions to enhance clarity, add examples, or expand explanations in guides, API references, or tutorials.

5. **Community Engagement**  
   Initiatives aimed at fostering collaboration, such as organizing localization efforts, promoting best practices, or facilitating external contributions.

6. **Infrastructure and Tooling Improvements**  
   Enhancements to build systems, CI/CD pipelines, dependency management, or repository structure for smoother development processes.

7. **Support for Emerging Technologies**  
   Discussions or requests to adapt the repository to new tools, platforms, or standards in line with advancements in the field.

8. **General Suggestions**  
   Feedback, exploratory ideas, or speculative discussions that do not fit into other predefined categories.
"""

In [None]:
"""
### **Category: Other**

The **other** category captures topics or discussions that extend beyond specific technical errors, performance challenges, deployment processes, or direct user misunderstandings. It includes issues that enhance the repository's overall ecosystem, functionality, and accessibility.

#### **Includes**:
1. **Feature Requests**  
   Proposals for new functionalities, improved tools, or modifications to existing workflows aligned with user needs or emerging technologies.

2. **Documentation Enhancements**  
   Suggestions to improve clarity, structure, or accessibility of guides, API references, or tutorials, as well as adding illustrative examples or expanding explanations.

3. **Design and Usability Improvements**  
   Ideas to refine UI/UX design, interaction flows, or the overall accessibility of the repository's features.

4. **Infrastructure and Tooling Improvements**  
   Discussions about refining CI/CD pipelines, dependency management, build systems, or other infrastructural elements that enhance development processes.

5. **Community and Collaboration Initiatives**  
   Efforts to foster community engagement, such as localization, promoting best practices, or encouraging external contributions.

6. **Integration and Compatibility Discussions**  
   Requests or ideas for compatibility with external tools, libraries, or platforms to align the repository with broader industry trends.

7. **General Feedback or Suggestions**  
   Exploratory ideas, speculative discussions, or feedback that doesn’t fit into other predefined categories but contribute to the repository's growth and improvement.
"""

In [None]:
"""
### **Category: Other**

The **other** category encompasses discussions and issues that contribute to the broader functionality, usability, or ecosystem of the repository, which do not align specifically with errors, performance bottlenecks, deployment challenges, or user misunderstandings.

#### **Includes**:
1. **Feature Requests**  
   Suggestions for new tools, enhancements to existing features, or additional functionalities to meet user needs or improve workflows.

2. **Documentation Updates**  
   Requests or ideas for improving guides, API references, tutorials, or adding examples for better understanding.

3. **Design and Usability Enhancements**  
   Proposals to refine user interface design, improve user experience, or enhance the accessibility of repository features.

4. **Infrastructure and Tooling Improvements**  
   Discussions related to optimizing CI/CD pipelines, dependency management, or development tooling for better efficiency.

5. **Community Engagement and Collaboration**  
   Initiatives to strengthen community interaction, promote best practices, or encourage contributions, including surveys or outreach efforts.

6. **Integration and Compatibility Enhancements**  
   Ideas for expanding compatibility with external tools, platforms, or libraries to align with evolving industry standards.

7. **Exploratory Ideas and General Feedback**  
   Speculative discussions, broad feedback, or exploratory suggestions for the repository's growth that don’t fit into predefined categories.
"""

In [None]:
"""
### **Category: Other**

The **other** category encompasses topics, discussions, and suggestions that enhance the overall functionality, usability, or ecosystem of the repository but do not align with specific categories such as errors, performance bottlenecks, deployment challenges, or user misunderstandings.

#### **Includes**:
1. **Feature Requests**  
   Proposals for adding new features, enhancing existing functionality, or introducing tools to meet user needs or improve workflows.

2. **Documentation Updates**  
   Suggestions for improving guides, tutorials, API references, or examples for better user understanding and accessibility.

3. **Design and Usability Improvements**  
   Ideas to refine the user interface, improve user experience, or enhance accessibility.

4. **Infrastructure and Tooling Enhancements**  
   Discussions related to improving CI/CD pipelines, dependency management, or developer tools to streamline efficiency.

5. **Community Engagement Initiatives**  
   Activities or strategies aimed at fostering collaboration, promoting best practices, or encouraging contributions.

6. **Integration and Compatibility Suggestions**  
   Proposals for expanding support or compatibility with external tools, platforms, or libraries.

7. **General Feedback and Speculative Ideas**  
   Broad feedback, exploratory discussions, or speculative suggestions for the repository's growth and future directions.
"""

In [None]:
"""
### **Category: Other**

The **other** category includes discussions, requests, and suggestions that enhance the functionality, usability, or ecosystem of the repository but do not align with specific categories like errors, performance, deployment, or user misunderstandings. This category serves as a flexible space for improvements, expansions, and feedback.

#### **Includes**:
1. **Feature Requests**  
   Proposals for introducing new features, improving existing ones, or integrating tools to meet user needs or enhance workflows.

2. **Documentation Enhancements**  
   Suggestions for improving guides, tutorials, API references, or examples to improve user accessibility and comprehension.

3. **Design and Usability Improvements**  
   Ideas to refine user interfaces, enhance user experience, or improve accessibility for diverse audiences.

4. **Infrastructure and Tooling Upgrades**  
   Discussions on improving CI/CD pipelines, dependency management, or developer tools to streamline project efficiency.

5. **Community Building and Engagement**  
   Initiatives or strategies aimed at encouraging collaboration, fostering best practices, or expanding community involvement.

6. **Integration and Compatibility Enhancements**  
   Suggestions to expand support for or compatibility with external tools, platforms, or libraries.

7. **Feedback and Growth Ideas**  
   Broad feedback, speculative discussions, or ideas aimed at the long-term growth and direction of the repository.

8. **Experimental or Prototype Proposals**  
   Requests or discussions about experimental features or prototype implementations to explore new capabilities.

9. **Process and Workflow Suggestions**  
   Proposals to optimize development workflows, contribution guidelines, or operational processes.

10. **Policy or Licensing Questions**  
   Inquiries or proposals regarding repository policies, licensing, or governance.
"""

In [None]:
"""
### **Category: Other**

The **other** category encompasses discussions, requests, and suggestions that contribute to enhancing the repository's functionality, usability, ecosystem, or community engagement. These do not fall into the more specific categories of errors, performance issues, deployment challenges, or misunderstandings but are nonetheless critical to the repository's growth and user experience.

#### **Basic Definition**:
This category is a flexible space for contributions, feedback, and discussions that address aspects of the repository beyond immediate technical concerns or coding issues.

#### **Includes**:
1. **Feature Requests**  
   Proposals for adding new features, improving existing functionalities, or integrating tools to meet evolving user needs.

2. **Documentation Enhancements**  
   Suggestions for improving guides, tutorials, examples, or API references to facilitate better understanding and accessibility for users.

3. **Design and Usability Improvements**  
   Ideas to refine interfaces, improve user workflows, or enhance accessibility for a diverse range of users.

4. **Infrastructure and Tooling Discussions**  
   Feedback or suggestions about CI/CD pipelines, dependency management, or developer tools to improve development efficiency.

5. **Community Building and Engagement**  
   Initiatives or discussions aimed at fostering collaboration, expanding user contributions, or enhancing the repository's community dynamics.

6. **Integration and Compatibility**  
   Suggestions or discussions on ensuring compatibility with external tools, platforms, or libraries.

7. **Feedback on Growth and Vision**  
   Broad feedback or strategic suggestions focused on the repository's long-term growth, direction, and innovation.

8. **Experimental and Prototyping Ideas**  
   Discussions or requests to explore experimental features or prototype implementations to test new capabilities.

9. **Process and Workflow Optimization**  
   Proposals for improving operational processes, contribution guidelines, or development workflows.

10. **Policy and Licensing Discussions**  
    Inquiries or suggestions regarding the repository's licensing, governance, or operational policies.

11. **General Support and Queries**  
    User inquiries seeking clarification or guidance on using the repository, understanding its structure, or exploring its capabilities, when not directly related to errors or technical performance.

12. **Platform and Ecosystem Expansion**  
    Requests or discussions about extending the repository's use cases, such as adapting for new platforms, datasets, or scenarios.
"""

In [None]:
"""
### Revised Definition for the **Other** Category (Enhanced)

**Category: Other**

The **other** category encompasses discussions, requests, and suggestions that contribute to enhancing the repository's functionality, usability, ecosystem, or community engagement. This category is a flexible space for contributions, feedback, and queries that do not fit neatly into specific categories such as errors, performance issues, deployment challenges, or misunderstandings. Its purpose is to capture and support discussions that address broader aspects of repository growth, strategy, and user experience.

#### **Basic Definition**
The **other** category includes topics that enhance the repository beyond direct code issues, focusing on broader improvements, strategic directions, or community-driven enhancements.

#### **Includes**
1. **Feature Requests**
   - Suggestions for adding new capabilities, tools, or functionalities.
   - Enhancements to existing features to meet evolving user needs.

2. **Documentation and Educational Content**
   - Proposals for creating or improving guides, tutorials, examples, or API references.
   - Requests for detailed explanations or additional clarification on existing materials.

3. **Design and Usability Enhancements**
   - Ideas for refining interfaces, improving workflows, or boosting accessibility.

4. **Infrastructure and Tooling Feedback**
   - Suggestions to improve CI/CD pipelines, dependency management, or development tools.
   - Recommendations for better compatibility with third-party integrations.

5. **Community Building and Collaboration**
   - Discussions to foster community engagement, user contributions, and collaborative initiatives.
   - Requests for new communication channels, such as forums, Slack, or gitter.

6. **Integration and Ecosystem Expansion**
   - Requests for ensuring compatibility with external tools, platforms, datasets, or libraries.
   - Proposals to extend repository use cases to new domains.

7. **Feedback on Repository Vision and Growth**
   - Strategic suggestions for long-term growth and innovation.
   - Contributions aiming to align with the broader goals of the repository.

8. **Experimental and Prototyping Proposals**
   - Discussions around testing new features, experimental ideas, or prototype implementations.

9. **Process and Workflow Improvements**
   - Suggestions for optimizing contribution guidelines, issue triaging, or development processes.

10. **Policy and Licensing Inquiries**
    - Requests for clarification or suggestions regarding repository governance, licensing, or policies.

11. **General Support and Queries**
    - Inquiries for assistance or clarification not tied directly to technical issues.
    - Questions that span multiple areas of functionality or repository design.

12. **Platform and Feature Expansion**
    - Discussions about adapting the repository for new use cases, languages, or platforms.
    - Requests for additional datasets, pretrained models, or resource-specific enhancements.
"""

In [None]:
"""
### Revised Definition for the **Other** Category (Expanded and Enhanced)

**Category: Other**

The **other** category is a flexible and inclusive classification designed to capture issues, discussions, and contributions that do not fall into specific predefined categories such as errors, performance problems, deployment challenges, or questions about repository functionality. This category encompasses a broad range of topics aimed at enhancing the repository's overall ecosystem, usability, and engagement.

#### **Basic Definition**
The **other** category includes topics that address broader improvements, strategic discussions, feature suggestions, or general contributions that contribute to the repository's growth and user experience.

#### **Includes**
1. **Feature Requests and Enhancements**
   - Suggestions for adding new capabilities, tools, or functionalities.
   - Proposals for refining or improving existing features to better meet user needs.

2. **Documentation Improvements**
   - Requests for creating or enhancing guides, tutorials, examples, or reference materials.
   - Proposals for clarifying or expanding on existing documentation.

3. **Design and Usability Proposals**
   - Suggestions to improve interfaces, workflows, or accessibility.
   - Feedback on design elements to enhance the user experience.

4. **Community and Collaboration Initiatives**
   - Discussions to foster user engagement, community building, and collaborative efforts.
   - Proposals for creating new communication channels or improving existing ones.

5. **Integration and Compatibility Feedback**
   - Suggestions for ensuring compatibility with external tools, platforms, datasets, or APIs.
   - Requests for extending repository support to new environments, domains, or use cases.

6. **Strategic Vision and Growth**
   - Contributions that align with the long-term goals and innovative direction of the repository.
   - Feedback on roadmaps, priorities, and potential areas of expansion.

7. **Infrastructure and Tooling**
   - Proposals to improve CI/CD pipelines, dependency management, or development workflows.
   - Feedback on compatibility with specific hardware, software versions, or third-party integrations.

8. **Educational and Training Resources**
   - Requests for or contributions of resources such as training datasets, model examples, or educational tools.
   - Proposals for expanding the repository's educational outreach or training capabilities.

9. **Experimental Ideas and Prototyping**
   - Discussions around testing new concepts, experimental features, or prototype implementations.
   - Feedback on experimental releases or beta testing results.

10. **Policy, Licensing, and Governance**
    - Inquiries or feedback regarding the repository's policies, licensing terms, or governance structure.
    - Suggestions for enhancing transparency or community-driven decision-making.

11. **Platform Expansion and Ecosystem Building**
    - Requests or suggestions to adapt the repository for additional platforms, languages, or use cases.
    - Feedback on building a robust ecosystem around the repository.

12. **General Inquiries and Support**
    - Questions or requests that span multiple areas of functionality or require broader context.
    - Topics that do not fit neatly into a single category but contribute to the repository's growth.

13. **Process and Workflow Optimization**
    - Suggestions for improving contribution guidelines, issue triaging, or developer onboarding.
    - Proposals to streamline processes for better collaboration and efficiency.

14. **Feedback on Timeliness and Engagement**
    - Suggestions for improving the responsiveness of issue resolutions or community interactions.
    - Requests for updates on roadmap progress or feature development timelines.
"""

In [None]:
"""
### Revised Definition for the **Other** Category (Comprehensive and Inclusive)

**Category: Other**

The **Other** category is a flexible classification designed to encompass a broad range of issues, discussions, or contributions that do not fall neatly into predefined categories such as errors, performance problems, deployment issues, or user questions. This category is essential for ensuring that all valuable input contributing to the repository's growth, usability, or functionality is acknowledged and addressed appropriately.

#### **Basic Definition**
The **Other** category includes any topics that focus on enhancements, strategic considerations, feature suggestions, or contributions that improve the repository's ecosystem, capabilities, or community engagement.

#### **Includes**
1. **Feature Requests and Enhancements**
   - Suggestions for new tools, functionalities, or capabilities.
   - Proposals to refine or improve existing features to better serve user needs.

2. **Documentation and Educational Resources**
   - Requests for creating or improving guides, tutorials, or reference materials.
   - Suggestions for adding clarity or expanding the existing documentation to enhance usability.
   - Proposals for new training datasets or educational tools related to the repository.

3. **Design and Usability Improvements**
   - Feedback on user experience, workflows, or interface accessibility.
   - Suggestions for improving the design or usability of existing features.

4. **Community and Collaboration Initiatives**
   - Proposals to foster community engagement and collaboration.
   - Discussions on creating or improving communication channels or community governance structures.

5. **Strategic Discussions and Vision**
   - Contributions aligning with long-term goals or innovative directions for the repository.
   - Feedback on roadmaps, priorities, and expansion opportunities.

6. **Integration and Compatibility**
   - Suggestions for improving compatibility with external tools, platforms, datasets, or APIs.
   - Requests to adapt the repository for specific environments, hardware, or use cases.

7. **Experimental Features and Prototyping**
   - Discussions about testing new concepts or experimental features.
   - Feedback on beta releases or prototype implementations.

8. **Infrastructure, CI/CD, and Tooling**
   - Proposals to enhance build systems, testing pipelines, or development workflows.
   - Feedback on optimizing the repository's infrastructure or supporting tools.

9. **Policy, Licensing, and Governance**
   - Inquiries or feedback on policies, licensing terms, or governance.
   - Suggestions to improve transparency or user collaboration in decision-making.

10. **Dataset Availability and Ethical Concerns**
    - Discussions about accessibility and availability of critical datasets.
    - Feedback on ethical considerations related to the repository or associated technologies.

11. **Platform and Ecosystem Expansion**
    - Proposals to extend the repository's functionality to additional platforms, programming languages, or ecosystems.
    - Feedback on ecosystem-building efforts around the repository.

12. **General Contributions and Support**
    - Contributions that span multiple categories or do not fit into a specific predefined area.
    - General feedback aimed at improving repository processes or user engagement.

13. **Feedback on Community Timeliness and Responsiveness**
    - Suggestions to improve the speed of issue resolution or feature implementation.
    - Requests for updates on roadmap milestones or development progress.

14. **Broad Improvement Proposals**
    - Discussions aimed at enhancing repository structure, communication, or inclusivity.
    - Suggestions that influence the overall growth or usability of the repository.
"""

In [None]:
"""
### Revised Definition for the **Other** Category

- **Other**: 
    1. Topics that do not fall under any predefined category.
    2. Issues or discussions unrelated to the repository, such as general inquiries, off-topic discussions, or irrelevant submissions.


    #### **Includes**
    1. **Feature Development and Testing**
       - Suggestions for new functionalities, enhancements, or prototypes.
       - Feedback on beta features or experimental implementations.
    
    2. **Documentation, Resources, and Ethical Concerns**
       - Proposals for improving guides, tutorials, or reference materials.
       - Discussions on dataset accessibility, ethical considerations, and educational tools.
    
    3. **User Experience and Compatibility**
       - Suggestions for improving design, usability, and interface accessibility.
       - Feedback on compatibility with external tools, platforms, and APIs.
    
    4. **Community and Collaboration**
       - Proposals to improve community engagement, governance, and responsiveness.
       - Feedback on collaboration tools and communication channels.
    
    5. **Strategy and Ecosystem Development**
       - Contributions aligning with long-term goals, roadmaps, or ecosystem expansion.
       - Discussions on strategic priorities and platform integration opportunities.
    
    6. **Infrastructure and Governance**
       - Suggestions for improving build systems, CI/CD pipelines, or development workflows.
       - Feedback on policies, licensing, and governance structures.
    
    7. **Broad Improvement Proposals**
       - Contributions focused on enhancing repository structure, inclusivity, or communication.
       - Suggestions with overarching impact on repository growth and usability.
    
    8. **Miscellaneous Contributions**
       - Topics that add value to the repository but do not fit into predefined categories.
       - Issues unrelated to the repository’s purpose, including off-topic or irrelevant submissions.
"""