In [1]:
import fire
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
import os
from docx import Document
from tqdm.auto import tqdm
import pandas as pd
import ast



# Коллекция промптов

In [2]:
import gc
import torch
torch.cuda.empty_cache()
gc.collect()
!nvidia-smi

Sun Nov 10 09:44:19 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.94                 Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060      WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   46C    P2             37W /  170W |    4205MiB /  12288MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

# Иcпользуемый prompt

In [7]:
#Score - 0,77
SYSTEM_PROMPT = """
You are a language model tasked with evaluating how closely a technical specifications document (SSTS) aligns with a requirements document (UC). The input format is:

UC: {scenario requirements content}
SSTS: {technical specifications for implementation}

Definitions:

    UC: The requirements document detailing the system’s scenario requirements, including preconditions, main scenarios, postconditions, and alternative scenarios.
    SSTS: The technical document outlining the system’s functionality in a structured textual description.

Your task is to compare SSTS against UC and assess compliance based on the following criteria. Provide your response only in the specified dictionary format, with no introductory or closing text. Use these keys:

    Name: Extract and include the name of the UC document.

    Differences: Highlight key mismatches or omissions in SSTS relative to UC, focusing on:
        - Missing or different output devices.
        - Discrepancies in user interaction elements, like control methods or UI components.
        - Missing functional details such as status displays, error handling, or audio requirements.
    Compliance Level: Indicate the compliance level of SSTS with UC using one of these codes:
    
    **FC - Fully Compliant**  
    The SSTS document fully aligns with all functional requirements in the UC document, with no missing components or deviations. Every requirement, scenario, and interaction outlined in UC is addressed in the SSTS document with precise alignment. Use this rating only if the SSTS document provides complete and exact coverage of UC requirements, meaning that no revisions are necessary.
    
    **LC - Largely Compliant**

    The SSTS document meets **most** of the functional requirements outlined in the UC document. However, there are **minor deviations** that do not substantially impact the core functionality, usability, or effectiveness of the system. These deviations are typically related to **small differences in descriptions, wording, or non-essential elements**. The key features and requirements defined in the UC document are largely represented in the SSTS, but the implementation may include **slight inconsistencies** or **missing clarifications** in specific areas.
    
    ### When to use **LC**:
    - **Minor inconsistencies**: For example, the SSTS might describe the same functionality as in the UC but with slight wording or detail differences that do not affect the overall outcome.
    - **Non-critical details**: Certain optional elements, like UI components, specific error handling behaviors, or output devices, might not be perfectly aligned with the UC but are not essential to the system’s core operations.
    - **Minor omissions**: Some non-critical requirements, such as specific display formats or additional user interactions, may be omitted or underdefined in the SSTS but do not significantly impact the user experience or system function.
    
    ### Examples of deviations that justify **LC**:
    - A **UI control** might be described in the UC as a dropdown menu, while in the SSTS, it could be described as a simple text field—still functional, but not identical.
    - **Minor output differences**, such as using “mobile speakers” instead of “vehicle speakers”, which do not change the fundamental interaction but could require minor clarification or adjustment.
    - A **slightly different control method** for user interaction, such as replacing a manual button with a voice command for the same task.
    
    ### Recommended Actions:
    The SSTS document does **not** require a complete overhaul but would benefit from **minor refinements**. These may include:
    - Clarifying language or descriptions for greater alignment with UC.
    - Adding or refining optional features, UI elements, or error-handling mechanisms.
    - Minor adjustments to output devices or interaction methods to fully match the UC specification.

    **PC - Partially Compliant**  
    The SSTS document addresses some of the UC requirements but has notable gaps or deviations that could affect intended functionality or usability. These gaps may include missing functional elements, major differences in interaction design, or unaddressed scenarios that are critical to UC. Use this rating if the SSTS covers certain functionalities but falls short on others, requiring specific revisions to achieve alignment with the UC document.
    
    **NC - Non-Compliant**

    The SSTS document **fails to align** with the key functional requirements of the UC document, exhibiting **significant deviations** in areas critical to the system’s performance or usability. These misalignments can include the absence of foundational functional elements, major differences in interaction design, or completely unaddressed scenarios that are essential for meeting UC requirements. The SSTS document, in its current form, **lacks the necessary depth and accuracy** to fulfill the core expectations of the UC and would require **extensive revisions** to meet the outlined specifications.
    
    ### When to use **NC**:
    - **Major functional omissions**: If the SSTS document is missing critical functional requirements, such as the inability to support basic user interactions or scenarios that are defined in the UC.
    - **Severe interaction misalignments**: For instance, if the interaction design in the SSTS deviates significantly from the UC document, such as drastically different workflows or unaddressed user behavior scenarios.
    - **Unaddressed UC requirements**: If certain core features or requirements outlined in the UC are completely absent or inaccurately represented in the SSTS, causing the system to fail in fulfilling the expected functions.
    
    ### Examples of deviations that justify **NC**:
    - **Missing core functionality**: A system feature described in the UC, such as an essential user authentication process or data validation step, is entirely absent or inaccurately described in the SSTS.
    - **Incorrect interaction design**: The UC specifies a multi-step process for user input, but the SSTS describes a completely different or overly simplified flow that would break user expectations or reduce usability.
    - **Incomplete error handling**: The UC defines specific error scenarios and recovery processes, but the SSTS either omits these completely or includes incorrect solutions that fail to resolve critical issues in practice.
    
    ### Recommended Actions:
    The SSTS document requires **substantial revisions** to align with the UC. Key areas for improvement include:
    - **Addressing missing functionality**: Ensuring that all functional elements and user interaction scenarios defined in the UC are included and accurately described.
    - **Revising interaction flows**: Updating the interaction designs to reflect the UC specifications, ensuring that the user experience aligns with UC expectations.
    - **Improving error handling**: Ensuring that error scenarios and recovery methods are fully defined and consistent with the UC, preventing critical system failures.
      
    In summary, **NC** indicates that the SSTS is significantly misaligned with the UC and cannot be considered compliant without major revisions. It is a signal that foundational changes are necessary to bring the two documents into alignment.

    **NA - Not Applicable**  
    The SSTS and UC documents are fundamentally incomparable due to a clear difference in purpose, scope, or content. This rating should be used only if it is evident that the UC requirements are outside the scope or relevance of the SSTS document. When applying this rating, provide a brief explanation of why a comparison is not feasible.
    
    Be sure to assess the SSTS against UC based on the descriptions of requirements, scenarios, devices, interactions, outputs, error handling, and any other relevant details. The compliance level should reflect the overall alignment of the documents in terms of both completeness and accuracy.

Return your response *only* in the following dictionary format, with no additional text:

{'Name': 'name of the UC document','Differences': 'Summary of key differences here', 'Description': 'UC - Key requirements from the interface scenario, SSTS - Description of corresponding content in the technical document','Compliance Level': 'FC, LC, PC, NC, or NA'}

Example format:

{'Name': 'UC Example Document', 'Differences': 'Summary of key differences between SSTS and UC', 'Description': 'UC - Key requirements missing from the technical document, SSTS - Relevant technical document details','Compliance Level': 'FC'}"""


# Решение

In [8]:
# Модели которые тестили
lst = ["chuanli11/Llama-3.2-3B-Instruct-uncensored",
'NousResearch/Hermes-3-Llama-3.1-8B',
'01-ai/Yi-Coder-9B-Chat',
      'Qwen/Qwen2.5-1.5B-Instruct',
      'TinyLlama/TinyLlama-1.1B-Chat-v1.0',
      'georgesung/llama2_7b_chat_uncensored']


In [9]:
model_path = '01-ai/Yi-Coder-9B-Chat'  # https://huggingface.co/
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Блок квантования
# # Configure 8-bit quantization
# quantization_config = BitsAndBytesConfig(
#     load_in_8bit=True, # Используется 8-битное квантован
#     bnb_8bit_quant_type='nf4',  # 'nf4'
# )

# # # # Configure 4-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_8bit=False,  # Не используется 8-битное квантование
    load_in_4bit=True,   # Включаем 4-битное квантование
    bnb_4bit_quant_type='fp4',  # 4-битное квантование с плавающей точкой
)

# Load model with quantization configuration
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    model_kwargs={"torch_dtype": torch.bfloat16},  # Использование bfloat16 в пайплайне
    device_map="auto",                             # Автоматическое распределение модели по устройствам
    pad_token_id=tokenizer.eos_token_id,
    truncation=True,
)

# Функция для инференса LLM
def interact(
    user_message,
    system_prompt=SYSTEM_PROMPT,
    # top_k=30,
    # top_p=0.9,
    temperature=0.3,  # Температура модели
    # repetition_penalty=1.1,
    max_length=4000
):

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt") #.to(device)
    model_inputs = model_inputs.to(model.device)
    pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    
    
    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=1024,
        temperature=temperature,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=pad_token_id
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
PATH_DATASETS = 'datasets/train Atom/train data/'
PATH_DATASETS = 'datasets/test_dataset_atom_test/test Атом/test data/'

HMI_path = PATH_DATASETS + 'HMI'  # путь к файлам UC
SSTS_path = PATH_DATASETS + 'SSTS' # путь к файлам SSTS

HMI_fail = os.listdir(HMI_path)
SSTS_fail = [f'SSTS-{os.path.splitext(f)[0][3:]}.docx' for f in os.listdir(HMI_path)]
len(HMI_fail)

15

In [12]:
# функция чтения файлов
def read_document(path):
    doc = Document(path)
    full_text = []
    for para in doc.paragraphs:        
        full_text.append(para.text)

    full_text = '\n'.join(full_text)
    return full_text

In [13]:
#Генерируем датафрейм
df=pd.DataFrame()
x=''
for HMI, SSTS in zip(tqdm(HMI_fail), SSTS_fail):
    
    path1 = f'{HMI_path}/{HMI}'
    path2 = f'{SSTS_path}/{SSTS}'

    # Проверяем наличие SSTS
    if not os.path.exists(path2):
        data = {
                'Number': os.path.splitext(HMI)[0][3:],
                'Name': Document(path1).paragraphs[0].text,
                'Differences': "ssts hasn't info about this",
                'Description': '-',
                'Compliance Level': 'NA'
            }
        df = pd.concat([df, pd.DataFrame([data])])
        continue
        
    text1 = read_document(path1)
    text2 = read_document(path2)

    while True:  #Цикл до успешного выполнения заданного выхода модели
        try:
            res = interact(f'text1: {text1}\n\n\ntext2: {text2}', SYSTEM_PROMPT)
            cleaned_text = res.strip('```python\n').strip('\n```').replace('```','').strip()
            data = ast.literal_eval(cleaned_text)
            data['Number'] = os.path.splitext(HMI)[0][3:]
            df = pd.concat([df, pd.DataFrame([data])])
            break  # Если всё прошло успешно, выходим из цикла
        except Exception as e:
            print(f"Ошибка при обработке {HMI} и {SSTS}: {e}")
            x=res
        
df

  0%|          | 0/15 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Unnamed: 0,Name,Differences,Description,Compliance Level,Number
0,UC Vehicle Access Management,The SSTS does not explicitly mention the revoc...,UC - The system needs to support the revocatio...,PC,114671
0,UC 25957 - Mute/Unmute FM Radio,The SSTS does not clearly describe the mute/un...,UC - The user needs to be able to mute/unmute ...,PC,259571
0,UC Example Document,The SSTS does not clearly describe the mute/un...,UC - Key requirements around the mute/unmute f...,PC,259572
0,UC Document,1. Missing output devices: The UC mentions the...,UC - Key requirements about switching FM Radio...,PC,261611
0,Configure Heat Preservation,The SSTS does not clearly outline the function...,UC - The user needs to be able to configure he...,PC,29448
0,UC: Maximum charging SOC value setting,Missing functional details such as status disp...,UC - Key requirements such as setting the maxi...,LC,30364
0,"Use-Case Title: ""Start the Charging Process vi...",Missing functional details such as error handl...,UC - Key requirements from the interface scena...,NC,30365
0,I-30370 Stop the discharging process,The SSTS does not clearly align with all requi...,UC - Key requirements such as enabling conditi...,LC,30370
0,UC: Internet Radio Operations,The SSTS does not clearly outline the function...,UC - User wants to add favorite internet radio...,LC,315231
0,UC Driver Initiate a Call,SSTS does not clearly align with the UC in ter...,UC - Driver needs to initiate a call through S...,PC,65831


In [15]:
df = df.rename(columns={'Compliance Level': 'Complience Level'})
df['Complience Level'] = df['Complience Level'].apply(lambda x: x[0] if type(x) == list else x)
df['Number'] = df['Number'].astype(int)

In [16]:
x

''

Unnamed: 0,Number,Name,Differences,Description,Complience Level
0,114671,UC Vehicle Access Management,The SSTS does not explicitly mention the revoc...,UC - The system needs to support the revocatio...,PC
0,259571,UC 25957 - Mute/Unmute FM Radio,The SSTS does not clearly describe the mute/un...,UC - The user needs to be able to mute/unmute ...,PC
0,259572,UC Example Document,The SSTS does not clearly describe the mute/un...,UC - Key requirements around the mute/unmute f...,PC
0,261611,UC Document,1. Missing output devices: The UC mentions the...,UC - Key requirements about switching FM Radio...,PC
0,29448,Configure Heat Preservation,The SSTS does not clearly outline the function...,UC - The user needs to be able to configure he...,PC
0,30364,UC: Maximum charging SOC value setting,Missing functional details such as status disp...,UC - Key requirements such as setting the maxi...,LC
0,30365,"Use-Case Title: ""Start the Charging Process vi...",Missing functional details such as error handl...,UC - Key requirements from the interface scena...,NC
0,30370,I-30370 Stop the discharging process,The SSTS does not clearly align with all requi...,UC - Key requirements such as enabling conditi...,LC
0,315231,UC: Internet Radio Operations,The SSTS does not clearly outline the function...,UC - User wants to add favorite internet radio...,LC
0,65831,UC Driver Initiate a Call,SSTS does not clearly align with the UC in ter...,UC - Driver needs to initiate a call through S...,PC


In [19]:
#Сохранения сабмита
df[['Number', 'Name', 'Differences', 'Description', 'Complience Level']].reset_index().drop('index',axis=1).to_csv('submission_v2_1.csv', index = False)

# Проверка scora Traina

In [21]:
# !pip freeze > requirements.txt

In [18]:
# gt = pd.read_excel('train data/train_data_markup.xlsx', keep_default_na=False)
# gt = pd.read_excel('train data/train_data_markup.xlsx', keep_default_na=False)
gt = pd.read_excel("datasets/train Atom/" + 'train data/train_data_markup.xlsx', keep_default_na=False)
gt

Unnamed: 0,Number,Name,Differences,Description,Complience Level
0,8800,Receiving Call Notifications,SSTS misses the following content:\n The outpu...,HMX description:\nThe driver can accept or rej...,LC
1,31523,Adding Internet Radio to Favorites List,UC misses the following content:\nUsers can se...,Users can add or remove songs from favorites a...,LC
2,25957,Mute/unmute the FM Radio playback,There is no soft button icon and description i...,The user clicks on the radio and clicks on the...,PC
3,26160,Filtering the internet radio stations,ssts hasn't info about this,-,
4,11467,Revoke access to the vehicle from a driver or ...,-,The driver opens the ATOM application on his i...,FC
5,30371,ERA Self-diagnosis,SSTS starts Every time the ignition switch is ...,Self-diagnosis includes the following items:\n...,LC
6,26771,Turn on and off hotspot via VA,SSTS misses the following content:\n VA will i...,Users can turn on or off the vehicle hotspot a...,LC
7,28561,Setting Hotspot name & password,UC misses the following content: \n This funct...,Users can modify the name and password of the ...,LC
8,8604,Source selection for Media output,No online&BLT music HMI button guide.\nThis us...,Support USB/SD/online/BLT media type\nAtom car...,NC
9,6583,Driver initiate a call through SWP,SSTS describes that only when the vehicle is s...,Additionally SSTS describes:\n- a numeric keyb...,NC


In [21]:
gt['Differences'].loc[10]

'UC misses the folowimg content:\n Only when the vehicle is stopped can calls be made through SWP.\n Users play the radio and use voice to search for radio stations'

In [22]:
from sklearn.metrics import mean_squared_error
def calc_score(gt, sub):
  sub = sub.drop_duplicates(subset='Number', keep='last')

  mapping = {'FC': 1, 'LC': 2, 'PC': 3, 'NC': 4, 'NA': 5}
  gt['категории_числа'] = gt['Complience Level'].map(mapping)
  sub['категории_числа'] = sub['Complience Level'].map(mapping)


  merge_df = pd.merge(gt, sub, on='Number', how='left')
  merge_df['категории_числа_y'] = merge_df['категории_числа_y'].fillna(mapping['NA'])
  mse = mean_squared_error(merge_df['категории_числа_x'], merge_df['категории_числа_y'])

  score = max(0, 1.5-mse)/1.5
  print(mse)

  return score


In [23]:
calc_score(gt, df)

0.6666666666666666


0.5555555555555556