In [None]:
!pip install fuzzywuzzy
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.2.0-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.2.0-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.2/255.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence_transformers
Successfully installed sentence_transformers-3.2.0


## Fuzzywuzzy


In [1]:
from fuzzywuzzy import process

# Заданный список слов
words = ["EMC", "Doors", "Steering mechanism", "Braking", "Safety belt",
         "Seats", "Audible warning devices", "Speedometer and odometer",
         "Steering equipment", "Heating system", "AVAS", "wipe and wash"]

# Текст, в котором нужно искать слова
text = """
I-24205
OC Section: [F-7600] New OC Section: 09 Navigation and Entertainment / Acoustic Vehicle Alerting System (AVAS)
Description:
Title: Vehicle Alerting System fault
Goal: To provide granted indications that AVAS function has problems
Scope: AVAS and other car systems
Actors: N/A
Preconditions: AVAS operating normally
Trigger: AVAS has malfunction
Main scenario: N/A
Postconditions:
- SWP out_2 displays msg
- Push notification inside Atom mobile app out_5 and out_44 ATOM HUB
Priority: Normal
Type: Use Case CF
"""

# Получаем список слов из текста
found_words = set()
for word in words:
    result = process.extractOne(word, text.split(), score_cutoff=90)
    if result:
        found_words.add(word)

print("Найденные слова:", found_words)


ModuleNotFoundError: No module named 'fuzzywuzzy'

## **Sentence Transformer**

In [None]:
from sentence_transformers import SentenceTransformer, util

# Модель для преобразования предложений в эмбеддинги
model = SentenceTransformer('all-MiniLM-L6-v2')

# Заданный список слов/фраз
reglaments_names = ['EMC', 'AVAS', 'Braking system']

words = ["Doors", "Steering mechanism", "Safety belt",
         "Seats", "Audible warning devices", "Speedometer and odometer",
         "Steering equipment", "Heating system", "wipe and wash"]

# Текст, в котором нужно искать слова
text = """
Steering mechanism
Safety belt
"""

# Преобразуем текст и слова в векторы
text_embedding = model.encode(text)
words_embeddings = model.encode(words)

# Порог схожести
threshold = 0.25

# Находим слова, схожие с текстом
reglament_found_names = set()
for word, word_embedding in zip(words, words_embeddings):
    similarity = util.cos_sim(text_embedding, word_embedding)
    if similarity > threshold:
        found_words.add(word)



Найденные слова: {'Steering mechanism', 'Steering equipment', 'Safety belt'}


## **Spacy**

In [None]:
import spacy

# Загружаем модель spaCy
nlp = spacy.load("en_core_web_sm")

# Заданный список слов
words = ["EMC", "Doors", "Steering mechanism", "Braking", "Safety belt",
         "Seats", "Audible warning devices", "Speedometer and odometer",
         "Steering equipment", "Heating system", "AVAS", "wipe and wash"]

# Приводим список слов к нижнему регистру для сопоставления
words_set = set(word.lower() for word in words)

# Текст, в котором нужно искать слова
text = """
Steering mechanism
Safety belt
"""

# Обработка текста с помощью spaCy
doc = nlp(text.lower())

# Извлечение лемм (основных форм слов) и текстов из токенов
found_words = set()
for word in words_set:
    if chunk.text in words_set:
        found_words.add(chunk.text)

print("Найденные слова:", found_words)


Найденные слова: set()
