In [10]:
pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [14]:
import pandas as pd
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk

nltk.download('punkt')
nltk.download('stopwords')

def preprocess(text):
    text = text.lower()  # Lowercasing
    text = text.translate(str.maketrans('', '', string.punctuation))  # Removing punctuation
    tokens = word_tokenize(text)  # Tokenization
    stop_words = set(stopwords.words('english'))  # Stop words
    tokens = [word for word in tokens if word not in stop_words]  # Removing stop words
    stemmer = PorterStemmer()  # Stemming
    tokens = [stemmer.stem(word) for word in tokens]
    return ' '.join(tokens)

# Example data
resolved_queries = [
    "Unable to connect to the internet",
    "Payment failed during checkout",
    "App crashes when opening settings",
    "Forgot password and unable to reset",
    "Unable to upload files to the server"
]

new_queries = [
    "Unabel to conect to the internet",
    "Can’t connect to internet",
    "Intenet not working",
    "Payment failed while chekout",
    "Payment did not go through during chckout",
    "Payment issue at check out",
    "Application crashes when opening setings",
    "App crash when going to settings",
    "Settings cause the app to chrash",
    "Forgot passwrd and cant reset",
    "Forgotten password, unable to reset",
    "I can’t reset my password",
    "Unable to uplod file to server",
    "Can't upload files on to the server",
    "File uploading to server not working",
    "No internet connection, please help",
    "Checkout page says payment failed",
    "Settings page crashes app immediately",
    "Password reset link not working",
    "Server upload fails with an error message"
]

# Preprocess the queries
preprocessed_resolved_queries = [preprocess(q) for q in resolved_queries]
preprocessed_new_queries = [preprocess(q) for q in new_queries]


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [16]:
from fuzzywuzzy import fuzz

def fuzzy_match(query, resolved_queries):
    matches = [(resolved_query, fuzz.ratio(query, resolved_query)) for resolved_query in resolved_queries]
    best_match = max(matches, key=lambda x: x[1])
    return best_match

# Perform fuzzy matching
for uq in new_queries:
    preprocessed_uq = preprocess(uq)
    match = fuzzy_match(preprocessed_uq, preprocessed_resolved_queries)
    print(f"New Query: '{uq}'")
    print(f"Best Match: '{match[0]}' with Score: {match[1]}")
    print("---")



New Query: 'Unabel to conect to the internet'
Best Match: 'unabl connect internet' with Score: 95
---
New Query: 'Can’t connect to internet'
Best Match: 'unabl connect internet' with Score: 85
---
New Query: 'Intenet not working'
Best Match: 'unabl connect internet' with Score: 41
---
New Query: 'Payment failed while chekout'
Best Match: 'payment fail checkout' with Score: 98
---
New Query: 'Payment did not go through during chckout'
Best Match: 'payment fail checkout' with Score: 82
---
New Query: 'Payment issue at check out'
Best Match: 'payment fail checkout' with Score: 77
---
New Query: 'Application crashes when opening setings'
Best Match: 'app crash open set' with Score: 90
---
New Query: 'App crash when going to settings'
Best Match: 'app crash open set' with Score: 88
---
New Query: 'Settings cause the app to chrash'
Best Match: 'app crash open set' with Score: 49
---
New Query: 'Forgot passwrd and cant reset'
Best Match: 'forgot password unabl reset' with Score: 85
---
New Qu

In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def tfidf_cosine_similarity(new_queries, resolved_queries):
    vectorizer = TfidfVectorizer()
    all_queries = new_queries + resolved_queries
    tfidf_matrix = vectorizer.fit_transform(all_queries)

    new_queries_tfidf = tfidf_matrix[:len(new_queries)]
    resolved_queries_tfidf = tfidf_matrix[len(new_queries):]

    similarities = cosine_similarity(new_queries_tfidf, resolved_queries_tfidf)
    return similarities

# Compute cosine similarities
similarities = tfidf_cosine_similarity(preprocessed_new_queries, preprocessed_resolved_queries)

for i, uq in enumerate(new_queries):
    best_match_index = similarities[i].argmax()
    best_match_score = similarities[i].max()
    print(f"New Query: '{uq}'")
    print(f"Best Match: '{resolved_queries[best_match_index]}' with Cosine Similarity: {best_match_score:.2f}")
    print("---")


New Query: 'Unabel to conect to the internet'
Best Match: 'Unable to connect to the internet' with Cosine Similarity: 0.27
---
New Query: 'Can’t connect to internet'
Best Match: 'Unable to connect to the internet' with Cosine Similarity: 0.85
---
New Query: 'Intenet not working'
Best Match: 'Unable to connect to the internet' with Cosine Similarity: 0.00
---
New Query: 'Payment failed while chekout'
Best Match: 'Payment failed during checkout' with Cosine Similarity: 0.54
---
New Query: 'Payment did not go through during chckout'
Best Match: 'Payment failed during checkout' with Cosine Similarity: 0.24
---
New Query: 'Payment issue at check out'
Best Match: 'Payment failed during checkout' with Cosine Similarity: 0.23
---
New Query: 'Application crashes when opening setings'
Best Match: 'App crashes when opening settings' with Cosine Similarity: 0.47
---
New Query: 'App crash when going to settings'
Best Match: 'App crashes when opening settings' with Cosine Similarity: 0.68
---
New Qu