<a href="https://colab.research.google.com/github/SriRamK345/Ticket-Resolution-Recommendation-using-TF-IDF-and-Cosine-Similarity/blob/main/Ticket_Resolution_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
# Data Preprocessing
import pandas as pd
# Data Manipulation
import numpy as np
# Text Preprocessing
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
import string
import re
import random
# Ignore Warning
import warnings
warnings.filterwarnings("ignore")
# Vectorization
from sklearn.feature_extraction.text import TfidfVectorizer
# Cosine Similarity
from sklearn.metrics.pairwise import cosine_similarity

# Reading The Data

In [23]:
df = pd.read_csv("/content/Book1.csv", encoding='latin-1')

In [24]:
df.head()

Unnamed: 0,Ticket Description,Possible Resolution
0,My laptop is running extremely slow. It takes ...,"Close unnecessary background applications, run..."
1,I am unable to connect to my Wi-Fi network. It...,Forget the network and reconnect with the corr...
2,My laptop gets extremely hot after just 10-15 ...,"Clean the air vents using compressed air, ensu..."
3,My laptops battery drains within an hour even...,Check the battery health status and replace th...
4,"The screen of my laptop flickers frequently, m...","Update or roll back the display driver, check ..."


In [25]:
nltk.download('wordnet')
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

# Data Agumentation

In [26]:
from nltk.tokenize import word_tokenize
# Define a function to get synonyms for a word
def get_synonyms(word):
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return list(synonyms)

In [27]:
# Define a function to perform synonym replacement
def synonym_replacement(sentence, num_replacements=10):
    words = word_tokenize(sentence)
    new_sentences = []

    for _ in range(num_replacements):  # Generate 3 new sentences
        new_words = words.copy()
        # Randomly select words to replace
        indices = random.sample(range(len(words)), int(len(words) * 0.2))

        for i in indices:
            word = words[i]
            synonyms = get_synonyms(word)
            if synonyms and word in synonyms:
                synonyms.remove(word)  # Avoid replacing with the same word
            if synonyms:
                new_word = random.choice(synonyms)
                new_words[i] = new_word

        new_sentences.append(' '.join(new_words))

    separator = ' '
    return separator.join(new_sentences)

In [28]:
df['Agu Description'] = df['Ticket Description'].apply(synonym_replacement)

In [29]:
# Melt the DataFrame
melted_df = pd.melt(df, value_vars=['Ticket Description', 'Agu Description'],
                    id_vars=['Possible Resolution'],
                     value_name='Description')

# The 'Possible Resolution' column will be automatically repeated by pd.melt
melted_df.drop(columns=['variable'], inplace=True)

In [30]:
melted_df

Unnamed: 0,Possible Resolution,Description
0,"Close unnecessary background applications, run...",My laptop is running extremely slow. It takes ...
1,Forget the network and reconnect with the corr...,I am unable to connect to my Wi-Fi network. It...
2,"Clean the air vents using compressed air, ensu...",My laptop gets extremely hot after just 10-15 ...
3,Check the battery health status and replace th...,My laptops battery drains within an hour even...
4,"Update or roll back the display driver, check ...","The screen of my laptop flickers frequently, m..."
...,...,...
501,Check QoS policy configuration files for error...,network switches experiencing return with QoS ...
502,Check backup integrity regularly; ensure backu...,backup system_of_rules failing to regenerate d...
503,Check motion detection settings in camera soft...,security cameras experiencing issues with moti...
504,Check API configuration files for errors; ensu...,access control systems not integrating correct...


In [31]:
melted_df.isnull().sum()

Unnamed: 0,0
Possible Resolution,0
Description,0


In [42]:
melted_df.to_csv('melted_df.csv', index=False)

# Preprocessing the Data

In [32]:
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [33]:
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()
number_pattern = re.compile(r"\d+")

def preprocess_test(text):
  text = number_pattern.sub("", text)  # Remove numbers
  text = text.translate(str.maketrans("", "", string.punctuation)).lower()  # Remove punctuation and lowercase
  text = [lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words] # Lemmatization & stop word removal
  return " ".join(text)

In [34]:
melted_df['Cleaned Description'] = melted_df['Description'].apply(preprocess_test)

In [35]:
melted_df.sample(10)

Unnamed: 0,Possible Resolution,Description,Cleaned Description
296,Check firewall settings to ensure they are not...,Firewall choke_up necessary network traffic Fi...,firewall chokeup necessary network traffic fir...
400,Ensure SFP modules are compatible with the swi...,mesh switch not recognizing SFP mental_faculty...,mesh switch recognizing sfp mentalfaculty due ...
63,Contact ISP support for assistance; consider s...,ISP throttling internet speeds during peak hours,isp throttling internet speed peak hour
50,Contact ISP support for assistance; perform a ...,ISP issues causing intermittent connectivity p...,isp issue causing intermittent connectivity pr...
446,Check for any obstructions in the paper path; ...,Printer experience paper jams frequently Print...,printer experience paper jam frequently printe...
285,Ensure guest network feature is enabled in rou...,Guest network non working on home router Guest...,guest network non working home router guest ne...
502,Check backup integrity regularly; ensure backu...,backup system_of_rules failing to regenerate d...,backup systemofrules failing regenerate data c...
369,Check replication configuration files for erro...,Database reverberation issues causing data inc...,database reverberation issue causing data inco...
433,Check URL filtering configuration files for er...,firewall experiencing issues with URL filterin...,firewall experiencing issue url filtering due ...
469,Replace low ink or toner cartridges with new o...,Printer producing faded prints referable to lo...,printer producing faded print referable low in...


In [36]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [37]:
# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

In [38]:
# Fit the vectorizer to the issue descriptions and transform them into vectors
X_issues = vectorizer.fit_transform(melted_df['Cleaned Description'])

# Transform the resolutions into vectors using the same vectorizer
X_resolutions = vectorizer.transform(melted_df['Description'])

In [39]:
def find_resolution(input_issue):

    preprocessed_test = preprocess_test(input_issue)
    # Transform into a vector
    input_vector = vectorizer.transform([preprocessed_test])

    # Calculate cosine similarity between the input vector and all resolution vectors
    similarities = cosine_similarity(input_vector, X_resolutions).flatten()

    # Find the index of the most similar resolution
    closest_resolution_index = np.argmax(similarities)

    return melted_df['Possible Resolution'].iloc[closest_resolution_index]

In [43]:
# Example usage:
input_issue = input("Enter the issue:")
resolution = find_resolution(input_issue)
print(f"Resolution: {resolution}")

Enter the issue:backup system_of_rules failing to regenerate
Resolution: Ensure network connectivity between server and backup destination is stable; check for any firewall rules blocking backup
