<a href="https://colab.research.google.com/github/SriRamK345/-Ticket-Resolution-Recommendation-using-TF-IDF-and-Cosine-Similarity/blob/main/Ticket_Resolution_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import re
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics.pairwise import cosine_similarity

# Reading the Data

In [35]:
# Load the ticket/resolution table into a DataFrame.
# NOTE(review): the file is decoded as latin-1; if it was exported on Windows it
# may really be cp1252 (curly quotes would then decode to control characters) — confirm.
df = pd.read_csv(filepath_or_buffer="/content/Book1.csv", encoding="latin-1")

In [36]:
# Preview the first five rows to confirm the expected columns were loaded.
df.head(n=5)

Unnamed: 0,Ticket Description,Possible Resolution
0,My laptop is running extremely slow. It takes ...,"Close unnecessary background applications, run..."
1,I am unable to connect to my Wi-Fi network. It...,Forget the network and reconnect with the corr...
2,My laptop gets extremely hot after just 10-15 ...,"Clean the air vents using compressed air, ensu..."
3,My laptops battery drains within an hour even...,Check the battery health status and replace th...
4,"The screen of my laptop flickers frequently, m...","Update or roll back the display driver, check ..."


In [37]:
# Count missing values per column (axis=0 sums down the rows of each column).
df.isnull().sum(axis=0)

Unnamed: 0,0
Ticket Description,0
Possible Resolution,0


# Preprocessing the Data

In [38]:
# Fetch the NLTK resources used by the preprocessing step:
# the stop-word lists (read via stopwords.words) ...
nltk.download('stopwords')
# ... and the WordNet data backing WordNetLemmatizer.
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [39]:
# Shared text-normalisation resources, built once and reused by preprocess_test.
number_pattern = re.compile(r"\d+")  # matches each run of consecutive digits
lemmatizer = WordNetLemmatizer()
# Kept as a set so the per-word membership test is a hash lookup.
stop_words = set(stopwords.words("english"))

# Translation table that deletes every ASCII punctuation character. Built once
# here rather than on every call to preprocess_test.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

def preprocess_test(text):
  """Normalise a ticket description into a space-separated string of lemmas.

  Processing steps:
    1. Remove every run of digits.
    2. Lower-case the text and split it on whitespace.
    3. Drop stop words.
    4. Strip ASCII punctuation from the surviving tokens and lemmatize them.

  Stop words are tested both on the token as written (minus surrounding
  punctuation) and on its punctuation-free form. Testing only the
  punctuation-free form lets contractions from the stop-word list such as
  "don't" slip through as "dont".

  Args:
    text: The raw description (a ``str``).

  Returns:
    The cleaned description as a single ``str``; empty if nothing survives.
  """
  text = number_pattern.sub("", text).lower()
  kept_words = []
  for token in text.split():
    word = token.translate(_PUNCTUATION_TABLE)
    if not word:
      continue  # the token consisted of punctuation only
    # token.strip(...) keeps inner apostrophes, so "don't," is looked up as "don't".
    if word in stop_words or token.strip(string.punctuation) in stop_words:
      continue
    kept_words.append(lemmatizer.lemmatize(word))
  return " ".join(kept_words)

In [40]:
# Store a normalised copy of every ticket description next to the original text.
df["Cleaned Description"] = df["Ticket Description"].apply(func=preprocess_test)

In [41]:
# Spot-check two randomly chosen rows (unseeded, so the rows differ between runs).
df.sample(n=2)

Unnamed: 0,Ticket Description,Possible Resolution,Cleaned Description
164,Firewall not recognizing VPN connections due t...,Check VPN configuration files for errors; ensu...,firewall recognizing vpn connection due config...
174,Firewall experiencing issues with HA (High Ava...,Check HA clustering configuration files for er...,firewall experiencing issue ha high availabili...


In [42]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [43]:
# Create a TF-IDF vectorizer; no arguments are passed, so scikit-learn's
# default settings apply.
vectorizer = TfidfVectorizer()

In [44]:
# Learn the vocabulary and IDF weights from the cleaned ticket descriptions and
# turn each description into a TF-IDF row vector.
X_issues = vectorizer.fit_transform(df['Cleaned Description'])

# Matrix that incoming issues are compared against. Despite its name it holds
# ticket-description vectors, not resolution vectors: matching is done
# issue-to-issue and the resolution is then looked up by row position.
# It must come from the cleaned text. The vocabulary was fitted on lemmatised
# tokens, so transforming the raw 'Ticket Description' column would silently
# drop every word whose raw form differs from its lemma.
# The name is kept so existing references to X_resolutions continue to work.
X_resolutions = X_issues

In [45]:
def find_resolution(input_issue):
    """Return the stored resolution for the ticket most similar to ``input_issue``.

    The query is normalised with ``preprocess_test`` (the same cleaning applied
    to the text the vectorizer was fitted on) and then compared, by cosine
    similarity of TF-IDF vectors, against every cleaned ticket description.

    Args:
        input_issue: Free-text description of the problem (a ``str``).

    Returns:
        The 'Possible Resolution' of the best-matching ticket, or the string
        "No matching resolution found." when the query shares no vocabulary
        with any known ticket.
    """
    # Clean the query so its tokens line up with the fitted vocabulary.
    cleaned_issue = preprocess_test(input_issue)
    input_vector = vectorizer.transform([cleaned_issue])

    # One similarity score per known ticket, in DataFrame row order.
    similarities = cosine_similarity(input_vector, X_issues).flatten()

    closest_resolution_index = np.argmax(similarities)

    # When every score is zero, argmax returns index 0, which would present the
    # first ticket's resolution as if it were a match.
    if similarities[closest_resolution_index] <= 0:
        return "No matching resolution found."

    return df['Possible Resolution'].iloc[closest_resolution_index]

In [47]:
# Example usage: prompt for a free-text issue, then show the closest known fix.
input_issue = input("Enter the issue:")
resolution = find_resolution(input_issue=input_issue)
print(f"Resolution: {resolution}")

Enter the issue:DNS Resolution Error
Resolution: Flush DNS cache; change DNS servers to Google's public DNS (8.8.8.8 and 8.8.4.4).
