<a href="https://colab.research.google.com/github/DeepakDS2709/DeepakDS2709/blob/main/watsonxfinalclassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import spacy
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

In [None]:
# Sample tickets: every record pairs an issue description with the
# assignment group it belongs to, so the two can never drift out of step.
_sample_tickets = [
    (
        "If the user has technical issues with the application collect: error message (if any), steps to reproduce the error/message, print-screen of the error, urgency, contact number)",
        "L2_Americas_App_CDG-AMDOCS",
    ),
    (
        "Applications issues with Middleware, Middleware (Go Anywhere FTP) or Informatica IICS. For Password reset, running process, Printing Issues, Running Reports, Changing dates on Shipments.",
        "L2_Global_App_MIDDLEWARE_SOA",
    ),
    (
        "List of issues related to iQuote: Password reset, Cannot log in, Application not displaying content, Slow response time, reports not running, Change shipment dates",
        "L2_Americas_App_iQuote",
    ),
    # Add more (issue, assignment group) records here
]

# Column-oriented view of the records: one list per column, same row order
data = {
    'Issues': [issue for issue, _group in _sample_tickets],
    'Assignment Group': [group for _issue, group in _sample_tickets],
}

In [None]:
# Build the working DataFrame: each dict key becomes a column,
# each list position becomes a row
df = pd.DataFrame.from_dict(data, orient="columns")


In [None]:
# Load the small English spaCy pipeline.
# The notebook only reads `lemma_` and `is_stop` from the tokens, so the
# dependency parser and named-entity recogniser are disabled at load time to
# avoid computing annotations that are never used. Every other component is
# left as shipped with the model.
nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])

In [None]:
# Text normalisation applied to every issue before vectorisation
def preprocess_text(text):
    """Return the lemmas of the non-stop-word tokens in `text`, space-joined.

    `text` is run through the module-level spaCy pipeline `nlp`. Tokens
    flagged as stop words (`is_stop`) are dropped; each remaining token
    contributes its `lemma_`. The lemmas are joined with single spaces in
    their original order.
    """
    kept_lemmas = []
    for tok in nlp(text):
        if tok.is_stop:
            continue
        kept_lemmas.append(tok.lemma_)
    return " ".join(kept_lemmas)

In [None]:
# Add a 'Processed Issues' column holding the normalised form of each issue
df['Processed Issues'] = [preprocess_text(issue) for issue in df['Issues']]


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
processed_issues = df['Processed Issues']
assignment_groups = df['Assignment Group']
X_train, X_test, y_train, y_test = train_test_split(
    processed_issues,
    assignment_groups,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Learn the TF-IDF vocabulary and weights from the training text only,
# then encode both splits with that same fitted vectorizer
vectorizer = TfidfVectorizer().fit(X_train)
X_train_vectorized = vectorizer.transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)


In [None]:
# Fit a Multinomial Naive Bayes model on the TF-IDF training features
classifier = MultinomialNB()
classifier.fit(X=X_train_vectorized, y=y_train)

In [None]:
# Label the held-out rows with the fitted classifier
predictions = classifier.predict(X=X_test_vectorized)


In [None]:
# Share of held-out rows whose predicted group matches the true group.
# NOTE: the sample data has a single example per assignment group, so any
# held-out row carries a label the classifier never saw during training and
# the score is 0.00. Add several examples per group before reading anything
# into this number.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.00


In [None]:
# Classify a previously unseen issue with the fitted vectorizer and model
new_issue = "applications for password reset ."
new_issue_preprocessed = preprocess_text(new_issue)
# transform() takes an iterable of documents, hence the one-element list
new_issue_vectorized = vectorizer.transform([new_issue_preprocessed])
# One document went in, so exactly one label comes back
(predicted_assignment_group,) = classifier.predict(new_issue_vectorized)
print(f"Predicted Assignment Group: {predicted_assignment_group}")

Predicted Assignment Group: L2_Americas_App_iQuote
