In [227]:
# Load the spaCy "en_core_web_lg" pipeline once; later cells reuse `nlp` for
# lemmatisation (preprocess), token vectors (get_mean_vector) and similarity.
import spacy
nlp = spacy.load("en_core_web_lg")

In [276]:
# Import pandas for tabular data handling
import pandas as pd


# Read the labelled issues from 'ml_training_issues.csv' into the dataframe "df"
df = pd.read_csv('ml_training_issues.csv')

# Print the shape of the data as (rows, columns)
print(df.shape)

# Display the first five rows
df.head()

(78, 2)


Unnamed: 0,issues,category
0,Customer is no longer receiving promotional of...,Opt-out
1,Customer opted out for being solicited for offers,Opt-out
2,Customer has removed themselves from offer not...,Opt-out
3,Customer no longer wishes to receive marketing...,Opt-out
4,Customer has chosen to stop receiving promotio...,Opt-out


In [277]:
# Count rows per label to check the class balance before splitting
df['category'].value_counts()


category
Opt-out     50
ID-Issue    20
Unknown      8
Name: count, dtype: int64

In [278]:
def preprocess(text):
    """Return `text` reduced to its space-joined lemmas.

    The text is parsed with the module-level spaCy pipeline `nlp`. Stop words
    and punctuation tokens are discarded; the lemma of every remaining token
    is kept in its original order.
    """
    kept_lemmas = [
        tok.lemma_
        for tok in nlp(text)
        if not (tok.is_stop or tok.is_punct)
    ]
    return ' '.join(kept_lemmas)

In [279]:
# Lemmatise every raw issue and store the cleaned text in a new column
df['preprocessed_issue'] = df['issues'].apply(preprocess)

In [280]:
# Encode the text labels as integer class ids for the classifier.
df['category_id'] = df['category'].map({'Unknown': 0 , 'Opt-out': 1, 'ID-Issue': 2 })

# Series.map yields NaN for any label missing from the dict above, so fail
# loudly here instead of silently carrying NaN labels into the split/fit cells.
_unmapped = df.loc[df['category_id'].isna(), 'category'].unique()
assert len(_unmapped) == 0, f"Unmapped category labels: {list(_unmapped)}"

In [281]:
# Inspect up to 50 rows to compare each raw issue with its lemmatised text and label id
df.head(50)

Unnamed: 0,issues,category,preprocessed_issue,category_id
0,Customer is no longer receiving promotional of...,Opt-out,customer long receive promotional offer,1
1,Customer opted out for being solicited for offers,Opt-out,customer opt solicit offer,1
2,Customer has removed themselves from offer not...,Opt-out,customer remove offer notification,1
3,Customer no longer wishes to receive marketing...,Opt-out,customer long wish receive marketing email,1
4,Customer has chosen to stop receiving promotio...,Opt-out,customer choose stop receive promotional outreach,1
5,Experiencing an issue with offers not appearing.,Unknown,experience issue offer appear,0
6,Customer has chosen not to receive offers,Opt-out,customer choose receive offer,1
7,Customer declined further promotional outreach,Opt-out,customer decline promotional outreach,1
8,Customer prefers not to receive promotional of...,Opt-out,customer prefer receive promotional offer,1
9,"An update in your login ID setup is necessary,...",ID-Issue,update login ID setup necessary check lead,2


In [282]:
def get_mean_vector(text):
    """Return the mean word vector of `text` as a 1-D numpy array.

    The text is parsed with the module-level spaCy pipeline `nlp`; only tokens
    that actually have a vector contribute to the average.

    If no token has a vector (empty text, or every token is out of
    vocabulary), a zero vector of the pipeline's vector width is returned so
    that every row has the same shape and can be stacked into a 2-D matrix.
    """
    # Local import: the notebook imports numpy only in a later cell, so relying
    # on a global `np` here fails on a fresh kernel run from top to bottom.
    import numpy as np

    token_vectors = [tok.vector for tok in nlp(text) if tok.has_vector]
    if not token_vectors:
        # np.mean([]) would return a scalar NaN (with a RuntimeWarning).
        return np.zeros(nlp.vocab.vectors_length, dtype="float32")
    return np.mean(token_vectors, axis=0)
# Compute one averaged embedding per preprocessed issue and keep it in a new column
df['vector'] = df['preprocessed_issue'].apply(get_mean_vector)


In [283]:
# Sanity-check the encoded integer labels
print(df['category_id'])

0     1
1     1
2     1
3     1
4     1
     ..
73    1
74    2
75    0
76    1
77    2
Name: category_id, Length: 78, dtype: int64


In [284]:
from sklearn.model_selection import train_test_split


# Stratified 80/20 hold-out split; random_state=2022 makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['vector'].values,
    df['category_id'],
    stratify=df['category_id'],  # preserve the class proportions in both splits
    random_state=2022,
    test_size=0.2,  # 20% of the samples go to the test set
)

In [285]:
import numpy as np

# X_train / X_test are 1-D arrays whose elements are themselves vectors;
# np.stack joins those vectors into 2-D (n_samples, n_features) matrices.
X_train_2d, X_test_2d = np.stack(X_train), np.stack(X_test)

# Report the shapes before and after stacking
print("Shape of X_train before reshaping: ", X_train.shape)
print("Shape of X_test before reshaping: ", X_test.shape)
print("Shape of X_train after reshaping: ", X_train_2d.shape)
print("Shape of X_test after reshaping: ", X_test_2d.shape)

Shape of X_train before reshaping:  (62,)
Shape of X_test before reshaping:  (16,)
Shape of X_train after reshaping:  (62, 300)
Shape of X_test after reshaping:  (16, 300)


In [286]:
from sklearn.ensemble import GradientBoostingClassifier


# 1. Create the gradient-boosting classifier. A fixed random_state (the same
#    seed used for the train/test split) makes training reproducible across runs.
clf = GradientBoostingClassifier(random_state=2022)

# 2. Fit on the stacked training embeddings (X_train_2d) and their labels (y_train)
clf.fit(X_train_2d, y_train)


# 3. Predict the labels of the stacked test embeddings (X_test_2d) into y_pred
y_pred = clf.predict(X_test_2d)

In [287]:
# Per-class precision / recall / F1 of the predictions on the held-out test set
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.91      1.00      0.95        10
           2       1.00      1.00      1.00         4

    accuracy                           0.94        16
   macro avg       0.97      0.83      0.87        16
weighted avg       0.94      0.94      0.93        16



In [288]:
# Spot-check a single held-out example with the in-memory classifier
test_index = 2  # position within the test split; change it to inspect another sample
sample_vector = X_test_2d[[test_index]]  # list index keeps the row 2-D: (1, n_features)
predicted_category = clf.predict(sample_vector)[0]  # predict() returns an array; take its only entry

# Show the prediction next to the true label
print(f"Test Issue Vector Shape: {sample_vector.shape}")
print(f"Predicted Category ID: {predicted_category}")
print(f"Actual Category ID: {y_test.iloc[test_index]}")


Test Issue Vector Shape: (1, 300)
Predicted Category ID: 0
Actual Category ID: 0


In [290]:
# Persist the fitted classifier to disk so it can be reloaded without retraining
import joblib

joblib.dump(clf, 'slack_issue_resolution_model.pkl')

['slack_issue_resolution_model.pkl']

In [274]:
import joblib

# Load the trained model back from the file written by joblib.dump.
# NOTE(review): joblib files are pickle-based; only load files from a trusted source.
slack_model = joblib.load('slack_issue_resolution_model.pkl')

In [275]:
# Spot-check a single held-out example with the model reloaded from disk.
# NOTE(review): relies on X_test_2d and y_test from the split/stack cells; run those first.
test_index = 0  # position within the test split; change it to inspect another sample
sample_vector = X_test_2d[[test_index]]  # list index keeps the row 2-D: (1, n_features)
predicted_category = slack_model.predict(sample_vector)[0]  # predict() returns an array; take its only entry

# Show the prediction next to the true label
print(f"Test Issue Vector Shape: {sample_vector.shape}")
print(f"Predicted Category ID: {predicted_category}")
print(f"Actual Category ID: {y_test.iloc[test_index]}")

Test Issue Vector Shape: (1, 300)
Predicted Category ID: 1
Actual Category ID: 1


In [263]:
!pip3 install flask

Collecting flask
  Downloading flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting Werkzeug>=3.1 (from flask)
  Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Collecting itsdangerous>=2.2 (from flask)
  Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting blinker>=1.9 (from flask)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Downloading flask-3.1.0-py3-none-any.whl (102 kB)
Downloading blinker-1.9.0-py3-none-any.whl (8.5 kB)
Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Downloading werkzeug-3.1.3-py3-none-any.whl (224 kB)
Installing collected packages: Werkzeug, itsdangerous, blinker, flask
[0mSuccessfully installed Werkzeug-3.1.3 blinker-1.9.0 flask-3.1.0 itsdangerous-2.2.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip ins

In [265]:
# Compare two near-identical phrases using spaCy's document similarity
text1 = "Customer opted out of offer"
text2 = "Customer opted out of"

# Parse both phrases with the shared pipeline
doc1, doc2 = (nlp(phrase) for phrase in (text1, text2))

print("Cosine Similarity:", doc1.similarity(doc2))

Cosine Similarity: 0.9653920991974476
