In [22]:
import pandas as pd
from google.colab import drive
import re
import spacy
from spacy.util import minibatch, compounding
import random
from spacy.training import Example
from spacy.tokens import DocBin
from spacy.util import filter_spans
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix

In [23]:
drive.mount('/content/drive/')

%cd drive/MyDrive/Text Mining Project

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
[Errno 2] No such file or directory: 'drive/MyDrive/Text Mining Project'
/content/drive/MyDrive/Text Mining Project


In [24]:
# Relative path: resolved against the working directory chosen by the %cd cell.
file_path = 'datasetV2.csv'

# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# looks like a row index that was written into the CSV. Passing index_col=0
# would drop it -- confirm nothing downstream relies on that column first.
df = pd.read_csv(file_path)

In [25]:
# Preview the first rows to check that the `text` and `label` columns loaded.
df.head()

Unnamed: 0,text,label
0,The in-app chat support feature is incredibly ...,1
1,The ability to track multiple orders simultane...,1
2,I suggest adding a feature to customize delive...,1
3,Providing estimated delivery times for each re...,1
4,Offering discounts for frequent users would en...,1


## Preprocessing

In [26]:
def clean_text(text):
    """Normalise a review string for keyword matching.

    Steps, in order:
      1. Delete every character that is not an ASCII letter, a digit,
         whitespace or a period. Hyphens and apostrophes are deleted too,
         so "in-app" becomes "inapp".
      2. Collapse each run of whitespace into a single space (leading and
         trailing whitespace is collapsed, not stripped).
      3. Lower-case the result.

    Args:
        text: The raw review string.

    Returns:
        The normalised string.
    """
    # Drop the disallowed characters first so that step 2 also collapses any
    # gap left behind by a removed symbol that sat between two spaces.
    letters_digits_only = re.sub(r'[^A-Za-z0-9\s\.]', '', text)
    return re.sub(r'\s+', ' ', letters_digits_only).lower()

In [27]:
# Normalise every review in place; `text` now holds the cleaned strings.
df['text'] = df['text'].map(clean_text)

## Model


In [28]:
# Lower-case substrings whose presence marks a review as constructive feedback.
# Exact duplicates from the earlier list were removed; because a single hit is
# enough to return 1, dropping them cannot change any prediction.
CONSTRUCTIVE_KEYWORDS = (
    "improve", "enhance", "fix", "change", "update", "adjust", "revise", "modify",
    "suggest", "recommend", "advise", "encourage", "strengthen",
    "clarify", "consider", "explore", "rework", "optimize",
    "feature", "bug", "performance", "interface",
    "usability", "navigation", "speed", "loading", "responsiveness",
    "compatibility", "security", "privacy", "feedback", "review", "rating",
    "test", "stability", "crash", "error",
    "warning", "notification", "settings", "behavior", "timing", "request",
    "design", "layout", "functionality", "user-friendly", "efficiency",
    "error-free", "smooth", "streamlined", "robust", "intuitive", "suggestion",
    "recommendation", "problem", "issue", "requesting", "need", "demand",
    "idea", "solution", "enhancement", "modification", "adjustment",
    "revision", "upgrade", "overhaul", "redo", "remake",
    "redesign", "revamp", "amend",
    "rethink", "reconsider", "refinement", "overcome", "innovation", "progress",
    "innovative", "evolve", "streamline", "enhanced", "optimized", "improved",
    "upgraded", "redesigned", "revamped", "amended", "refined", "innovated",
    "evolved", "effortless", "flawless",
    "friendly", "easy-to-use", "usable", "effective", "efficient",
    "convenient", "seamless", "quick", "responsive",
    "stable", "reliable", "secure", "private", "protected", "consistent",
    "satisfactory", "satisfaction", "pleasant", "enjoyable", "helpful", "useful",
    "valuable", "valued", "beneficial", "advantageous", "superior", "quality",
    "high-quality", "premium", "exceptional", "excellent", "outstanding", "remarkable",
    "fantastic", "awesome", "wonderful", "amazing", "impressive", "brilliant",
    "stellar", "great", "good", "well", "positive", "favorable", "constructive",
)


def identify_constructive_feedback(review):
    """Flag a review as constructive feedback using a keyword heuristic.

    The review is labelled 1 as soon as any entry of CONSTRUCTIVE_KEYWORDS
    occurs in it as a substring, otherwise 0.

    Matching is by substring, not by whole word: "bug" also hits "bugs", but
    "test" also hits "latest". Hyphenated keywords such as "easy-to-use" only
    match text that still contains its hyphens; the notebook's clean_text
    step deletes hyphens before this function is called.

    Args:
        review: The review text. Strings are matched case-insensitively.
            None or a float (pandas represents a missing text cell as NaN)
            is treated as an empty review.

    Returns:
        1 if at least one keyword is present, else 0.
    """
    # A missing cell would otherwise raise TypeError on the `in` test below.
    if review is None or isinstance(review, float):
        return 0

    # Keywords are all lower-case, so fold the text to match raw reviews too.
    if isinstance(review, str):
        review = review.lower()

    return int(any(keyword in review for keyword in CONSTRUCTIVE_KEYWORDS))

In [29]:
# The keyword heuristic has no training step, so the whole dataset is scored:
# the cleaned review text is the input and `label` is the ground truth.
X_test, y_test = df['text'], df['label']

In [30]:
# One 0/1 prediction per review, in the same order as X_test.
y_pred = [identify_constructive_feedback(text) for text in X_test]

In [31]:
# Per-class precision/recall/F1 of the keyword heuristic, scored against the
# labels of every row in the dataset (X_test is the full `text` column).
print(classification_report(y_test,y_pred))
# Rows are true labels, columns are predicted labels (scikit-learn convention).
print('Confusion Matrix:',confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.53      0.55      0.54        94
           1       0.68      0.66      0.67       138

    accuracy                           0.62       232
   macro avg       0.60      0.61      0.61       232
weighted avg       0.62      0.62      0.62       232

Confusion Matrix: [[52 42]
 [47 91]]


In [32]:
# y_pred holds hard 0/1 labels, not scores, so this AUC has a single operating
# point and equals balanced accuracy: (recall of class 0 + recall of class 1) / 2.
auc_score = roc_auc_score(y_test, y_pred)
print(f'AUC Score: {auc_score}')

AUC Score: 0.6063058896083874


In [33]:
# Keep only the reviews the heuristic labelled as constructive (prediction 1).
feedbacks = [text for text, prediction in zip(X_test, y_pred) if prediction == 1]

feedbacks

['the inapp chat support feature is incredibly helpful.',
 'the ability to track multiple orders simultaneously would be beneficial.',
 'i suggest adding a feature to customize delivery instructions.',
 'providing estimated delivery times for each restaurant would improve transparency.',
 'offering discounts for frequent users would encourage loyalty.',
 'the apps search function needs refinement for better results.',
 'ive experienced issues with payment processing which is frustrating.',
 'the latest update has introduced bugs making the app unusable.',
 'the app lacks a feature to reorder previous orders easily.',
 'a feature to rate specific items within an order would provide valuable feedback.',
 'the apps notification system needs improvement for timely updates.',
 'adding a dark mode option would be great for nighttime usage.',
 'the apps loading times are frustratingly slow.',
 'the customer support team is unresponsive to inquiries.',
 'the apps layout could be more visually 