In [13]:
class Config:
    TICKET_SUMMARY = 'Ticket Summary'
    INTERACTION_CONTENT = 'Interaction content'
    TYPE_COLS = ['y2', 'y3', 'y4']
    CLASS_COL = 'y2'
    GROUPED = 'y1'  # Not used in chained model


In [19]:
import pandas as pd

# Step 1: Configuration Class
class Config:
    TICKET_SUMMARY = 'Ticket Summary'
    INTERACTION_CONTENT = 'Interaction content'

# Step 2: Define Preprocessing Function
def load_and_clean_data(file_path):
    df = pd.read_csv(file_path)

    # Drop rows where target labels are missing
    df = df.dropna(subset=['Type 2', 'Type 3', 'Type 4'])

    # Combine ticket summary and interaction content into a single 'text' column
    df['text'] = df[Config.TICKET_SUMMARY].fillna('') + ' ' + df[Config.INTERACTION_CONTENT].fillna('')

    # Rename target columns for model training
    df = df.rename(columns={
        'Type 2': 'y2',
        'Type 3': 'y3',
        'Type 4': 'y4'
    })

    # Create chained label combinations
    df['y2_y3'] = df['y2'] + ' ' + df['y3']
    df['y2_y3_y4'] = df['y2'] + ' ' + df['y3'] + ' ' + df['y4']

    return df[['text', 'y2', 'y2_y3', 'y2_y3_y4']]

# Step 3: Call the function with the correct path to your dataset
file_path = "C:/Users/sreel/Downloads/skeleton (1)/skeleton/data/AppGallery.csv"
df = load_and_clean_data(file_path)

# Step 4: Preview the data
df.head()


Unnamed: 0,text,y2,y2_y3,y2_y3_y4
1,[AppGallery]Probleme und Vorschläge Beschreibu...,Problem/Fault,Problem/Fault AppGallery-Install/Upgrade,Problem/Fault AppGallery-Install/Upgrade Can't...
2,Re: RE : [AppGallery]Probleme und Vorschläge R...,Problem/Fault,Problem/Fault AppGallery-Install/Upgrade,Problem/Fault AppGallery-Install/Upgrade Can't...
3,[AppGallery]Problems and suggestions Descripti...,Suggestion,Suggestion AppGallery-Use,Suggestion AppGallery-Use Others
4,Ho pagato ma l'applicazione non ha funzionato...,Problem/Fault,Problem/Fault Third Party APPs,Problem/Fault Third Party APPs Refund
5,Aspiegel Support issue submit Product: AppGall...,Suggestion,Suggestion VIP / Offers / Promotions,Suggestion VIP / Offers / Promotions Offers / ...


In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=1000)

# ----- Stage 1: Predict y2 -----
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model_y2 = RandomForestClassifier()
model_y2.fit(X_train_vec, y_train)
y_pred = model_y2.predict(X_test_vec)

print("Stage 1 - Type 2 Prediction Accuracy:", accuracy_score(y_test, y_pred))

# ----- Stage 2: Predict y2 + y3 -----
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2_y3'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model_y2y3 = RandomForestClassifier()
model_y2y3.fit(X_train_vec, y_train)
y_pred = model_y2y3.predict(X_test_vec)

print("Stage 2 - Type 2 + Type 3 Prediction Accuracy:", accuracy_score(y_test, y_pred))

# ----- Stage 3: Predict y2 + y3 + y4 -----
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2_y3_y4'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model_y2y3y4 = RandomForestClassifier()
model_y2y3y4.fit(X_train_vec, y_train)
y_pred = model_y2y3y4.predict(X_test_vec)

print("Stage 3 - Type 2 + Type 3 + Type 4 Prediction Accuracy:", accuracy_score(y_test, y_pred))


Stage 1 - Type 2 Prediction Accuracy: 0.8333333333333334
Stage 2 - Type 2 + Type 3 Prediction Accuracy: 0.8333333333333334
Stage 3 - Type 2 + Type 3 + Type 4 Prediction Accuracy: 0.6111111111111112


In [21]:
from sklearn.metrics import classification_report

# ----- Stage 1: Predict y2 -----
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model_y2 = RandomForestClassifier()
model_y2.fit(X_train_vec, y_train)
y_pred = model_y2.predict(X_test_vec)

print("🔹 Stage 1 - Type 2")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ----- Stage 2: Predict y2 + y3 -----
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2_y3'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model_y2y3 = RandomForestClassifier()
model_y2y3.fit(X_train_vec, y_train)
y_pred = model_y2y3.predict(X_test_vec)

print("🔹 Stage 2 - Type 2 + Type 3")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ----- Stage 3: Predict y2 + y3 + y4 -----
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2_y3_y4'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model_y2y3y4 = RandomForestClassifier()
model_y2y3y4.fit(X_train_vec, y_train)
y_pred = model_y2y3y4.predict(X_test_vec)

print("🔹 Stage 3 - Type 2 + Type 3 + Type 4")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


🔹 Stage 1 - Type 2
Accuracy: 0.8888888888888888
               precision    recall  f1-score   support

Problem/Fault       0.89      1.00      0.94        16
   Suggestion       0.00      0.00      0.00         2

     accuracy                           0.89        18
    macro avg       0.44      0.50      0.47        18
 weighted avg       0.79      0.89      0.84        18



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


🔹 Stage 2 - Type 2 + Type 3
Accuracy: 0.7777777777777778
                                          precision    recall  f1-score   support

Problem/Fault AppGallery-Install/Upgrade       0.80      0.80      0.80         5
Problem/Fault Coupon/Gifts/Points Issues       0.82      0.90      0.86        10
                   Problem/Fault General       0.00      0.00      0.00         0
          Problem/Fault Third Party APPs       1.00      1.00      1.00         1
                      Suggestion General       0.00      0.00      0.00         1
    Suggestion VIP / Offers / Promotions       0.00      0.00      0.00         1

                                accuracy                           0.78        18
                               macro avg       0.44      0.45      0.44        18
                            weighted avg       0.73      0.78      0.75        18



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


🔹 Stage 3 - Type 2 + Type 3 + Type 4
Accuracy: 0.6666666666666666
                                                                              precision    recall  f1-score   support

                 Problem/Fault AppGallery-Install/Upgrade Can't install Apps       0.50      0.50      0.50         2
                  Problem/Fault AppGallery-Install/Upgrade Can't update Apps       1.00      1.00      1.00         1
Problem/Fault AppGallery-Install/Upgrade Other download/install/update issue       1.00      0.50      0.67         2
               Problem/Fault AppGallery-Use UI Abnormal in Huawei AppGallery       0.00      0.00      0.00         0
               Problem/Fault Coupon/Gifts/Points Issues Can't use or acquire       0.88      1.00      0.93         7
          Problem/Fault Coupon/Gifts/Points Issues Cooperated campaign issue       0.50      0.33      0.40         3
                                       Problem/Fault Third Party APPs Refund       0.50      1.00      0.67

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [22]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# --- Config for column access ---
class Config:
    TICKET_SUMMARY = 'Ticket Summary'
    INTERACTION_CONTENT = 'Interaction content'

# --- Load and clean Purchasing.csv ---
def load_and_clean_data(file_path):
    df = pd.read_csv(file_path)
    df = df.dropna(subset=['Type 2', 'Type 3', 'Type 4'])

    df['text'] = df[Config.TICKET_SUMMARY].fillna('') + ' ' + df[Config.INTERACTION_CONTENT].fillna('')

    df = df.rename(columns={
        'Type 2': 'y2',
        'Type 3': 'y3',
        'Type 4': 'y4'
    })

    df['y2_y3'] = df['y2'] + ' ' + df['y3']
    df['y2_y3_y4'] = df['y2'] + ' ' + df['y3'] + ' ' + df['y4']

    return df[['text', 'y2', 'y2_y3', 'y2_y3_y4']]


df = load_and_clean_data("C:/Users/sreel/Downloads/skeleton (1)/skeleton/data/Purchasing.csv")

# --- TF-IDF Vectorizer ---
vectorizer = TfidfVectorizer(max_features=1000)

# --- Stage 1: Predict y2 ---
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model1 = RandomForestClassifier()
model1.fit(X_train_vec, y_train)
y_pred = model1.predict(X_test_vec)

print("🔹 Stage 1 – Type 2")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# --- Stage 2: Predict y2 + y3 ---
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2_y3'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model2 = RandomForestClassifier()
model2.fit(X_train_vec, y_train)
y_pred = model2.predict(X_test_vec)

print("\n🔹 Stage 2 – Type 2 + Type 3")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# --- Stage 3: Predict y2 + y3 + y4 ---
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['y2_y3_y4'], test_size=0.2, random_state=42)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model3 = RandomForestClassifier()
model3.fit(X_train_vec, y_train)
y_pred = model3.predict(X_test_vec)

print("\n🔹 Stage 3 – Type 2 + Type 3 + Type 4")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


🔹 Stage 1 – Type 2
Accuracy: 0.9375
               precision    recall  f1-score   support

Problem/Fault       0.67      1.00      0.80         2
   Suggestion       1.00      0.93      0.96        14

     accuracy                           0.94        16
    macro avg       0.83      0.96      0.88        16
 weighted avg       0.96      0.94      0.94        16


🔹 Stage 2 – Type 2 + Type 3
Accuracy: 0.875
                             precision    recall  f1-score   support

Problem/Fault Payment issue       0.67      1.00      0.80         2
         Suggestion Payment       1.00      0.86      0.92        14
          Suggestion Refund       0.00      0.00      0.00         0

                   accuracy                           0.88        16
                  macro avg       0.56      0.62      0.57        16
               weighted avg       0.96      0.88      0.91        16



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



🔹 Stage 3 – Type 2 + Type 3 + Type 4
Accuracy: 0.875
                                                                  precision    recall  f1-score   support

                      Problem/Fault Payment issue Payment failed       1.00      1.00      1.00         1
                        Problem/Fault Payment issue Risk Control       0.50      1.00      0.67         1
                    Suggestion Payment Subscription cancellation       1.00      0.86      0.92        14
Suggestion Refund Within 14 days of purchase (not product issue)       0.00      0.00      0.00         0

                                                        accuracy                           0.88        16
                                                       macro avg       0.62      0.71      0.65        16
                                                    weighted avg       0.97      0.88      0.91        16



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
