In [1]:
import pandas as pd
import plotly.express as px
from gensim.models.doc2vec import Doc2Vec

# Supervised Learning Classic ML Classificatin Models using Scikit-Learn

In [2]:
df = pd.read_csv("data/company_tagged.csv", sep=";")
df

Unnamed: 0,company_name,description_en,category,score,tokenized_desc,tag
0,Le Fourgon,Le Fourgon delivers your stored drinks to your...,food_beverages_tobacco,49,fourgon deliver store drink home order place l...,0
1,Comptoir des Vignes,Comptoir des Vignes is a brand of cellars spec...,food_beverages_tobacco,49,comptoir des vigne brand cellar specialize win...,1
2,Shin Sekai,Welcome to our Trustpilot page! Shin Sekai is ...,food_beverages_tobacco,49,welcome trustpilot page shin sekai online figu...,2
3,Nutri Naturel,"Nutri-Naturel.com, the leading online organic ...",food_beverages_tobacco,49,nutri naturel com lead online organic grocery ...,3
4,Maison Martin - Le Piment Français,Maison Martin - Le Piment Francais is the firs...,food_beverages_tobacco,49,maison martin piment francais brand artisanal ...,4
...,...,...,...,...,...,...
11385,Ljbautoparts,"Sale of auto body spare parts online: fender, ...",vehicles_transportation,12,sale auto body spare part online fender bumper...,11385
11386,Aéroports de Paris,"Aeroports de Paris, with its three platforms, ...",vehicles_transportation,12,aeroport paris platform major connection point...,11386
11387,Online SAS,"Shared hosting with unlimited traffic, domain ...",vehicles_transportation,17,share host unlimited traffic domain dedicated ...,11387
11388,shopequitation,Online specialist in the sale of horse riding ...,vehicles_transportation,12,online specialist sale horse ride equipment sa...,11388


In [3]:
category_count = df["category"].value_counts().reset_index()
category_count

Unnamed: 0,category,count
0,electronics_technology,1172
1,home_garden,1101
2,shopping_fashion,1054
3,money_insurance,1030
4,events_entertainment,761
5,beauty_wellbeing,755
6,food_beverages_tobacco,736
7,construction_manufactoring,704
8,business_services,679
9,education_training,648


In [4]:
fig = px.bar(category_count, x="category", y="count", title="Number of Companies per Category")
fig.show()

In [5]:
category_list = list(category_count[category_count["count"] >= 450]["category"])
category_list

['electronics_technology',
 'home_garden',
 'shopping_fashion',
 'money_insurance',
 'events_entertainment',
 'beauty_wellbeing',
 'food_beverages_tobacco',
 'construction_manufactoring',
 'business_services',
 'education_training',
 'vehicles_transportation']

In [6]:
company_sample = df[df["category"].isin(category_list)].dropna()
company_sample["category"].value_counts().reset_index()

Unnamed: 0,category,count
0,electronics_technology,1172
1,home_garden,1101
2,shopping_fashion,1054
3,money_insurance,1030
4,events_entertainment,761
5,beauty_wellbeing,755
6,food_beverages_tobacco,736
7,construction_manufactoring,704
8,business_services,679
9,education_training,648


In [7]:
company_sample = company_sample.groupby('category').apply(lambda x: x.sample(n=407, random_state=42)).reset_index(drop=True)
company_sample

Unnamed: 0,company_name,description_en,category,score,tokenized_desc,tag
0,Oscilance Sophrologie,Develop your well-being with sophrology Diplom...,beauty_wellbeing,47,develop sophrology diploma rncp certification ...,2256
1,Lesentiergeobio,Welcome to my little ecological store which is...,beauty_wellbeing,40,welcome little ecological store grow little da...,2501
2,Salvia Nutrition,Expert in aromatherapy Essential oils and cosm...,beauty_wellbeing,47,expert aromatherapy essential oil cosmetic fre...,2004
3,Mahasoa,Buy your hairdresser's products online thanks ...,beauty_wellbeing,46,buy hairdresser product online thank mahasoa m...,2042
4,Cannibia,"To respect our commitment, the efficiency, saf...",beauty_wellbeing,41,respect commitment efficiency safety quality p...,2458
...,...,...,...,...,...,...
4472,Colmar Auto Bilan,Colmar Auto Bilan is a technical inspection ce...,vehicles_transportation,38,colmar auto bilan technical inspection center ...,11183
4473,CapCar,CapCar is revolutionizing the buying and selli...,vehicles_transportation,32,capcar revolutionize buying selling car trust ...,11274
4474,macadam cycles,Merchant of unique bicycles and accessories! W...,vehicles_transportation,24,merchant unique bicycle accessory official dis...,11333
4475,Vélo service Travu,Are you planning to buy a bike or have yours r...,vehicles_transportation,38,plan buy bike repair come talk support,11185


In [8]:
d2v = Doc2Vec.load("models/d2v.model")

In [9]:
tags = company_sample["tag"]
labels = company_sample["company_name"]
category = company_sample["category"]
vectors = [d2v.dv[tag] for tag in tags]

In [10]:
len(tags)

4477

In [11]:
len(vectors)

4477

In [12]:
vectors_df = pd.DataFrame(vectors)

In [13]:
vectors_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,1.555916,3.732630,3.569272,-1.574828,3.526356,-1.541315,-5.257199,0.213623,0.593278,3.616737,-16.275211,0.581281,-3.401930,2.320469,3.157410,4.393044,12.101939,-0.747775,-0.511097,1.408481
1,1.348109,1.019928,-9.346064,-5.572748,-4.856998,-6.188309,-3.559583,5.256246,-8.304394,-1.915268,3.382292,6.648015,-1.871945,9.686320,2.735891,1.763738,8.780616,1.186969,1.999301,2.743771
2,9.066367,-3.344702,-1.788065,1.012523,-2.358032,-3.962410,-4.055778,-1.833944,2.938650,1.612116,-1.374383,5.778584,-4.997014,2.162220,3.363889,4.667016,13.557610,-5.855724,-5.707810,1.010891
3,1.495197,-2.396264,-4.005068,-4.147858,3.863000,-1.856706,-1.879233,1.139206,-9.948735,5.715406,-2.385922,3.473161,-0.575435,-2.402317,-3.309796,1.950958,8.737164,-6.225251,1.628477,-2.316124
4,4.377453,-9.588228,0.572681,-1.564694,-2.288914,-9.874044,-3.364615,5.585204,-2.070639,-2.879122,-5.677154,2.318398,-5.963313,-1.794160,5.300523,4.461276,7.500114,-5.311936,-2.008378,4.261292
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4472,-6.717722,0.349456,3.132945,-0.401000,-0.441590,-1.189616,1.708816,-0.146644,-0.190129,7.019242,-2.543383,-2.808481,-3.841403,-0.658023,2.433311,0.312751,11.966047,-3.948421,3.102744,-6.873521
4473,-6.911193,1.440823,2.548128,-3.100884,8.822441,4.941241,2.198640,5.161830,-9.020262,-3.514540,2.992202,-2.823585,-9.801691,5.680787,-6.144229,1.151869,5.331325,-5.028971,3.836699,-10.583253
4474,1.455796,-2.668402,-1.389625,-3.062446,-2.821195,-0.383443,-1.318816,-0.228552,-7.144111,-2.114722,0.626333,-8.447633,3.005778,-2.626287,2.584342,-6.401578,6.933508,4.941571,-4.600127,-5.776989
4475,-5.760660,5.931286,-2.306327,-6.683461,-0.256726,-6.611842,6.459180,-4.065218,-8.164078,-0.796073,-3.951172,3.274068,5.370887,5.028344,-1.199682,1.805775,7.411559,-5.398070,6.530111,-2.206319


In [14]:
vectors_df["category"] = category

In [15]:
vectors_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,category
0,1.555916,3.732630,3.569272,-1.574828,3.526356,-1.541315,-5.257199,0.213623,0.593278,3.616737,...,0.581281,-3.401930,2.320469,3.157410,4.393044,12.101939,-0.747775,-0.511097,1.408481,beauty_wellbeing
1,1.348109,1.019928,-9.346064,-5.572748,-4.856998,-6.188309,-3.559583,5.256246,-8.304394,-1.915268,...,6.648015,-1.871945,9.686320,2.735891,1.763738,8.780616,1.186969,1.999301,2.743771,beauty_wellbeing
2,9.066367,-3.344702,-1.788065,1.012523,-2.358032,-3.962410,-4.055778,-1.833944,2.938650,1.612116,...,5.778584,-4.997014,2.162220,3.363889,4.667016,13.557610,-5.855724,-5.707810,1.010891,beauty_wellbeing
3,1.495197,-2.396264,-4.005068,-4.147858,3.863000,-1.856706,-1.879233,1.139206,-9.948735,5.715406,...,3.473161,-0.575435,-2.402317,-3.309796,1.950958,8.737164,-6.225251,1.628477,-2.316124,beauty_wellbeing
4,4.377453,-9.588228,0.572681,-1.564694,-2.288914,-9.874044,-3.364615,5.585204,-2.070639,-2.879122,...,2.318398,-5.963313,-1.794160,5.300523,4.461276,7.500114,-5.311936,-2.008378,4.261292,beauty_wellbeing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4472,-6.717722,0.349456,3.132945,-0.401000,-0.441590,-1.189616,1.708816,-0.146644,-0.190129,7.019242,...,-2.808481,-3.841403,-0.658023,2.433311,0.312751,11.966047,-3.948421,3.102744,-6.873521,vehicles_transportation
4473,-6.911193,1.440823,2.548128,-3.100884,8.822441,4.941241,2.198640,5.161830,-9.020262,-3.514540,...,-2.823585,-9.801691,5.680787,-6.144229,1.151869,5.331325,-5.028971,3.836699,-10.583253,vehicles_transportation
4474,1.455796,-2.668402,-1.389625,-3.062446,-2.821195,-0.383443,-1.318816,-0.228552,-7.144111,-2.114722,...,-8.447633,3.005778,-2.626287,2.584342,-6.401578,6.933508,4.941571,-4.600127,-5.776989,vehicles_transportation
4475,-5.760660,5.931286,-2.306327,-6.683461,-0.256726,-6.611842,6.459180,-4.065218,-8.164078,-0.796073,...,3.274068,5.370887,5.028344,-1.199682,1.805775,7.411559,-5.398070,6.530111,-2.206319,vehicles_transportation


In [16]:
df_shuffled = vectors_df.sample(frac = 1)

In [17]:
df_shuffled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,category
2085,1.224406,3.384380,0.322843,1.342431,-0.289813,-2.298068,-1.124613,0.848236,-2.104766,-0.956906,...,0.483626,0.062560,-1.762253,-2.342299,-0.414950,4.476934,0.071476,-0.337353,-3.793461,events_entertainment
3744,-6.885200,1.457719,0.291891,-4.906013,0.596729,-3.110317,-0.331666,3.179823,-4.502203,-6.758286,...,3.510781,-5.100393,-5.028840,-0.280375,-1.996063,1.724986,0.138318,-1.151723,-4.873713,shopping_fashion
2736,1.088331,-4.643239,-1.429445,-2.709188,-7.896357,-3.144679,-0.951984,10.489480,-4.401153,-7.203173,...,-0.770394,1.043401,-1.008071,1.354683,2.899529,0.735733,0.657103,-2.020345,4.575097,food_beverages_tobacco
470,5.676783,-1.974664,1.303457,-2.249200,-1.645018,1.190687,-4.632554,15.975235,-3.482816,-9.925109,...,5.706667,3.216900,-1.295587,-3.196220,9.752891,8.637935,-1.675328,6.178218,4.520153,business_services
2128,-5.062718,-1.171038,1.665671,-0.698343,-7.040158,-5.058622,0.265715,2.210385,-2.705788,-7.182459,...,5.793700,-4.967267,-0.355086,-8.555696,-3.867837,7.872692,3.496317,-5.296865,-7.974826,events_entertainment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,-2.492742,-4.193331,-0.118954,-3.792245,-3.854279,-1.917014,3.587872,5.773516,-2.907598,2.174008,...,-4.768588,-0.898678,-5.521359,-4.377561,0.656628,7.197237,-3.659373,-0.213172,-0.662499,beauty_wellbeing
3230,1.017589,-3.327535,-0.758948,-0.906026,2.060287,-2.666766,5.542036,2.425282,-6.711596,-9.750267,...,-0.128969,-0.526617,-2.393811,4.213388,-6.744339,8.520102,-3.912719,-6.889428,0.729159,home_garden
1123,3.044461,-2.552159,2.310256,9.462728,2.196751,-3.280374,4.472308,4.085920,-10.613445,-7.233757,...,-3.513761,5.417091,6.037434,3.134390,1.053260,-0.229303,-6.701207,6.326281,-10.399705,construction_manufactoring
2046,3.849948,0.488308,0.084381,0.578820,-4.431014,-0.703103,2.800950,8.625644,4.904677,-10.856080,...,-5.291849,8.312619,0.098279,4.747036,5.768453,1.689818,-1.451958,0.987114,-0.253188,events_entertainment


### Split Train Test

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
y = df_shuffled["category"]
X = df_shuffled.drop(columns=["category"])

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=2)

### Classification Models

In [21]:
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier

### SVM

In [22]:
svm = SVC(kernel = 'linear', random_state = 0)
svm.fit(X_train, y_train)
#Prediction sur le Test set
y_pred = svm.predict(X_test)

In [23]:
from sklearn.metrics import classification_report
from sklearn.metrics  import f1_score,accuracy_score

In [24]:
report = classification_report(y_test, y_pred, output_dict=True)

In [25]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.53      0.58      0.56        77
         business_services       0.40      0.28      0.33        90
construction_manufactoring       0.38      0.33      0.36        75
        education_training       0.66      0.73      0.69        93
    electronics_technology       0.52      0.63      0.57        83
      events_entertainment       0.49      0.39      0.44       100
    food_beverages_tobacco       0.59      0.64      0.61        84
               home_garden       0.40      0.31      0.35        80
           money_insurance       0.60      0.67      0.63        67
          shopping_fashion       0.52      0.64      0.57        75
   vehicles_transportation       0.60      0.67      0.63        72

                  accuracy                           0.53       896
                 macro avg       0.52      0.53      0.52       896
              weighted avg       0.52      0.5

### GaussianNB

In [26]:
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

In [27]:
report = classification_report(y_test, y_pred, output_dict=True)

In [28]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.52      0.57      0.54        77
         business_services       0.43      0.30      0.35        90
construction_manufactoring       0.42      0.43      0.42        75
        education_training       0.67      0.71      0.69        93
    electronics_technology       0.56      0.52      0.54        83
      events_entertainment       0.42      0.30      0.35       100
    food_beverages_tobacco       0.64      0.64      0.64        84
               home_garden       0.42      0.34      0.38        80
           money_insurance       0.55      0.67      0.60        67
          shopping_fashion       0.44      0.60      0.51        75
   vehicles_transportation       0.53      0.68      0.60        72

                  accuracy                           0.52       896
                 macro avg       0.51      0.52      0.51       896
              weighted avg       0.51      0.5

### SGDClassifier

In [None]:
clf = SGDClassifier(loss="log_loss", penalty="l2")
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.50      0.38      0.43        69
         business_services       0.08      0.03      0.05        94
construction_manufactoring       0.20      0.21      0.21        76
        education_training       0.58      0.80      0.67        70
    electronics_technology       0.30      0.48      0.37        79
      events_entertainment       0.17      0.15      0.16        78
    food_beverages_tobacco       0.49      0.33      0.39        85
               home_garden       0.22      0.64      0.32        81
           money_insurance       0.61      0.21      0.31        90
          shopping_fashion       0.46      0.39      0.42        85
   vehicles_transportation       0.61      0.22      0.33        89

                  accuracy                           0.34       896
                 macro avg       0.38      0.35      0.33       896
              weighted avg       0.38      0.3

### KNeighborsClassifier

In [38]:
knn_clf=KNeighborsClassifier(n_neighbors=150)
knn_clf.fit(X_train,y_train)
ypred=knn_clf.predict(X_test)

In [39]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.52      0.57      0.54        77
         business_services       0.43      0.30      0.35        90
construction_manufactoring       0.42      0.43      0.42        75
        education_training       0.67      0.71      0.69        93
    electronics_technology       0.56      0.52      0.54        83
      events_entertainment       0.42      0.30      0.35       100
    food_beverages_tobacco       0.64      0.64      0.64        84
               home_garden       0.42      0.34      0.38        80
           money_insurance       0.55      0.67      0.60        67
          shopping_fashion       0.44      0.60      0.51        75
   vehicles_transportation       0.53      0.68      0.60        72

                  accuracy                           0.52       896
                 macro avg       0.51      0.52      0.51       896
              weighted avg       0.51      0.5

### DecisionTreeClassifier

In [46]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier(criterion="entropy", max_depth=10)

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [47]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.25      0.34      0.29        77
         business_services       0.25      0.19      0.22        90
construction_manufactoring       0.23      0.23      0.23        75
        education_training       0.40      0.33      0.36        93
    electronics_technology       0.26      0.29      0.28        83
      events_entertainment       0.21      0.21      0.21       100
    food_beverages_tobacco       0.42      0.42      0.42        84
               home_garden       0.28      0.29      0.29        80
           money_insurance       0.36      0.33      0.34        67
          shopping_fashion       0.28      0.31      0.29        75
   vehicles_transportation       0.36      0.38      0.37        72

                  accuracy                           0.30       896
                 macro avg       0.30      0.30      0.30       896
              weighted avg       0.30      0.3

### RandomForestClassifier

In [48]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [49]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.51      0.53      0.52        77
         business_services       0.35      0.21      0.26        90
construction_manufactoring       0.38      0.36      0.37        75
        education_training       0.64      0.70      0.67        93
    electronics_technology       0.47      0.49      0.48        83
      events_entertainment       0.49      0.22      0.30       100
    food_beverages_tobacco       0.51      0.63      0.57        84
               home_garden       0.47      0.33      0.39        80
           money_insurance       0.50      0.64      0.56        67
          shopping_fashion       0.40      0.61      0.48        75
   vehicles_transportation       0.46      0.62      0.53        72

                  accuracy                           0.48       896
                 macro avg       0.47      0.49      0.47       896
              weighted avg       0.47      0.4

In [51]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint

In [52]:
param_dist = {'n_estimators': randint(50,500),
              'max_depth': randint(1,20)}

# Create a random forest classifier
rf = RandomForestClassifier()

# Use random search to find the best hyperparameters
rand_search = RandomizedSearchCV(rf, 
                                 param_distributions = param_dist, 
                                 n_iter=5, 
                                 cv=5)

# Fit the random search object to the data
rand_search.fit(X_train, y_train)

In [53]:
# Create a variable for the best model
best_rf = rand_search.best_estimator_

# Print the best hyperparameters
print('Best hyperparameters:',  rand_search.best_params_)

Best hyperparameters: {'max_depth': 13, 'n_estimators': 393}


In [54]:
rf = RandomForestClassifier(max_depth=13, n_estimators=393)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [55]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.58      0.56      0.57        77
         business_services       0.38      0.29      0.33        90
construction_manufactoring       0.37      0.37      0.37        75
        education_training       0.68      0.72      0.70        93
    electronics_technology       0.44      0.42      0.43        83
      events_entertainment       0.48      0.24      0.32       100
    food_beverages_tobacco       0.55      0.71      0.62        84
               home_garden       0.50      0.33      0.39        80
           money_insurance       0.56      0.70      0.62        67
          shopping_fashion       0.41      0.61      0.49        75
   vehicles_transportation       0.49      0.62      0.55        72

                  accuracy                           0.50       896
                 macro avg       0.49      0.51      0.49       896
              weighted avg       0.50      0.5

### GradientBoostingClassifier

In [56]:
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,  max_depth=1, random_state=0)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [57]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.40      0.47      0.43        77
         business_services       0.29      0.22      0.25        90
construction_manufactoring       0.34      0.32      0.33        75
        education_training       0.59      0.65      0.62        93
    electronics_technology       0.43      0.49      0.46        83
      events_entertainment       0.30      0.22      0.25       100
    food_beverages_tobacco       0.48      0.52      0.50        84
               home_garden       0.32      0.28      0.30        80
           money_insurance       0.54      0.57      0.55        67
          shopping_fashion       0.40      0.47      0.43        75
   vehicles_transportation       0.53      0.58      0.56        72

                  accuracy                           0.43       896
                 macro avg       0.42      0.43      0.43       896
              weighted avg       0.42      0.4

### LGBMClassifier

In [58]:
clf = LGBMClassifier()
clf.fit(X_train, y_train)
y_pred=clf.predict(X_test)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000728 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 5100
[LightGBM] [Info] Number of data points in the train set: 3581, number of used features: 20
[LightGBM] [Info] Start training from score -2.384305
[LightGBM] [Info] Start training from score -2.424496
[LightGBM] [Info] Start training from score -2.378262
[LightGBM] [Info] Start training from score -2.434004
[LightGBM] [Info] Start training from score -2.402654
[LightGBM] [Info] Start training from score -2.456550
[LightGBM] [Info] Start training from score -2.405745
[LightGBM] [Info] Start training from score -2.393437
[LightGBM] [Info] Start training from score -2.354452
[LightGBM] [Info] Start training from score -2.378262
[LightGBM] [Info] Start training from score -2.369267


In [59]:
print(classification_report(y_test, y_pred))

                            precision    recall  f1-score   support

          beauty_wellbeing       0.56      0.52      0.54        77
         business_services       0.39      0.30      0.34        90
construction_manufactoring       0.38      0.36      0.37        75
        education_training       0.71      0.69      0.70        93
    electronics_technology       0.46      0.45      0.45        83
      events_entertainment       0.43      0.33      0.37       100
    food_beverages_tobacco       0.58      0.67      0.62        84
               home_garden       0.39      0.36      0.38        80
           money_insurance       0.54      0.61      0.57        67
          shopping_fashion       0.43      0.59      0.50        75
   vehicles_transportation       0.54      0.65      0.59        72

                  accuracy                           0.50       896
                 macro avg       0.49      0.50      0.49       896
              weighted avg       0.49      0.5

### XGBClassifier