In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib

In [2]:
# Load the data
data = pd.read_csv('train2.csv', encoding="ISO-8859-1")
data["brand_name"].fillna(value="missing", inplace=True)
data["name"].fillna(value="missing", inplace=True)
data["item_description"].fillna(value="missing", inplace=True)
data["category_name"].fillna(value="missing", inplace=True)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    data[['name', 'item_description', 'category_name']], 
    data['brand_name'], 
    test_size=0.2, 
    random_state=42)

In [3]:
# Vectorize the text data using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train['name'] + ' ' + X_train['item_description'] + ' ' + X_train['category_name'])
X_test_tfidf = vectorizer.transform(X_test['name'] + ' ' + X_test['item_description'] + ' ' + X_test['category_name'])

In [4]:
# Train an SVM model
svm = SVC(kernel='linear', C=1, decision_function_shape='ovr')
svm.fit(X_train_tfidf, y_train)

In [5]:
# Make predictions on the test set
y_test = y_test [:1000]
X_test_tfidf = X_test_tfidf [:1000]

y_pred = svm.predict(X_test_tfidf)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.769


In [6]:

joblib.dump(svm, 'svm_model.pkl')

['svm_model.pkl']

In [7]:
y_pred[:10]

array(['American Eagle', 'missing', 'Nintendo', 'missing', 'missing',
       'missing', 'missing', 'Nike', 'missing', 'LuLaRoe'], dtype=object)

In [8]:
y_test[:10]

143289    American Eagle
35722            missing
6347            Nintendo
8693             missing
142575           missing
31175            missing
8295             missing
122751              Nike
124508           missing
81716            LuLaRoe
Name: brand_name, dtype: object

In [9]:
# Test item
name = "iPad Pro 11-inch (1st Gen) + Apple Pencil + Logitech Folio Keyboard Case"
description = "256 GB capacity and cellular capability, running on iPadOS version 16.4. Used only for university classes for about a year, still in great condition. $900 OBO."
category = "Computer Keyboards"

single_point_test = X_test [:1]
single_point_test['name'] =  name
single_point_test['category_name'] =  category

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single_point_test['name'] =  name
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  single_point_test['category_name'] =  category


In [10]:
single_point_test

Unnamed: 0,name,item_description,category_name
143289,iPad Pro 11-inch (1st Gen) + Apple Pencil + Lo...,No description yet,Computer Keyboards


In [11]:
X_new_tfidf = vectorizer.transform(single_point_test['name'] + ' ' + single_point_test['item_description'] + ' ' + single_point_test['category_name'])

In [12]:
y_pred = svm.predict(X_new_tfidf)

In [13]:
X_train

Unnamed: 0,name,item_description,category_name
68467,#220. Med PINK,Great lightly used condition. Ships next busin...,Women/Tops & Blouses/T-Shirts
69267,iPhone 6/6s Plus Michael Jordan Case,#NAME?,"Electronics/Cell Phones & Accessories/Cases, C..."
17497,Vera Bradley Petite Travel Jewelry Bag,Vera Bradley petite travel jewelry bag measure...,Women/Women's Accessories/Other
4495,Bnib number 16 fan brush it,It brush from Ulta number 16 fan brush,Beauty/Tools & Accessories/Makeup Brushes & Tools
91963,Gerber Outdoor Steady Multi Tool,New in box,Sports & Outdoors/Outdoors/Hiking & Camping
...,...,...,...
119879,32c nude Victoria's Secret push up bra,First pic is both of the bras. On top in all o...,Women/Underwear/Bras
103694,Women's Plaid Flannel Button Down,Like new!,Women/Tops & Blouses/Button Down Shirt
131932,LPS #334 Ferret,In good condition some minor wear on nose and ...,Kids/Toys/Action Figures & Statues
146867,TopShop Moto Joni Jeans,Super high waisted fade black skinny pants. Si...,"Women/Jeans/Slim, Skinny"


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=237fc509-a989-4993-960f-769c3c5cd1bb' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>