# Import Mistral-7B

In [5]:
from google.colab import drive
import pandas as pd

In [6]:
# Working directory that the `%cd` cell below switches into; this value is a
# placeholder and must be replaced with the real folder path before running.
DIR = 'path-to-directory-in-drive'

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

# Switch the working directory to DIR so the relative CSV paths used later
# in the notebook resolve inside that folder.
%cd "$DIR"

In [None]:
# Log in to Hugging Face through the `hf` CLI before the model is loaded below.
!hf auth login

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Builds the notebook-wide `pipe` object that label_descriptions calls for
# text generation.
# NOTE(review): no dtype/device arguments are passed, so library defaults
# apply — confirm the runtime has enough memory for this model.
pipe = pipeline("text-generation", model="mistralai/Mistral-7B-v0.1")

In [64]:
# Load the auction item descriptions; the first CSV column is used as the index.
df = pd.read_csv("auction_13_descriptions.csv", index_col=0)

# Preview the first rows to confirm the expected columns are present.
df.head()

Unnamed: 0,item_number,Description
0,1,Impact-branded photo backdrop kit including a ...
1,2,Interfit Stellar X flash lighting unit with ad...
2,3,Interfit Stellar X 300 studio strobe light wit...
3,4,Interfit Stellar X 300 studio flash unit equip...
4,5,Interfit COR 751 lighting kit with a wheeled c...


In [66]:
def label_descriptions(data, generator=None):
  """Label each auction item in ``data`` with one model-generated category.

  For every distinct ``item_number`` (in ascending order) the first matching
  row's ``Description`` is inserted into a fixed instruction prompt, the
  text-generation pipeline is called once, and the first non-empty line of
  the generated text is written to the ``category`` column of every row with
  that item number. ``data`` is modified in place.

  Unlike a ``range(min, max + 1)`` walk, only item numbers that actually
  occur are visited, so gaps in the numbering and an empty frame are handled
  without raising.

  Args:
    data: DataFrame with ``item_number`` and ``Description`` columns. A
      ``category`` column is created if it does not exist yet.
    generator: Optional text-generation pipeline to use. It must be callable
      like the notebook-level ``pipe`` and expose ``tokenizer.eos_token_id``.
      Defaults to the notebook-level ``pipe``.

  Returns:
    None. Results are stored in ``data['category']``.
  """
  if generator is None:
    # Fall back to the pipeline created earlier in the notebook.
    generator = pipe

  # Visit only the item numbers that are present (missing values dropped),
  # in ascending order, so a gap in the numbering cannot trigger an
  # IndexError on `.iloc[0]` and an empty frame simply yields no iterations.
  item_numbers = sorted(data['item_number'].dropna().unique())

  for item in item_numbers:
    # Compute the row mask once; it is reused for the read and the write.
    matches = data['item_number'] == item
    description = data.loc[matches, 'Description'].iloc[0]

    prompt = f"""
    You are categorizing auction items.

    Task:
    - Output exactly one category name.
    - The category must be 1–3 words only.
    - It must be a generic, broad category (e.g., "Photography Equipment", "Furniture", "Tools").
    - Do not provide multiple categories, alternatives, or synonyms.
    - Do not use parentheses, slashes, or the word "or".
    - Answer only with the category name. No explanation, no punctuation, no extra text.

    Description:
    {description}
    Answer:"""

    # NOTE(review): the captured run log reports eos_token_id 10220 rather
    # than the tokenizer's EOS id, which suggests the pipeline derives the
    # stop token from `stop_sequence` and overrides `eos_token_id` — confirm
    # against the installed transformers version.
    result = generator(
      prompt,
      max_new_tokens=20,
      do_sample=False,
      return_full_text=False,
      eos_token_id=generator.tokenizer.eos_token_id,
      stop_sequence="Description:"
    )

    raw = result[0]["generated_text"]
    # Keep only the first non-empty line; anything after it is model chatter.
    category = next((line.strip() for line in raw.split("\n") if line.strip()), "")

    data.loc[matches, 'category'] = category

In [67]:
# Runs one generation per item and writes the result into df['category'] in place.
label_descriptions(df)

Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:10220 for open-end gene

In [68]:
# Inspect the model-assigned categories before any manual corrections.
df

Unnamed: 0,item_number,Description,category
0,1,Impact-branded photo backdrop kit including a ...,Photography Equipment
1,2,Interfit Stellar X flash lighting unit with ad...,Photography Equipment
2,3,Interfit Stellar X 300 studio strobe light wit...,Photography Equipment
3,4,Interfit Stellar X 300 studio flash unit equip...,Photography Equipment
4,5,Interfit COR 751 lighting kit with a wheeled c...,Photography Equipment
5,6,Pair of Impact floodlight fixtures mounted on ...,Photography Equipment
6,7,Assorted hand tool collection including Sears ...,Tools
7,8,Box of assorted hand tools including multiple ...,Tools
8,9,Clear storage bin filled with grinding and cut...,Metalworking
9,10,"Heavy-duty Armstrong slide hammer puller set, ...",Tools


In [73]:
def correct_categories(df, corrections):
  """Overwrite the category of selected items and return the same frame.

  Args:
    df: DataFrame with an ``item_number`` column; its ``category`` column is
      updated in place.
    corrections: Iterable of ``(item_number, category)`` pairs, applied in
      order, so a later pair for the same item wins. Pairs whose item number
      matches no row change nothing.

  Returns:
    The same ``df`` object that was passed in.
  """
  for target_item, new_category in corrections:
    matches = df['item_number'].eq(target_item)
    df.loc[matches, 'category'] = new_category
  return df

In [77]:
# Manual overrides for the model-assigned labels, as (item_number, category) pairs.
corrections = [
  (9, 'Tools'),
  (21, 'Lighting'),
  (32, 'Art'),
  (38, 'Collectibles'),
  (39, 'Art'),
  (40, 'Decorative'),
  (41, 'Kitchen Appliance'),
  (44, 'Rugs'),
  (47, 'Flatware'),
  (54, 'Collectibles'),
  (55, 'Collectibles'),
  (56, 'Tools')
]

# correct_categories updates df in place and returns the same object, so this
# reassignment keeps `df` bound to the corrected frame.
df = correct_categories(df, corrections)

In [78]:
# Re-inspect the table after the manual corrections have been applied.
df

Unnamed: 0,item_number,Description,category
0,1,Impact-branded photo backdrop kit including a ...,Photography Equipment
1,2,Interfit Stellar X flash lighting unit with ad...,Photography Equipment
2,3,Interfit Stellar X 300 studio strobe light wit...,Photography Equipment
3,4,Interfit Stellar X 300 studio flash unit equip...,Photography Equipment
4,5,Interfit COR 751 lighting kit with a wheeled c...,Photography Equipment
5,6,Pair of Impact floodlight fixtures mounted on ...,Photography Equipment
6,7,Assorted hand tool collection including Sears ...,Tools
7,8,Box of assorted hand tools including multiple ...,Tools
8,9,Clear storage bin filled with grinding and cut...,Tools
9,10,"Heavy-duty Armstrong slide hammer puller set, ...",Tools


In [79]:
# Write the labeled table to a new CSV in the current working directory.
# No `index` argument is given, so pandas' default applies (the index is
# written as the first column, mirroring the `index_col=0` used on load).
df.to_csv('auction_13_descriptions_categories.csv')