In [5]:
import requests
import gzip
import json
import pandas as pd
import numpy as np 
import torch
from transformers import pipeline 

In [6]:
# Prefer the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [7]:
# Read the category table and the product table from their CSV files.
categories_df, product_df = (
    pd.read_csv(csv_path)
    for csv_path in ("../data/amazon_categories.csv", "../data/amazon_products.csv")
)

In [4]:
# Peek at the first row of the categories table to see its columns.
categories_df.head(1)

Unnamed: 0,id,category_name
0,1,Beading & Jewelry Making


In [5]:
# List the column labels of the product table.
product_df.columns

Index(['asin', 'title', 'imgUrl', 'productURL', 'stars', 'reviews', 'price',
       'listPrice', 'category_id', 'isBestSeller', 'boughtInLastMonth'],
      dtype='object')

In [6]:
def create_prompt(row):
    """Build a text-generation prompt asking for a summary of one product record.

    Each ``(column, value)`` pair from ``row.items()`` becomes a
    ``- column: value`` bullet line. Pairs are skipped when the value is
    missing according to ``pd.notna`` or when its string form is empty
    after stripping whitespace.

    Args:
        row: Any object exposing ``items()`` that yields ``(column, value)``
            pairs, such as a dict or a pandas Series.

    Returns:
        str: The instruction header, the bullet lines, and a final
        ``Output:`` cue on its own line.
    """
    header = "You are a product description generator. Given structured product data, write a clear and attractive product summary paragraph.\n\nHere is the product data:\n"
    bullet_lines = [
        f"- {col}: {val}\n"
        for col, val in row.items()
        if pd.notna(val) and str(val).strip()
    ]
    return header + "".join(bullet_lines) + "\nOutput:"


In [7]:
# Turn the first product row into a plain dict and render every field,
# unfiltered, as a "key: value" line under a one-sentence instruction.
first_row = product_df.iloc[0].to_dict()

detail_lines = [f"{key}: {value}\n" for key, value in first_row.items()]
prompt = "Write a brief description for this product with these details:\n" + "".join(detail_lines)

print(prompt)

Write a brief description for this product with these details:
asin: B014TMV5YE
title: Sion Softside Expandable Roller Luggage, Black, Checked-Large 29-Inch
imgUrl: https://m.media-amazon.com/images/I/815dLQKYIYL._AC_UL320_.jpg
productURL: https://www.amazon.com/dp/B014TMV5YE
stars: 4.5
reviews: 0
price: 139.99
listPrice: 0.0
category_id: 104
isBestSeller: False
boughtInLastMonth: 2000



In [8]:

# Send `prompt` to the text-generation HTTP endpoint running on this machine
# (presumably a local Ollama server, given the port and path -- confirm).
response = requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "mistral",     # Change this if your model has a different name
        "prompt": prompt,
        "stream": False         # the reply is parsed below as a single JSON document
    },
    # requests never times out by default, so a stalled server would hang the
    # kernel forever. Generation can be slow, hence a generous limit (seconds).
    timeout=300,
)

# Fail loudly on 4xx/5xx replies instead of hitting a confusing KeyError or
# JSON decoding error when the body is not the expected payload.
response.raise_for_status()

# Output the response
print(response.json()["response"])


 Experience the Sion Softside Expandable Roller Luggage, a stylish and practical choice for your travel needs. This checked-large 29-inch luggage, available in black, is currently priced at $139.99. Although it doesn't have any reviews yet, it boasts a promising 4.5 stars rating. Featuring an expandable design and a sleek softside construction, this roller luggage offers ample space for your belongings. With over 2000 pieces sold within the last month, you can trust in its quality. Purchase now at [productURL]. The product image can be found at [imgUrl].


In [9]:
# Keep the generated text next to the product's other fields.
generated_summary = response.json()["response"]
first_row['summary'] = generated_summary

In [10]:
# Show the first product's dict, including the 'summary' entry added to it.
first_row

{'asin': 'B014TMV5YE',
 'title': 'Sion Softside Expandable Roller Luggage, Black, Checked-Large 29-Inch',
 'imgUrl': 'https://m.media-amazon.com/images/I/815dLQKYIYL._AC_UL320_.jpg',
 'productURL': 'https://www.amazon.com/dp/B014TMV5YE',
 'stars': 4.5,
 'reviews': 0,
 'price': 139.99,
 'listPrice': 0.0,
 'category_id': 104,
 'isBestSeller': False,
 'boughtInLastMonth': 2000,
 'summary': " Experience the Sion Softside Expandable Roller Luggage, a stylish and practical choice for your travel needs. This checked-large 29-inch luggage, available in black, is currently priced at $139.99. Although it doesn't have any reviews yet, it boasts a promising 4.5 stars rating. Featuring an expandable design and a sleek softside construction, this roller luggage offers ample space for your belongings. With over 2000 pieces sold within the last month, you can trust in its quality. Purchase now at [productURL]. The product image can be found at [imgUrl]."}

In [33]:
# Boolean mask: True for rows whose 'isBestSeller' value equals True.
Best_Seller = product_df['isBestSeller'].eq(True)

In [39]:
# Keep only the rows selected by the best-seller mask, then count how many
# distinct category ids appear among them.
best_selling_product = product_df.loc[Best_Seller]
best_selling_product['category_id'].nunique()

222

## Test 2: Build a templated text description for each product

In [9]:
# Clean column names
product_df.columns = product_df.columns.str.strip().str.lower()


# Fill missing values with sensible defaults.
# DataFrame.fillna returns a NEW frame, so the result has to be assigned back;
# calling it only to display .count() would leave product_df's gaps in place.
product_df = product_df.fillna({
    'title':'No title',
    'stars': 0.0,
    'reviews':0,
    'price': '0',
    'listprice' : '0',
    'category_id' : 'unknown',
    'isbestseller': False,
    'boughtinlastmonth': 0})

# Non-null count per column, to confirm the listed columns have no gaps left.
product_df.count()


asin                 1426337
title                1426337
imgurl               1426337
producturl           1426337
stars                1426337
reviews              1426337
price                1426337
listprice            1426337
category_id          1426337
isbestseller         1426337
boughtinlastmonth    1426337
dtype: int64

In [11]:
# Normalise every column to the type the text-building step relies on.
# Each line supplies its own default for missing or unparseable values, so
# this cell gives clean output even when run on a frame that still has gaps.

# Fill before astype(str): converting a missing value to str yields the
# literal text 'nan', which would then be treated as a real title.
product_df['title'] = product_df['title'].fillna('No title').astype(str).str.strip()
product_df['stars'] = pd.to_numeric(product_df['stars'], errors='coerce').fillna(0.0)
product_df['reviews'] = pd.to_numeric(product_df['reviews'], errors='coerce').fillna(0).astype(int)

# Prices may arrive as numbers or as text such as '$1,299.00'. Strip the
# currency symbol and thousands separators, then parse like the other numeric
# columns: unparseable values become 0.0 instead of raising or staying NaN.
for price_col in ('price', 'listprice'):
    price_text = (
        product_df[price_col]
        .astype(str)
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .str.strip()
    )
    # astype(float) keeps the column float even if every value parsed as an integer.
    product_df[price_col] = pd.to_numeric(price_text, errors='coerce').fillna(0.0).astype(float)

product_df['boughtinlastmonth'] = pd.to_numeric(product_df['boughtinlastmonth'], errors='coerce').fillna(0).astype(int)
# Anything other than a recognised truthy spelling counts as False.
product_df['isbestseller'] = product_df['isbestseller'].astype(str).str.lower().isin(['true', '1', 'yes'])

In [12]:
def build_text(row):
    """Render one product record as a single ' | '-separated description line.

    Segments always present: product title, category id, price, and star
    rating with review count. Optional segments: a discount note when the
    list price exceeds the price, a best-seller badge when ``isbestseller``
    is truthy, and a popularity note when ``boughtinlastmonth`` is positive.

    Segments are joined with ' | ', so the result carries no dangling
    separator at the end.

    Args:
        row: Mapping-like record (dict or pandas Series) with the keys
            ``title``, ``category_id``, ``price``, ``listprice``, ``stars``,
            ``reviews``, ``isbestseller`` and ``boughtinlastmonth``.
            ``price`` and ``listprice`` must be numeric because they are
            formatted with ``:.2f``.

    Returns:
        str: The assembled description.
    """
    parts = [
        f"Product: {row['title']}",
        f"Category ID: {row['category_id']}",
        f"Price: ${row['price']:.2f}",
    ]
    # Only advertise a discount when the list price is genuinely higher.
    if row['listprice'] > row['price']:
        discount = row['listprice'] - row['price']
        parts.append(f"Discount: ${discount:.2f} from list price ${row['listprice']:.2f}")
    parts.append(f"Stars: {row['stars']} stars from {row['reviews']} reviews")
    if row['isbestseller']:
        parts.append("🔥 Best Seller!")
    if row['boughtinlastmonth'] > 0:
        parts.append(f"Popular: Bought {row['boughtinlastmonth']} times last month")
    # Joining places the separator only BETWEEN segments.
    return " | ".join(parts)

In [14]:
# Build the description for every row, then attach it as the 'text' column.
text_column = product_df.apply(build_text, axis=1)
product_df['text'] = text_column

In [17]:
# Inspect the generated text for the row labelled 0 (single .loc lookup
# rather than chained indexing).
product_df.loc[0, 'text']

'Product: Sion Softside Expandable Roller Luggage, Black, Checked-Large 29-Inch | Category ID: 104 | Price: $139.99 | Stars: 4.5 stars from 0 reviews | Popular: Bought 2000 times last month |'