## Importing Required Libraries

In [3]:
# Uncomment to install the extra dependencies into the kernel's environment:
# %pip install sentence-transformers faiss-cpu

In [14]:
from datetime import datetime, timedelta
import requests
import pandas as pd
import numpy as np
import time
import faiss
import re
from tqdm.auto import tqdm
import os 

from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline

In [16]:

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# Load the mock sales export and normalise `dt` to "YYYY-MM-DD" strings.
# errors="coerce" turns unparseable dates into missing values instead of raising.
df = pd.read_csv("../data/wildberries_mock_sales.csv").assign(
    dt=lambda frame: pd.to_datetime(frame["dt"], errors="coerce").dt.strftime("%Y-%m-%d")
)

# GPT-2 tokenizer and LM-head model; the model is moved to `device`.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding so tokenizer calls with
# padding=True have a pad token available.
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)

def generate_sentence(row, max_new_tokens=60):
    """Describe one sales row in English and let GPT-2 continue the text.

    Parameters
    ----------
    row : mapping-like
        One record (a pandas Series when called through
        ``DataFrame.apply(..., axis=1)``). It must provide the keys ``dt``,
        ``productId``, ``openCardCount``, ``addToCartCount``, ``ordersCount``,
        ``ordersSumRub``, ``buyoutsCount``, ``buyoutsSumRub``,
        ``buyoutPercent``, ``addToCartConversion`` and
        ``cartToOrderConversion``.
    max_new_tokens : int, default 60
        Upper bound on the number of tokens GPT-2 may append to the prompt.

    Returns
    -------
    str
        The first line of the decoded sequence. The decoded sequence begins
        with the prompt itself, so the result is the prompt plus whatever
        GPT-2 appended before the first newline. Returns ``""`` if building
        the prompt or running the model fails; the error is printed.
    """
    try:
        row_text = (
            f"On {row['dt']}, product {row['productId']} had {row['openCardCount']} product card views, "
            f"{row['addToCartCount']} items added to cart, resulting in {row['ordersCount']} orders worth "
            f"{row['ordersSumRub']} rubles. {row['buyoutsCount']} buyouts occurred, worth {row['buyoutsSumRub']} rubles, "
            f"with a buyout percentage of {row['buyoutPercent']}%. "
            f"Add-to-cart conversion: {row['addToCartConversion']}%, cart-to-order conversion: {row['cartToOrderConversion']}%."
        )

        inputs = tokenizer(row_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
        outputs = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            # Bound only the *generated* part. `max_length` also counts the
            # prompt tokens, and this prompt is long enough that a total cap
            # of 100 leaves little or no room for a continuation.
            max_new_tokens=max_new_tokens,
            num_beams=3,  # reduce for speed
            # No `temperature`: it only affects sampling (do_sample=True) and
            # is ignored, with a warning, under plain beam search.
            no_repeat_ngram_size=2,
            early_stopping=True,
            pad_token_id=tokenizer.eos_token_id
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True).strip().split('\n')[0]
    except Exception as e:
        # Best-effort: one bad row must not abort the whole apply(). Use .get()
        # for both identifiers so the handler itself cannot raise when the
        # original failure was a missing 'dt' key.
        print(f"Error on row {row.get('dt', '')} {row.get('productId', '')}: {e}")
        return ""

# Register tqdm's progress-bar variants (such as `progress_apply`) on pandas objects.
tqdm.pandas()
# Call generate_sentence once per row (axis=1) with a progress bar and keep the
# results in a new "generated_sentence" column.
df["generated_sentence"] = df.progress_apply(generate_sentence, axis=1)

Using device: cpu


  0%|          | 0/50 [00:00<?, ?it/s]



In [18]:
# Dump the generated sentences to a UTF-8 text file, one sentence per line
# (rows whose generation failed contribute an empty line).
with open("wildberries.txt", "w", encoding="utf-8") as f:
    f.writelines(sentence + "\n" for sentence in df["generated_sentence"])