In [32]:
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import warnings
warnings.filterwarnings("ignore")

In [34]:
# Read the exported metrics table; the path is relative to the notebook's
# working directory.
DATA_CSV = "../fetch_data/wildberries_data.csv"
df2 = pd.read_csv(DATA_CSV)

In [36]:
# Load the pretrained GPT-2 checkpoint: language-model head plus its tokenizer.
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Reuse the end-of-sequence token as the padding token so that tokenizer calls
# made with padding enabled have a pad token to use.
tokenizer.pad_token = tokenizer.eos_token

In [24]:
# Function to generate summary sentence
def generate_sentence(row, max_new_tokens=50):
    """Build a metrics sentence for one row and let GPT-2 continue it.

    The row's values are inserted into a fixed English template, the template
    is tokenized and passed to ``model.generate``, and the first line of the
    decoded result is returned.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with the keys ``dt``,
            ``openCardCount``, ``addToCartCount``, ``ordersCount``,
            ``ordersSumRub``, ``buyoutsCount``, ``buyoutsSumRub``,
            ``buyoutPercent``, ``addToCartConversion`` and
            ``cartToOrderConversion``.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt.

    Returns:
        str: The decoded output, stripped, cut at the first newline.
    """
    prompt = (
        f"On {row['dt']}, {row['openCardCount']} product cards were opened, "
        f"{row['addToCartCount']} items were added to the cart, "
        f"resulting in {row['ordersCount']} orders worth {row['ordersSumRub']} rubles. "
        f"{row['buyoutsCount']} buyouts occurred, worth {row['buyoutsSumRub']} rubles, "
        f"with a buyout percentage of {row['buyoutPercent']}%. "
        f"The add-to-cart conversion rate was {row['addToCartConversion']}%, "
        f"and the cart-to-order conversion rate was {row['cartToOrderConversion']}%."
    )

    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Budget the continuation with `max_new_tokens` rather than `max_length`:
    # `max_length` includes the prompt tokens, and this template is already
    # about that long, which leaves no room to generate (and is rejected
    # outright by recent transformers releases).
    generated = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_beams=5,
        temperature=0.7,
        no_repeat_ngram_size=2,
        early_stopping=True,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the first line so each row yields a single-line entry.
    return decoded.strip().split("\n")[0]

In [26]:
# Generate sentences
# generate_sentence samples tokens (do_sample=True), so seed torch's RNG first;
# otherwise every run of this cell yields different text.
torch.manual_seed(42)
df2["generated_sentence"] = df2.apply(generate_sentence, axis=1)

In [28]:
# Print every generated sentence, each followed by a blank line for readability
for generated_text in df2["generated_sentence"].tolist():
    print(f"{generated_text} \n")

In [30]:
# Write the sentences to a UTF-8 text file, one per line (no trailing newline)
sentences = df2["generated_sentence"].tolist()
file_body = "\n".join(sentences)
with open("wildberries.txt", mode="w", encoding="utf-8") as out_file:
    out_file.write(file_body)

print("Generated text saved to 'wildberries.txt'.")