In [5]:
!pip install torch torchtext transformers sentencepiece pandas tqdm datasets



In [6]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import pandas as pd
import time

def load_model(model_path):
    model = torch.load(model_path)
    model.eval()  # Set model to evaluation mode
    return model

def make_predictions(model, input_str, tokenizer):
    input_ids = tokenizer.encode(input_str, return_tensors='pt').to('cuda')
    model.to('cuda')
    output = model.generate(
        input_ids,
        max_length=20,
        num_return_sequences=1,
        do_sample=True,
        top_k=5,
        top_p=0.8,
        temperature=0.2,
        repetition_penalty=1.0
    )
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    print(decoded_output)
    return decoded_output

if __name__ == "__main__":
    model_path = '/content/drive/MyDrive/SAP_SLM/SmallMedLM.pt'
    dataset_path = '/content/drive/MyDrive/SAP_SLM/clothes_price_prediction_data.csv'

    tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')

    model = load_model(model_path)

    previous_dataset_snapshot = None

    # Continuously monitor the dataset for changes
    while True:
        df = pd.read_csv(dataset_path)

        # Check for changes in the dataset
        if previous_dataset_snapshot is None or not df.equals(previous_dataset_snapshot):
            print("Detected dataset changes. Reloading model and making predictions...")

            previous_dataset_snapshot = df.copy()

            input_str = "puma Jacket"
            prediction = make_predictions(model, input_str, tokenizer)
            print("Prediction:", prediction)

            row_parts = prediction.split('|')
            data = row_parts[0].strip()
            id_value = int(row_parts[1].strip())
            print("Product ID: ",id_value)
            condition = (df['Id'] == id_value)
            index = df.loc[condition].index
            row = df.loc[index]
            print("Row:")
            print(row)

            model = load_model(model_path)
        time.sleep(60)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Detected dataset changes. Reloading model and making predictions...
puma Jacket | 715
Prediction: puma Jacket | 715
Product ID:  715
Row:
    Brand Category  Color Size Material  Price   Id
714  Puma   Jacket  White   XS     Wool     14  715


KeyboardInterrupt: 

In [20]:
#giving inputs but accuracy will go

import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import pandas as pd
import time

def load_model(model_path):
    model = torch.load(model_path)
    model.eval()  # Set model to evaluation mode
    return model

def make_predictions(model, input_str, tokenizer):
    input_ids = tokenizer.encode(input_str, return_tensors='pt').to('cuda')
    model.to('cuda')
    output = model.generate(
        input_ids,
        max_length=20,
        num_return_sequences=1,
        do_sample=True,
        top_k=5,
        top_p=0.8,
        temperature=0.2,
        repetition_penalty=1.0
    )
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    print(decoded_output)
    return decoded_output

if __name__ == "__main__":
    model_path = '/content/drive/MyDrive/SAP_SLM/SmallMedLM.pt'
    dataset_path = '/content/drive/MyDrive/SAP_SLM/clothes_price_prediction_data.csv'

    tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')

    # Load initial model
    model = load_model(model_path)

    # Initialize variable to store previous dataset snapshot
    previous_dataset_snapshot = None

    # Continuously monitor the dataset for changes
    while True:
        # Load dataset
        df = pd.read_csv(dataset_path)

        # Check for changes in the dataset
        if previous_dataset_snapshot is None or not df.equals(previous_dataset_snapshot):
            print("Detected dataset changes. Reloading model and making predictions...")

            # Update the previous dataset snapshot
            previous_dataset_snapshot = df.copy()

            # User input
            user_input = input("Please provide your preferences separated by spaces (Brand Category Color Size Material Maximum_Price Maximum_Delivery_Days): ")
            preferences = user_input.split()

            # Extract preferences
            brand = preferences[0]
            category = preferences[1]
            color = preferences[2]
            size = preferences[3]
            material = preferences[4]
            max_price = float(preferences[5])
            max_delivery_days = int(preferences[6])

            # Generate input string for the model
            input_str = f"{brand} {category} {color} {size} {material}"

            # Make predictions
            prediction = make_predictions(model, input_str, tokenizer)
            print("Prediction:", prediction)
            row_parts = prediction.split('|')
            data = row_parts[0].strip()  # Extracting the row data and removing leading/trailing whitespace
            id_value = int(row_parts[1].strip())
            print("Product ID: ",id_value)
            condition = (df['Id'] == id_value)
            index = df.loc[condition].index
            row = df.loc[index]
            print("Row:")
            print(row)
            model = load_model(model_path)
        time.sleep(15)  # Check every 60 seconds




Detected dataset changes. Reloading model and making predictions...
Please provide your preferences separated by spaces (Brand Category Color Size Material Maximum_Price Maximum_Delivery_Days):  Puma Shirt white XL cotton 400 4


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Puma Shirt white XL cotton | 629
Prediction: Puma Shirt white XL cotton | 629
Product ID:  629
Row:
    Brand Category Color Size Material  Price   Id
628  Puma   Jacket  Blue   XS   Cotton    144  629


KeyboardInterrupt: 