# LLM for predicting the Warehouse demand to optimize the Inventory using GPT-2 model :

In [13]:
# Step 0: Importing required library packages 
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd


In [14]:
# Step 1: Load the dataset
data = pd.read_csv("Warehouse_Prediction_Dataset.csv") 
data_dict = {
    "query": data["Query"].tolist(),
    "response": data["Response"].tolist()
}


# Convert to HuggingFace Dataset
hf_dataset = Dataset.from_dict(data_dict)



In [22]:
data

Unnamed: 0,Query,Response,Context
0,What is the current stock level of Product A?,The current stock level of Product A is 500 un...,Product A is a high-demand item.
1,When should we restock Product B?,Product B should be restocked in 2 weeks.,"Product B has a lead time of 3 weeks, so plan ..."
2,What is the forecasted demand for Product C ne...,The forecasted demand for Product C next month...,Product C is affected by seasonal demand fluct...
3,How many units of Product D were sold last week?,"Last week, 300 units of Product D were sold.",Product D sales are consistent week over week.
4,What is the lead time for Product E?,The lead time for Product E is 5 days.,"Product E is sourced locally, ensuring quick d..."
5,Are there any seasonal trends for Product F?,Product F has higher sales during the holiday ...,Product F peaks in December due to holiday dem...
6,What is the recommended reorder quantity for P...,The recommended reorder quantity for Product G...,Product G has a bulk discount when reordered i...
7,What is the storage capacity utilization in th...,The warehouse is currently at 75% storage capa...,Storage optimization is being monitored for ef...
8,Which products are at risk of stockout?,Products X and Y are at risk of stockout based...,Stockout risk is calculated based on current s...
9,What is the average daily sales of Product H?,The average daily sales of Product H are 50 un...,Sales data for Product H shows steady growth.


In [15]:
# Step 2: Split the dataset using Hugging Face's method
train_test = hf_dataset.train_test_split(test_size=0.2) 
train_dataset = train_test['train']
eval_dataset = train_test['test']



In [16]:
# Step 3: Tokenize the data
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

def preprocess_function(examples):
    inputs = tokenizer(
        examples["query"],
        max_length=128,
        padding="max_length",
        truncation=True
    )
    labels = tokenizer(
        examples["response"],
        max_length=128,
        padding="max_length",
        truncation=True
    )
    inputs["labels"] = labels["input_ids"]
    return inputs

# Applying tokenizer and preparing dataset
encoded_train_dataset = train_dataset.map(preprocess_function, batched=True, remove_columns=["query", "response"])
encoded_eval_dataset = eval_dataset.map(preprocess_function, batched=True, remove_columns=["query", "response"])



loading configuration file config.json from cache at C:\Users\AjayG/.cache\huggingface\hub\models--gpt2\snapshots\607a30d783dfa663caf39e06633721c8d4cfcd7e\config.json
Model config GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transfor

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [17]:
# Step 4: Define the model
model = AutoModelForCausalLM.from_pretrained("gpt2") # Model:



loading configuration file config.json from cache at C:\Users\AjayG/.cache\huggingface\hub\models--gpt2\snapshots\607a30d783dfa663caf39e06633721c8d4cfcd7e\config.json
Model config GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transfor

In [18]:
# Step 5: Fine-tune the model
training_args = TrainingArguments(
    output_dir="./warehouse_model",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    num_train_epochs=3,
    save_strategy="epoch",
    logging_dir="./logs",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_eval_dataset,
)

trainer.train()


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 8
  Num Epochs = 3
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 6
  Number of trainable parameters = 124439808


Epoch,Training Loss,Validation Loss
1,No log,8.306581
2,No log,6.556075
3,No log,5.836059


***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
Saving model checkpoint to ./warehouse_model\checkpoint-2
Configuration saved in ./warehouse_model\checkpoint-2\config.json
Model weights saved in ./warehouse_model\checkpoint-2\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
Saving model checkpoint to ./warehouse_model\checkpoint-4
Configuration saved in ./warehouse_model\checkpoint-4\config.json
Model weights saved in ./warehouse_model\checkpoint-4\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
Saving model checkpoint to ./warehouse_model\checkpoint-6
Configuration saved in ./warehouse_model\checkpoint-6\config.json
Model weights saved in ./warehouse_model\checkpoint-6\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=6, training_loss=7.3150787353515625, metrics={'train_runtime': 49.3135, 'train_samples_per_second': 0.487, 'train_steps_per_second': 0.122, 'total_flos': 1567752192000.0, 'train_loss': 7.3150787353515625, 'epoch': 3.0})

In [19]:
# Step 6: Save the fine-tuned model
model.save_pretrained("./warehouse_llm")
tokenizer.save_pretrained("./warehouse_llm")



Configuration saved in ./warehouse_llm\config.json
Model weights saved in ./warehouse_llm\pytorch_model.bin
tokenizer config file saved in ./warehouse_llm\tokenizer_config.json
Special tokens file saved in ./warehouse_llm\special_tokens_map.json


('./warehouse_llm\\tokenizer_config.json',
 './warehouse_llm\\special_tokens_map.json',
 './warehouse_llm\\vocab.json',
 './warehouse_llm\\merges.txt',
 './warehouse_llm\\added_tokens.json',
 './warehouse_llm\\tokenizer.json')

In [20]:
# Step 7: Inference Function with Numerical Prediction Capability
def get_response(query):
    # Check if query is a number for direct prediction
    try:
        feature_value = float(query)
        # Here you would implement your regression logic to predict demand based on feature_value.
        predicted_demand = 10 + (feature_value * 5)  # Example linear relationship
        return f"The predicted demand for a feature value of {feature_value} is {predicted_demand:.2f}."
    
    except ValueError:
        # If it's not a number, use the language model to generate a response.
        inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True, max_length=128)
        outputs = model.generate(**inputs, max_length=128, num_return_sequences=1)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        return response

    


In [21]:
# Step 8: Chatbot Interface for Interaction
def chatbot_interface():
    print("Welcome to the Warehouse Demand Prediction Chatbot!")
    print("Type 'exit' to quit.")
    
    while True:
        user_input = input("You: ")
        
        if user_input.lower() == 'exit':
            print("Goodbye!")
            break
        
        response = get_response(user_input)
        print("Bot:", response)

if __name__ == "__main__":
    chatbot_interface()


Welcome to the Warehouse Demand Prediction Chatbot!
Type 'exit' to quit.
You: 8.693
Bot: The predicted demand for a feature value of 8.693 is 53.46.
You: exit
Goodbye!
