In [7]:
# Install required packages
!pip install transformers tensorboard

# Create project structure
!mkdir -p src/models src/data src/training data

# Check if GPU is available
import torch
print("GPU is available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU model:", torch.cuda.get_device_name(0))

GPU is available: True
GPU model: Tesla T4


In [3]:
import gc
import importlib
import sys

import torch

# Project modules that should be re-imported from disk the next time an
# `import` statement for them runs.
modules_to_reload = ['src.models.chatbot', 'src.training.train_baseline']
for module in modules_to_reload:
    # pop() with a default is a no-op when the module was never imported,
    # so no membership check is needed.
    sys.modules.pop(module, None)

# Clear the import finders' cached directory listings so source files that
# were created while this kernel was running are found on the next import.
importlib.invalidate_caches()

# Collect garbage BEFORE emptying the CUDA cache: GPU blocks only become
# releasable once the Python objects that own them have been collected.
# Running empty_cache() first would leave that memory in PyTorch's cache.
freed_objects = gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Keep the collector's count as the cell's displayed value.
freed_objects

96

In [39]:
from src.models.chatbot import RestaurantChatbot
from src.training.train_baseline import train_baseline

# Train on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# NOTE: relies on `torch` having been imported by an earlier cell.
training_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# All settings for the baseline run, gathered in one place so they are easy to
# read and tweak. Their exact meaning is defined by train_baseline (not shown
# in this notebook) — check that function before changing them.
baseline_config = dict(
    train_path='data/train.json',
    val_path='data/val.json',
    output_dir='models/baseline',
    num_epochs=8,
    batch_size=24,
    warmup_steps=500,
    max_length=128,
    max_train_samples=8000,
    device=training_device,
)

# Whatever train_baseline returns is kept in `metrics` for later inspection.
metrics = train_baseline(**baseline_config)

Using device: cuda
Training on 8000 samples for 8 epochs


Epoch 1/8:   0%|          | 0/375 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
Epoch 1/8:  89%|████████▉ | 333/375 [05:27<00:41,  1.02it/s, loss=2.64]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.05it/s]



Epoch 1:
Average training loss: 3.7451
Average validation loss: 2.2958
New best model saved with validation loss: 2.2958


Epoch 2/8:  89%|████████▉ | 333/375 [05:26<00:41,  1.02it/s, loss=2.13]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.08it/s]



Epoch 2:
Average training loss: 2.3500
Average validation loss: 2.2168
New best model saved with validation loss: 2.2168


Epoch 3/8:  89%|████████▉ | 333/375 [05:26<00:41,  1.02it/s, loss=2.31]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.05it/s]



Epoch 3:
Average training loss: 2.2294
Average validation loss: 2.1893
New best model saved with validation loss: 2.1893


Epoch 4/8:  89%|████████▉ | 333/375 [05:25<00:41,  1.02it/s, loss=2.32]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.06it/s]



Epoch 4:
Average training loss: 2.1496
Average validation loss: 2.1796
New best model saved with validation loss: 2.1796


Epoch 5/8:  89%|████████▉ | 333/375 [05:25<00:41,  1.02it/s, loss=2.04]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.06it/s]



Epoch 5:
Average training loss: 2.1050
Average validation loss: 2.1743
New best model saved with validation loss: 2.1743


Epoch 6/8:  89%|████████▉ | 333/375 [05:25<00:41,  1.02it/s, loss=2.22]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.05it/s]



Epoch 6:
Average training loss: 2.0613
Average validation loss: 2.1747


Epoch 7/8:  89%|████████▉ | 333/375 [05:25<00:41,  1.02it/s, loss=2.28]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.05it/s]



Epoch 7:
Average training loss: 2.0263
Average validation loss: 2.1727
New best model saved with validation loss: 2.1727


Epoch 8/8:  89%|████████▉ | 333/375 [05:26<00:41,  1.02it/s, loss=2.15]
Validation: 100%|██████████| 42/42 [00:10<00:00,  4.05it/s]



Epoch 8:
Average training loss: 2.0129
Average validation loss: 2.1735


In [40]:
!zip -r models.zip models/

# Then download it (this will appear in Colab's file browser)
from google.colab import files
files.download('models.zip')

  adding: models/ (stored 0%)
  adding: models/rl/ (stored 0%)
  adding: models/rl/logs/ (stored 0%)
  adding: models/rl/logs/events.out.tfevents.1742529572.bf438516b6e2.696.0 (deflated 9%)
  adding: models/rl/logs/events.out.tfevents.1742529607.bf438516b6e2.696.1 (deflated 9%)
  adding: models/rl/logs/events.out.tfevents.1742529715.bf438516b6e2.696.2 (deflated 9%)
  adding: models/baseline/ (stored 0%)
  adding: models/baseline/logs/ (stored 0%)
  adding: models/baseline/logs/events.out.tfevents.1742530209.bf438516b6e2.696.5 (deflated 9%)
  adding: models/baseline/logs/events.out.tfevents.1742530674.bf438516b6e2.696.6 (deflated 9%)
  adding: models/baseline/logs/events.out.tfevents.1742530004.bf438516b6e2.696.4 (deflated 9%)
  adding: models/baseline/logs/events.out.tfevents.1742529891.bf438516b6e2.696.3 (deflated 9%)
  adding: models/baseline/best_model.pt (deflated 7%)
  adding: models/baseline_final/ (stored 0%)
  adding: models/baseline_final/best_model.pt (deflated 7%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from src.chat_with_bot import chat_with_bot

In [54]:
# Start the interactive chat session. It prompts for input and, per its own
# banner, ends when the user types 'quit' (the recorded run shows that the
# upper-case 'QUIT' is accepted as well).
chat_with_bot()

Restaurant Chatbot is ready! Type 'quit' to exit.
Ask me about restaurant recommendations, cuisine types, or specific dishes.

You: QUIT

Goodbye!
