In [1]:
import os

# Optional: redirect Hugging Face model/cache to D:
# The Hugging Face libraries resolve their cache location when they are first
# imported, so HF_HOME must be in the environment *before* `transformers` is
# loaded; assigning it afterwards has no effect in a fresh kernel.
# The deprecated TRANSFORMERS_CACHE variable is intentionally not set: it
# would point the model cache at D:/HF_CACHE itself rather than the
# D:/HF_CACHE/hub layout that the HF_HOME-only cells below rely on.
os.environ["HF_HOME"] = "D:/HF_CACHE"

import torch  # noqa: E402  (must follow the HF_HOME assignment above)
from transformers import AutoTokenizer, AutoModelForCausalLM  # noqa: E402

# Model ID
model_id = "microsoft/phi-2"

# Check GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", torch.cuda.get_device_name(0) if device == "cuda" else "CPU")

# Load tokenizer & model (half precision on GPU, full precision on CPU)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto"
)

# Prompt
prompt = "Explain gravity to a 5-year-old."

# Tokenize input and move the tensors to wherever the model was placed
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate output
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=150,
        # Passed explicitly (as the chat cells do) so generate() does not have
        # to fall back to it and print the "Setting `pad_token_id`" notice.
        pad_token_id=tokenizer.eos_token_id,
    )

# Decode and print (the decoded text contains the prompt followed by the completion)
response = tokenizer.decode(output[0], skip_special_tokens=True)
print("\n Phi-2 Response:\n")
print(response)


  from .autonotebook import tqdm as notebook_tqdm


Using device: NVIDIA GeForce RTX 3060


Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.41s/it]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



 Phi-2 Response:

Explain gravity to a 5-year-old.
## INPUT
Gravity is a force that pulls everything down to the ground.
##OUTPUT
Gravity is like a big hug from the earth that makes everything stay on the ground. It is stronger when things are closer together, like when you jump, and weaker when things are farther apart, like when you throw a ball. Gravity is what keeps us from floating away into space.



In [None]:
import os

#  Use local cache only
# HF_HOME is read when the Hugging Face libraries are first imported, so it
# has to be set before `transformers` is loaded. With local_files_only=True
# below, a cache path that is silently ignored would make the load fail (or
# resolve against a different cache directory than intended).
os.environ["HF_HOME"] = "D:/HF_CACHE"

import torch  # noqa: E402  (must follow the HF_HOME assignment above)
from transformers import AutoTokenizer, AutoModelForCausalLM  # noqa: E402

model_id = "microsoft/phi-2"
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", torch.cuda.get_device_name(0) if device == "cuda" else "CPU")

#  Load model and tokenizer offline (no network access; files must already be cached)
tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto",
    local_files_only=True
)

#  Chat loop for Jupyter
def chat_with_phi2():
    print("Phi-2 Chat is ready. Type 'exit' to stop.")
    chat_history = ""

    while True:
        user_input = input("\nYou: ")
        if user_input.lower().strip() == "exit":
            print("Chat ended.")
            break

        prompt = f"{chat_history}\nHuman: {user_input}\nAssistant:"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=200,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                temperature=0.7
            )

        reply = tokenizer.decode(output[0], skip_special_tokens=True)
        reply = reply.split("Assistant:")[-1].strip()

        print(f"\nPhi-2: {reply}")
        chat_history += f"\nHuman: {user_input}\nAssistant: {reply}"

#  Start the chat (the cell stays busy, waiting on input(), until the user types 'exit')
chat_with_phi2()


Using device: NVIDIA GeForce RTX 3060


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.61s/it]


Phi-2 Chat is ready. Type 'exit' to stop.


In [4]:
import os

# Set Hugging Face cache
# HF_HOME is read when the Hugging Face libraries are first imported, so it
# has to be set before `transformers` is loaded; with local_files_only=True
# below, the cache location decides whether the model can be found at all.
os.environ["HF_HOME"] = "D:/HF_CACHE"

import torch  # noqa: E402  (must follow the HF_HOME assignment above)
from transformers import AutoTokenizer, AutoModelForCausalLM  # noqa: E402
from IPython.display import display, Markdown  # noqa: E402
import ipywidgets as widgets  # noqa: E402

# Load model/tokenizer locally
model_id = "microsoft/phi-2"
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using:", torch.cuda.get_device_name(0) if device == "cuda" else "CPU")

tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto",
    local_files_only=True
)

# Chat memory: running "Human:/Assistant:" transcript fed back to the model each turn
chat_history = ""

# Create widgets
input_box = widgets.Text(
    value='',
    placeholder='Type your message...',
    description='You:',
    disabled=False,
    # Only publish `value` when the user submits (Enter / leaving the field).
    # With the default continuous updates, a `value` observer runs on every
    # keystroke, i.e. one full model generation per typed character.
    continuous_update=False,
    layout=widgets.Layout(width='100%')
)

# Area that collects the rendered conversation
output_box = widgets.Output()

def respond(change):
    """Handle a change of ``input_box.value``: answer the message and log the turn.

    Args:
        change: the traitlets change dict passed to observers; only
            ``change['new']`` (the new text of the input box) is read.

    Behaviour:
        * blank text is ignored (this is also what arrives when the box is
          cleared at the end of a turn);
        * ``exit`` (case-insensitive) disables the input box and ends the chat;
        * anything else is appended to the transcript, sent to the
          module-level ``model``, and both sides of the turn are rendered
          into ``output_box``.

    Side effects: updates the module-level ``chat_history`` and clears
    ``input_box``.
    """
    global chat_history
    user_input = change['new']
    message = user_input.strip()

    # Resetting `input_box.value` below fires this observer again with ''.
    # Without this guard that empty text would be sent to the model as a turn.
    if not message:
        return

    if message.lower() == "exit":
        input_box.disabled = True
        # Render inside the Output widget like the rest of the conversation;
        # output produced in a widget callback outside of one may not be shown.
        with output_box:
            display(Markdown("**Chat ended.**"))
        return

    prompt = f"{chat_history}\nHuman: {user_input}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.7
        )

    # generate() returns prompt + continuation. Decode only the new tokens and
    # stop at the first turn marker, so a follow-up "Human:/Assistant:" turn
    # invented by the model is not mistaken for the answer.
    reply = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    for marker in ("\nHuman:", "\nAssistant:"):
        reply = reply.split(marker)[0]
    reply = reply.strip()

    with output_box:
        display(Markdown(f"**You:** {user_input}"))
        display(Markdown(f"**Phi-2:** {reply}"))

    # NOTE(review): the history grows without bound, so a long session will
    # eventually exceed the model's context window - consider trimming.
    chat_history += f"\nHuman: {user_input}\nAssistant: {reply}"
    input_box.value = ''  # clear input

# Run `respond` whenever the text box's `value` trait changes
input_box.observe(respond, names='value')

# Render the UI: the input field first, the conversation area beneath it
display(input_box, output_box)


Using: NVIDIA GeForce RTX 3060


Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.88s/it]


Text(value='', description='You:', layout=Layout(width='100%'), placeholder='Type your message...')

Output()

In [2]:
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension


Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.2 MB ? eta -:--:--
   ---- ----------------------------------- 0.3/2.2 MB ? eta -:--:--
   ---- ----------------------------------- 0.3/2.2 MB ? eta -:--:--
   ---- ----------------------------------- 0.3/2.2 MB ? eta -:--:--
   --------- ------------------------------ 0.5/2.2 MB

usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: kernel kernelspec migrate run troubleshoot

Jupyter command `jupyter-nbextension` not found.
