In [5]:
# Import necessary libraries
import torch

use_cuda = torch.cuda.is_available()

# Check available GPUs
device = torch.device("cuda" if use_cuda else "cpu")
print("Device: ",device)

if use_cuda:
    print('__CUDA VERSION:', torch.backends.cudnn.version())
    print('__Number CUDA Devices:', torch.cuda.device_count())
    print('__CUDA Device Name:',torch.cuda.get_device_name(0))
    print('__CUDA Device Total Memory [GB]:',torch.cuda.get_device_properties(0).total_memory/1e9)

# __CUDNN VERSION: 8401
# __Number CUDA Devices: 1
# __CUDA Device Name: NVIDIA RTX A4000
# __CUDA Device Total Memory [GB]: 16.89124864

# For example, training a simple model on GPU
# model = tf.keras.Sequential([...])  # Define your model
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# model.fit(train_data, train_labels, epochs=5, validation_data=(val_data, val_labels))

Device:  cuda
__CUDA VERSION: 90101
__Number CUDA Devices: 1
__CUDA Device Name: NVIDIA GeForce RTX 3050 Laptop GPU
__CUDA Device Total Memory [GB]: 3.962765312


In [1]:
from transformers import AutoTokenizer
import transformers
import torch
import gradio as gr

  _torch_pytree._register_pytree_node(


In [2]:
# Model and Tokenizer Initialization
model = "PY007/TinyLlama-1.1B-Chat-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model)

# Pipeline Initialization
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

  _torch_pytree._register_pytree_node(


In [8]:
def generate_text(prompt):
    # Prompt
    # prompt = "If I give one of my two apples to my girlfriend,  how many apples do I have? Options: \nA. 2\nB. 1\nC. 4\nD. 0\nAnswer with a single option and explaination\n"
    formatted_prompt = (
        f"### Human: {prompt}### Assistant: "
    )

    # Generate the Texts
    sequences = pipeline(
        formatted_prompt,
        do_sample=True,
        top_k=50,
        top_p = 0.7,
        num_return_sequences=1,
        repetition_penalty=1.1,
        max_new_tokens=100,
    )
    return sequences[0]["generated_text"].split('### Assistant: ')[1]

# print(sequences)

# Print the result
# for seq in sequences:
#     print(f"Result: {seq['generated_text']}")

In [9]:
# Gradio Interface
interface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, placeholder="Enter your prompt here..."),
    outputs="text",
    title="TinyLlama Text Generator",
    description="Enter a prompt to generate text using the TinyLlama model.",
)

# Launch the Gradio app
interface.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


