In [2]:
!pip install -q huggingface_hub

In [3]:
from huggingface_hub import InferenceClient
from google.colab import userdata

In [4]:

# Retrieve the Hugging Face token from Colab's secrets.
# Failures raise instead of calling exit(): in a notebook, exit() shuts the
# kernel down (discarding all state), whereas an exception simply stops this
# cell / "Run all" and shows the reason.
_hf_token_help = (
    "ERROR: HF_TOKEN was NOT found in Colab Secrets.\n"
    "Please go to the 'Secrets' tab (key icon on left), ensure 'HF_TOKEN' is present,\n"
    "its value is correct, and 'Notebook access' is ON. Then, restart the runtime."
)

try:
    hf_api_token = userdata.get("HF_TOKEN")
except Exception as err:
    # NOTE(review): Colab's userdata.get appears to signal a missing secret or
    # disabled notebook access by raising rather than returning None — confirm
    # against the Colab userdata API. Either way, surface the same guidance.
    raise RuntimeError(_hf_token_help) from err

# Treat None and an empty string alike: neither is a usable token.
if not hf_api_token:
    raise RuntimeError(_hf_token_help)

In [5]:
# Model and inference provider used for every request in this notebook.
HF_MODEL_ID = "deepseek-ai/DeepSeek-R1-0528"
HF_PROVIDER = "together"

# 4. Initialize the Hugging Face Inference Client with the specific provider.
# HF_PROVIDER must be passed explicitly; without it the client falls back to
# its default provider selection and the "via together" banner may be untrue.
# The model is not bound here: it is supplied per request by the caller.
try:
    client = InferenceClient(
        provider=HF_PROVIDER,
        token=hf_api_token,
    )
except Exception as e:
    # Raise rather than exit(): exit() would shut down the notebook kernel,
    # while an exception stops this cell and keeps the original cause attached.
    raise RuntimeError(
        f"ERROR: Could not initialize Hugging Face InferenceClient: {e}\n"
        "Please check your HF_TOKEN and provider name."
    ) from e

In [6]:
# 5. Function to get chat completion
def get_deepseek_completion(prompt_text, model_id=HF_MODEL_ID, temperature=0.7, max_tokens=150, top_p=1.0) -> str:
    """
    Send a single-turn user prompt to a chat model and return its reply text.

    The request goes through the module-level ``client`` using its chat
    completion API, with one ``user`` message and no system message.

    Parameters:
        prompt_text (str): The user's input message.
        model_id (str): The ID of the model to use (defaults to HF_MODEL_ID).
        temperature (float): Controls randomness (0.0 to 1.0).
        max_tokens (int): Maximum number of tokens to generate (equivalent to max_new_tokens).
        top_p (float): Nucleus sampling parameter (0.0 to 1.0).

    Returns:
        str: The reply text with surrounding whitespace removed. If the
        response has no choices or the first choice has no text, a short
        explanatory sentence is returned instead. If the request raises, the
        error and traceback are printed and a string starting with
        "An error occurred with Hugging Face:" is returned; callers rely on
        that prefix to detect failures.

    Note:
        The recorded run of this notebook shows the reply beginning with a
        ``<think>`` section, so a small ``max_tokens`` may be used up before
        the final answer is produced.
    """
    import traceback

    messages = [
        # You can add a system message here if the model benefits from it.
        # {"role": "system", "content": "You are a helpful and concise assistant."},
        {
            "role": "user",
            "content": prompt_text
        }
    ]

    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,  # Note: 'max_tokens' is used here, not 'max_new_tokens'
            top_p=top_p,
            stream=False  # For single response; set to True for streaming tokens
        )

        if not completion.choices:
            return "No choices returned by the model."

        content = completion.choices[0].message.content
        # content can be None; calling .strip() on it would raise AttributeError
        # and be misreported below as a Hugging Face API error.
        if content is None:
            return "The model returned an empty message."
        return content.strip()

    except Exception as e:
        # Best-effort by design: report the failure and hand back a sentinel
        # string so the interactive loop keeps running.
        print(f"Hugging Face API Error during generation: {e}")
        traceback.print_exc()
        return f"An error occurred with Hugging Face: {type(e).__name__} - {e}"

In [None]:
# --- Main Application Logic (User Interaction) ---
if __name__ == "__main__":
    print(f"Welcome to the Hugging Face Text Completion App using {HF_MODEL_ID} via {HF_PROVIDER}!")
    print("Type 'quit' or 'exit' to end the session.")
    print("Type '!settings' to change AI model parameters (temperature, max_tokens, top_p).")
    # Report the model that is actually configured rather than a hard-coded name.
    print(f"\nNOTE: The model ID is fixed to '{HF_MODEL_ID}' for this setup.")

    # Sampling parameters for this session (the model itself is fixed).
    current_temperature = 0.7
    current_max_tokens = 150
    current_top_p = 1.0

    def _prompt_number(prompt, current, cast, is_valid, range_error, type_error):
        """Ask repeatedly until a valid value is entered; empty input keeps `current`.

        Parameters:
            prompt (str): Text shown to the user.
            current: Value returned unchanged when the user just presses Enter.
            cast (callable): Converts the typed text (e.g. float or int); a
                ValueError from it is reported with `type_error`.
            is_valid (callable): Returns True when the converted value is acceptable.
            range_error (str): Message printed when `is_valid` rejects the value.
            type_error (str): Message printed when `cast` raises ValueError.
        """
        while True:
            raw = input(prompt).strip()
            if not raw:
                return current
            try:
                value = cast(raw)
            except ValueError:
                print(type_error)
                continue
            if is_valid(value):
                return value
            print(range_error)

    def get_user_settings():
        """Prompt for temperature, max tokens and top-p, updating the session values."""
        # These names live at module level, so rebinding them needs `global`.
        global current_temperature, current_max_tokens, current_top_p

        print(f"\n--- AI Model Settings ({HF_MODEL_ID}) ---")
        print(f"Current Temperature: {current_temperature}")
        print(f"Current Max Tokens: {current_max_tokens}")
        print(f"Current Top P: {current_top_p}")
        print("Enter new values or press Enter to keep current value.")

        current_temperature = _prompt_number(
            f"Enter Temperature (0.0 to 1.0, current: {current_temperature}): ",
            current_temperature,
            float,
            lambda value: 0.0 <= value <= 1.0,
            "Invalid range. Temperature must be between 0.0 and 1.0.",
            "Invalid input. Please enter a number.",
        )

        current_max_tokens = _prompt_number(
            f"Enter Max Tokens (integer > 0, current: {current_max_tokens}): ",
            current_max_tokens,
            int,
            lambda value: value > 0,
            "Invalid range. Max Tokens must be a positive integer.",
            "Invalid input. Please enter an integer.",
        )

        current_top_p = _prompt_number(
            f"Enter Top P (0.0 to 1.0, current: {current_top_p}): ",
            current_top_p,
            float,
            lambda value: 0.0 <= value <= 1.0,
            "Invalid range. Top P must be between 0.0 and 1.0.",
            "Invalid input. Please enter a number.",
        )

        print("\nSettings updated!")
        print(f"New Temperature: {current_temperature}")
        print(f"New Max Tokens: {current_max_tokens}")
        print(f"New Top P: {current_top_p}")
        print("------------------------")

    # Get initial settings from user
    get_user_settings()

    while True:
        user_prompt = input("\nEnter your prompt (or type '!settings' to change parameters): ")

        # Normalise once so commands still match with stray spaces or capitals.
        command = user_prompt.strip().lower()

        if command in ('quit', 'exit'):
            print("Exiting application. Goodbye!")
            break
        elif command == '!settings':
            get_user_settings()
            continue
        elif not command:
            print("Input cannot be empty. Please try again.")
            continue

        print("Generating response...")
        ai_response = get_deepseek_completion(
            user_prompt,
            temperature=current_temperature,
            max_tokens=current_max_tokens,
            top_p=current_top_p
        )

        # get_deepseek_completion reports failures as a string with this prefix.
        if ai_response and not ai_response.startswith("An error occurred"):
            print("\nAI Response:")
            print(ai_response)
        else:
            print("Failed to get a valid response from the AI.")
            print(f"Error details: {ai_response}")


Welcome to the Hugging Face Text Completion App using deepseek-ai/DeepSeek-R1-0528 via together!
Type 'quit' or 'exit' to end the session.
Type '!settings' to change AI model parameters (temperature, max_tokens, top_p).

NOTE: The model ID is fixed to 'MiniMaxAI/MiniMax-M1-80k' for this setup.

--- AI Model Settings (MiniMaxAI) ---
Current Temperature: 0.7
Current Max Tokens: 150
Current Top P: 1.0
Enter new values or press Enter to keep current value.
Enter Temperature (0.0 to 1.0, current: 0.7): 
Enter Max Tokens (integer > 0, current: 150): 
Enter Top P (0.0 to 1.0, current: 1.0): 

Settings updated!
New Temperature: 0.7
New Max Tokens: 150
New Top P: 1.0
------------------------

Enter your prompt (or type '!settings' to change parameters): Explain recursion like I’m five.
Generating response...

AI Response:
<think>
We are going to explain recursion to a five-year-old. We need to use a very simple analogy that a child can relate to.

Idea: Use a story or a task that repeats itself