# In [None]:
import time

# Rate Limit Handling
rate_limit_time = 1  # Example minimum spacing between API calls, in seconds
last_api_call_time = 0  # time.time() stamp of the latest call; starts at 0

def handle_rate_limit():
    """Block until at least ``rate_limit_time`` seconds separate API calls.

    Compares the current time with the module-level ``last_api_call_time``.
    If the previous call was too recent, prints a notice and sleeps for the
    remaining interval. ``last_api_call_time`` is then set to the moment the
    caller is actually released.
    """
    global last_api_call_time
    time_since_last_call = time.time() - last_api_call_time
    time_to_wait = rate_limit_time - time_since_last_call
    if time_to_wait > 0:
        print(f"Rate limit enforced. Waiting for {time_to_wait:.2f} seconds.")
        time.sleep(time_to_wait)
    # Read the clock again after any sleep. Recording the pre-sleep timestamp
    # would make the next call believe more time has elapsed than really has,
    # letting it through before the full interval has passed.
    last_api_call_time = time.time()

# Token Limit Handling
max_tokens = 1000  # Example maximum token limit
tokens_used = 0  # Running total of tokens consumed so far

def truncate_to_token_limit(text, max_tokens):
    """Return ``text`` cut down to at most ``max_tokens`` tokens.

    Tokens are approximated as whitespace-separated words (``str.split()``).

    Args:
        text: The string to truncate.
        max_tokens: Maximum number of tokens to keep. Negative values are
            treated as zero. This parameter shadows the module-level
            ``max_tokens`` inside the function.

    Returns:
        ``text`` unchanged when it already fits within the limit; otherwise
        the first ``max_tokens`` tokens joined by single spaces (the original
        whitespace is not preserved in a truncated result).
    """
    # A negative limit used as a slice bound would drop tokens from the end
    # (tokens[:-k]) instead of keeping none, so clamp it to zero first.
    limit = max(max_tokens, 0)
    tokens = text.split()
    if len(tokens) <= limit:
        return text
    return " ".join(tokens[:limit])

# Conversation History
conversation_history = []  # Timestamped transcript entries, oldest first

def add_to_conversation_history(text):
    """Append ``text`` to ``conversation_history`` with a local-time stamp.

    Each stored entry has the form ``[YYYY-MM-DD HH:MM:SS] text``.
    """
    # With no time tuple supplied, strftime formats the current local time.
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {text}"
    conversation_history.append(entry)

def update_tokens_used(tokens):
    """Increase the module-level ``tokens_used`` counter by ``tokens``."""
    global tokens_used
    tokens_used = tokens_used + tokens

def format_prompt(input_text=None):
    """Build a prompt from the conversation history plus the new user input.

    Args:
        input_text: The user message to place after the history. When
            omitted, the value of the module-level variable ``input_text``
            (the main loop's loop variable) is used, which keeps existing
            ``format_prompt()`` calls working.

    Returns:
        The history entries joined by newlines and truncated to the remaining
        token budget (``max_tokens - tokens_used``, never below zero),
        followed by a newline and ``input_text``.

    Raises:
        NameError: If ``input_text`` is not passed and no module-level
            ``input_text`` exists.
    """
    if input_text is None:
        # Legacy call style: read the caller's loop variable from module
        # scope explicitly instead of relying on an implicit global lookup.
        try:
            input_text = globals()["input_text"]
        except KeyError:
            raise NameError("name 'input_text' is not defined") from None
    # If tokens_used ever exceeds max_tokens the difference is negative, and a
    # negative limit must not reach the truncation helper as a slice bound.
    remaining_budget = max(max_tokens - tokens_used, 0)
    conversation = "\n".join(conversation_history)
    truncated_conversation = truncate_to_token_limit(conversation, remaining_budget)
    return f"{truncated_conversation}\n{input_text}"


# Custom LLM-like Response Generator
def generate_custom_response(prompt):
    """Return the canned reply for ``prompt``, or a default when none matches.

    The lookup is an exact, case-sensitive match against a fixed table of
    known prompts.
    """
    canned_replies = {
        "Hello": "Hello! How can I assist you?",
        "How are you?": "I'm just a computer program, but I'm here to help!",
        "Tell me a joke": "Sure, here's one: Why don't scientists trust atoms? Because they make up everything!",
        "What's the weather today?": "I'm not connected to the internet, so I can't provide real-time weather information.",
        "Goodbye": "Goodbye! Have a great day!",
    }
    try:
        return canned_replies[prompt]
    except KeyError:
        # Unknown prompt: fall back to the generic reply.
        return "I'm not sure how to respond to that."

# Function to call LLM and handle errors
class RateLimitError(Exception):
    """Signals that a call was rejected because of rate limiting.

    Nothing in ``call_llm`` raises this itself; its handler exists for
    callees that may.
    """

class TokenLimitError(Exception):
    """Signals that a response would push usage past ``max_tokens``."""

def call_llm(prompt):
    """Get a response for ``prompt``, enforcing the rate and token limits.

    Waits for the rate limit, generates the response, and checks that its
    whitespace-separated token count still fits in the remaining budget
    before recording the usage.

    Args:
        prompt: The text passed to ``generate_custom_response``.

    Returns:
        The generated response on success. If any error occurs (including
        the token budget being exceeded), a message is printed and the
        generic fallback reply is returned instead.
    """
    fallback_response = "I'm not sure how to respond to that."
    try:
        handle_rate_limit()  # Handle rate limit
        llm_response = generate_custom_response(prompt)
        tokens_required = len(llm_response.split())
        if tokens_used + tokens_required > max_tokens:
            raise TokenLimitError("Token limit exceeded.")
        update_tokens_used(tokens_required)  # Update tokens used based on LLM response
        return llm_response
    # The exception classes named below must exist: an undefined name in an
    # ``except`` clause raises NameError while the handler is being matched,
    # which would escape this function instead of being handled.
    except RateLimitError:
        print("Rate limit error. Please wait a moment and try again.")
    except TokenLimitError:
        print("Token limit error. Please try again with a shorter input.")
    except Exception:
        # Catch-all boundary so the conversation loop keeps running.
        print("An error occurred. Please try again later.")
    return fallback_response

# Simulating user inputs
user_inputs = [
    "Hello",
    "How are you?",
    "Tell me a joke",
    "What's the weather today?",
    "Goodbye",
]

# Main loop for simulating conversation
# NOTE: the loop variable must stay named `input_text`, because
# format_prompt() reads it from module scope.
for input_text in user_inputs:
    # The formatted prompt is built here, but call_llm() below is given the
    # raw user input rather than this prompt.
    prompt = format_prompt()
    llm_response = call_llm(input_text)

    # Record the user turn first, then the LLM reply.
    for entry in (f"User Input: {input_text}", f"LLM Response: {llm_response}"):
        add_to_conversation_history(entry)

    # Show the transcript so far and the unused token budget
    print(*conversation_history, sep="\n")
    remaining_tokens = max_tokens - tokens_used
    print("Remaining Tokens:", remaining_tokens)
    print("=" * 30)
