<a href="https://colab.research.google.com/github/KolipakaRamesh/AIML_Practice_Excercises/blob/main/MCP_R%26D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
# Cell 1: Install required packages (quiet mode, via the notebook shell escape).
# transformers and langchain are imported by the code below; accelerate is not
# imported directly in the code shown here — presumably an optional
# transformers dependency; confirm it is needed.
# NOTE(review): versions are unpinned, and the `langchain.llms` /
# `langchain.chains` / `langchain.memory` import paths used below look like
# legacy locations — confirm they exist in whichever release gets installed.
!pip install -q transformers langchain accelerate

In [30]:
from transformers import pipeline, GPT2Tokenizer
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory

# ✅ Subclass ConversationBufferMemory with a sliding window over recent turns
class LimitedConversationMemory(ConversationBufferMemory):
    """Conversation memory that exposes only the most recent exchanges.

    The stored transcript in ``chat_memory`` is left untouched; only the
    ``buffer`` view is truncated.

    ``max_turns`` is declared as a model field instead of being assigned to
    an undeclared attribute inside ``__init__``, so it can be configured per
    instance, e.g. ``LimitedConversationMemory(max_turns=5)``. Existing
    calls that do not pass it keep the previous limit of 3.
    """

    # Number of most recent Human+AI exchanges exposed through ``buffer``.
    max_turns: int = 3

    @property
    def buffer(self):
        """Return the history restricted to the last ``max_turns`` exchanges.

        Returns:
            A list of message objects when ``return_messages`` is true,
            otherwise one string formatted with the configured human/AI
            prefixes. Empty when there are no messages yet or when
            ``max_turns`` is not positive.
        """
        # Function-scope import so this class does not depend on any edit to
        # the file's import block.
        from langchain.schema import get_buffer_string

        # Slice whole messages rather than text lines: a single model reply
        # can span several lines, so a line-based window would drop or split
        # exchanges instead of keeping the last N of them.
        keep = self.max_turns * 2  # Each turn = Human + AI
        # A ``[-0:]`` slice would return the full list, so handle a
        # non-positive limit explicitly.
        recent = self.chat_memory.messages[-keep:] if keep > 0 else []

        if self.return_messages:
            return recent

        text = get_buffer_string(
            recent,
            human_prefix=self.human_prefix,
            ai_prefix=self.ai_prefix,
        )
        # Same outer whitespace trimming as before.
        return text.strip()


In [None]:
# Load GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Text-generation pipeline: sampled decoding, at most 100 new tokens per call.
hf_pipe = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    max_new_tokens=100,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    # GPT-2 defines no pad token, so generation falls back to EOS and logs a
    # warning on every call. Passing the id explicitly keeps the same
    # behavior without the per-turn warning.
    pad_token_id=tokenizer.eos_token_id,
)

# Wrap the Hugging Face pipeline so LangChain can use it as an LLM.
llm = HuggingFacePipeline(pipeline=hf_pipe)

# Prompt template for chat-style interaction.
# {history} is expected to be supplied by the memory object and {input} by
# the user's message; the trailing "AI:" cues the model to answer.
prompt = PromptTemplate.from_template(
    """This is a conversation between a curious human and a helpful AI assistant.

{history}
Human: {input}
AI:"""
)

# ✅ Use our custom limited memory class
# return_messages=False makes the history a plain string, which is what the
# string prompt template above needs.
memory = LimitedConversationMemory(return_messages=False)

# Create LLMChain with prompt and memory (verbose=True for debugging output)
chain = LLMChain(llm=llm, prompt=prompt, memory=memory, verbose=True)


In [None]:
def run_mcp_chat():
    """Run an interactive console chat loop against the module-level ``chain``.

    Each iteration reads one line from the user, sends it through ``chain``
    and prints the reply, preceded by the token count of the prompt built
    from the current history and that input.

    The loop ends when the user types ``exit`` or ``quit`` (case-insensitive,
    surrounding whitespace ignored), or when input ends (EOF or a keyboard
    interrupt). Blank lines are ignored rather than sent to the model.

    Returns:
        None. All results are printed.
    """
    print("🔁 MCP R&D Chat (GPT-2). Type 'exit' or 'quit' to stop.\n")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted cell ends the session cleanly
            # instead of surfacing a traceback.
            print("\n👋 Exiting chat.")
            break

        if user_input.lower() in {"exit", "quit"}:
            print("👋 Exiting chat.")
            break
        if not user_input:
            # Nothing to send; ask again.
            continue

        # Build the debug prompt BEFORE running the chain. The chain is
        # configured with `memory`, so after the call the history already
        # contains this exchange and the count would no longer describe the
        # prompt that was sent.
        full_prompt = prompt.format(input=user_input, history=memory.buffer)
        token_count = len(tokenizer.encode(full_prompt))

        # Run chain and get response
        response = chain.run(user_input)

        # Show prompt token count for debug (optional)
        print(f"\n[Prompt Token Count]: {token_count}")

        print(f"AI: {response.strip()}\n")
        print("-" * 40)

# Start the chat. This call blocks on input() and only returns once the user
# types 'exit' or 'quit'.
run_mcp_chat()
