In [1]:
!pip install langchain langchain-community langchain-core transformers torch accelerate


Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.
  Downloading langchain_community-0.4-py3-none-any.whl.metadata (3.0 kB)
  Downloading langchain_community-0.3.31-py3-none-any.whl.metadata (3.0 kB)
Collecting requests<3,>=2 (from langchain)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting 

In [4]:
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence

# 🧠 Load a smart small model (offline after first download)
generator = pipeline(
    "text-generation",
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    max_new_tokens=300,
    temperature=0.3,
    device_map="auto"
)

# Wrap it for LangChain
llm = HuggingFacePipeline(pipeline=generator)

# Prompt template for open Q&A
prompt = PromptTemplate(
    input_variables=["question"],
    template="You are an intelligent assistant. Answer the following question clearly and accurately.\n\nQuestion: {question}\n\nAnswer:"
)

# Combine into a Runnable chain
chain = prompt | llm

# 🔁 Ask your question here
question = input("💬 Ask me anything: ")

response = chain.invoke({"question": question})
print("\n🤖 Answer:\n", response)


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Device set to use cpu


💬 Ask me anything: what is machine learning

🤖 Answer:
 You are an intelligent assistant. Answer the following question clearly and accurately.

Question: what is machine learning

Answer: Machine learning is a field of computer science that involves the use of algorithms and statistical methods to learn from data without human intervention. It is used in various fields such as healthcare, finance, and marketing to make predictions, recommendations, and optimize processes.


In [5]:
!pip install streamlit transformers accelerate langchain langchain-community langchain-core


Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.51.0


In [6]:
%%writefile app.py
import streamlit as st
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence

st.set_page_config(page_title="Offline AI Assistant", page_icon="🤖")

st.title("🤖 Offline AI Assistant (TinyLlama)")
st.write("Ask me anything — works without API keys!")

@st.cache_resource
def load_model():
    generator = pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        max_new_tokens=300,
        temperature=0.3,
        device_map="auto"
    )
    llm = HuggingFacePipeline(pipeline=generator)
    prompt = PromptTemplate(
        input_variables=["question"],
        template="You are a helpful assistant. Answer the following question clearly and accurately.\n\nQuestion: {question}\n\nAnswer:"
    )
    return prompt | llm

chain = load_model()

user_question = st.text_input("Enter your question:")
if st.button("Get Answer"):
    if user_question.strip() == "":
        st.warning("Please enter a question.")
    else:
        with st.spinner("Thinking... 🤔"):
            response = chain.invoke({"question": user_question})
        st.success("✅ Answer:")
        st.write(response)


Writing app.py


In [7]:
!pip install streamlit pyngrok transformers accelerate langchain langchain-community langchain-core


Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.4.1


In [10]:
from pyngrok import ngrok
import threading, time, os

# Start Streamlit on a background thread
def run_streamlit():
    os.system("streamlit run app.py --server.port 8501")

threading.Thread(target=run_streamlit).start()
time.sleep(5)

# Create public link
public_url = ngrok.connect(8501)
print("🌐 Open your app here:", public_url)


🌐 Open your app here: NgrokTunnel: "https://isanomalous-earleen-unpalatally.ngrok-free.dev" -> "http://localhost:8501"


In [9]:
from pyngrok import ngrok
ngrok.set_auth_token("35E9T7A6tsRgKvZiyrezowZIJ1b_YeHB4kjmdTYjmeB49yuk")


In [11]:
public_url = ngrok.connect(8501)
print("🌐 Open your app here:", public_url)


🌐 Open your app here: NgrokTunnel: "https://isanomalous-earleen-unpalatally.ngrok-free.dev" -> "http://localhost:8501"


In [12]:
!streamlit run app.py --server.port 8501 --server.headless true



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
2025-11-09 05:02:18.978 Port 8501 is already in use
