In [None]:
# Setup Environment
# Uncomment the line below to install the packages this notebook relies on.
# In Jupyter, `%pip install ...` is preferred over `!pip install ...` because it
# targets the environment of the running kernel.
# NOTE(review): later cells also import `langchain_community` and `sklearn`
# (scikit-learn), which are not named in this list — confirm they are installed.
#!pip install openai transformers streamlit langchain faiss-cpu pypdf sentence-transformers

## Part 1: OpenAI & HuggingFace

In [None]:
# OpenAI
# Disabled example: the snippet is wrapped in a string literal, so running this
# cell does not call the API. It is written for the openai>=1.0 client interface
# (the module-level `openai.ChatCompletion` API was removed in 1.0), and the key
# is read from the OPENAI_API_KEY environment variable rather than hardcoded.
"""
import os
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
response = client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[{"role": "user", "content": "What is LangChain?"}]
)
print(response.choices[0].message.content)
"""

In [None]:
# HuggingFace Free Model
from transformers import pipeline

# Text-generation pipeline backed by the "gpt2" checkpoint.
hf_pipe = pipeline("text-generation", model="gpt2")

# The pipeline returns a list of dicts; show the text of the first generation.
generations = hf_pipe("What is a language model?", max_length=50)
first_generation = generations[0]
print(first_generation['generated_text'])

In [None]:
#!pip install transformers torch

In [None]:
from transformers import pipeline

# Load Phi-3 model
llm = pipeline(
    "text-generation",
    model="microsoft/Phi-3-mini-4k-instruct",
    device=-1,  # CPU
)

prompt = "What is KYC in banking?"

# Ask for a single sequence and keep the generated text of that one result.
outputs = llm(prompt, max_length=100, num_return_sequences=1)
response = outputs[0]["generated_text"]
print("Response:", response)

## Part 2: Build PDF Q&A System (using LangChain)

In [None]:
import os

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFacePipeline

# Location of the PDF to index. Defaults to the original hard-coded path; set the
# PDF_PATH environment variable to index a different file without editing this cell.
PDF_PATH = os.environ.get("PDF_PATH", "E://l&w/5_Day_AI_GenAI_Course_Outline.pdf")

# Load the PDF, then cut the loaded documents into 500-character chunks that
# overlap by 50 characters.
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# Embed every chunk with the all-MiniLM-L6-v2 model and store the vectors in a
# FAISS index; `db` is what the later query / RAG cells search.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
db = FAISS.from_documents(texts, embeddings)

In [None]:
# Sample Query
# Search the vector store for the chunks closest to the question, then print
# the text of each returned chunk on its own line.
query = "What is the main idea in the document?"
docs = db.similarity_search(query)
page_texts = [hit.page_content for hit in docs]
for text in page_texts:
    print(text)

## Part 3: Streamlit Interface (PDF Chatbot)
Run via `streamlit run app.py`. This is a basic interface demo.

In [None]:
"""
import streamlit as st
st.title("PDF Chatbot")
user_input = st.text_input("Ask a question from PDF")
if user_input:
    results = db.similarity_search(user_input, k=2)
    for r in results:
        st.write(r.page_content)
"""

## Part 4: LangChain Core Concepts

In [None]:
from transformers import pipeline
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import HuggingFacePipeline

# Hugging Face pipeline, wrapped so LangChain can use it as an LLM.
# Note: this rebinds the notebook-level name `llm`.
pipe = pipeline("text-generation", model="gpt2")
llm = HuggingFacePipeline(pipeline=pipe)

# Prompt with a single `{topic}` placeholder.
prompt = PromptTemplate.from_template("Tell me a joke about {topic}")

# LCEL style chain (prompt | llm): the formatted prompt is piped into the model.
chain = prompt | llm
result = chain.invoke({"topic": "AI"})

print(result)

## Part 5: Free Local Models (Mistral via Ollama)
Install and run Ollama (https://ollama.com), then run `ollama run mistral`.

In [None]:
from langchain_community.llms import Ollama

# Bind `llm` to the "mistral" model through the Ollama wrapper, then send it a
# single prompt and print whatever comes back.
llm = Ollama(model="mistral")
answer = llm.invoke("Explain vector databases in 30 words")
print(answer)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat template: a fixed system instruction followed by a user
# message holding the `{question}` placeholder.
chat_messages = [
    ("system", "You are an expert AI assistant"),
    ("user", "{question}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# Pipe the template into whichever `llm` is currently bound in the notebook.
chain = prompt | llm
answer = chain.invoke({"question": "How do you build a chatbot using LLMs?"})
print(answer)

## Part 6: Retrieval-Augmented Generation (RAG)

In [None]:
from langchain.chains import RetrievalQA

# Retrieval-augmented QA: chunks fetched from `db` are handed to whichever `llm`
# is currently bound in the notebook.
retriever = db.as_retriever()
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# `Chain.run` is deprecated; `invoke` matches the other chains in this notebook.
# RetrievalQA takes the question under the "query" key and returns a dict whose
# "result" entry holds the answer text.
rag_result = rag_chain.invoke({"query": "Summarize the PDF"})
print(rag_result["result"])

## Part 7: Tools & Function Calling in LangChain

In [None]:
from langchain.agents import initialize_agent, Tool

def dummy_tool(input):
    """Return a canned completion message that echoes ``input``.

    Placeholder tool body: no analysis is performed; the argument is only
    interpolated into the returned string.
    """
    message = f"Log analysis completed on: {input}"
    return message

# Expose `dummy_tool` to the agent under the name "LogAnalyzer".
tools = [Tool(name="LogAnalyzer", func=dummy_tool, description="Analyzes game logs")]
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)

# `agent.run` is deprecated; `invoke` takes the request under the "input" key and
# returns a dict whose "output" entry holds the agent's final answer.
agent_result = agent.invoke({"input": "Run LogAnalyzer on today's gaming data"})
print(agent_result["output"])

## Part 8: Mini Project – Game Betting Log Analysis
Simulate some JSON logs and run questions over them.

In [None]:
# Simulated betting records: one dict per bet with the player, the amount bet
# and the outcome.
logs = [
    {
        "player": "Alice",
        "bet": 100,
        "outcome": "win",
    },
    {
        "player": "Bob",
        "bet": 150,
        "outcome": "loss",
    },
]
# Save or embed logs to vector store, then query it using same RAG setup.

In [20]:
#pip install sentence-transformers scikit-learn

In [None]:
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample documents (note: this rebinds the notebook-level name `documents`).
documents = [
    "Machine learning helps in predicting data patterns.",
    "NLP enables computers to understand and generate human language.",
    "Cooking is both an art and a science.",
    "Transformers have revolutionized natural language processing.",
    "Natural language processing allows machines to comprehend speech and text.",
]

# Question that both search methods rank the documents against.
query = "How do machines understand human language?"


def _score_of(pair):
    """Sort key: the score stored in a ``(document, score)`` pair."""
    return pair[1]


# --- SEMANTIC SEARCH ---
# Encode the documents and the query with the same sentence-transformer model,
# then compare the query embedding to every document embedding.
model = SentenceTransformer('all-MiniLM-L6-v2')
doc_embeddings = model.encode(documents, convert_to_tensor=True)
query_embedding = model.encode(query, convert_to_tensor=True)

similarity_row = util.cos_sim(query_embedding, doc_embeddings)[0]
semantic_scores = similarity_row.tolist()
semantic_results = list(zip(documents, semantic_scores))
semantic_results.sort(key=_score_of, reverse=True)  # highest score first

print("🔍 Semantic Search Results:")
for doc, score in semantic_results:
    print(f"{score:.4f} - {doc}")

# --- LEXICAL SEARCH (TF-IDF) ---
# The vectorizer is fit on the documents plus the query in one call, so the
# last row of the matrix is the query and the rows before it are the documents.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents + [query])
query_vector = tfidf_matrix[-1]
corpus_vectors = tfidf_matrix[:-1]
lexical_scores = cosine_similarity(query_vector, corpus_vectors)[0]
lexical_results = list(zip(documents, lexical_scores))
lexical_results.sort(key=_score_of, reverse=True)  # highest score first

print("\n📝 Lexical Search Results:")
for doc, score in lexical_results:
    print(f"{score:.4f} - {doc}")
