In [None]:
#pip install ollama nltk rouge-score

In [None]:
from ollama import Client
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

# Initialize Ollama client.
# Points at an Ollama HTTP endpoint on localhost:11434; presumably a local
# Ollama server must already be listening there — confirm before running.
# The name `ollama` is the client object used by the ask_model() helper below.
ollama = Client(host='http://localhost:11434')

In [None]:

# Example task — summarize log errors (realistic for devs/engineers)
context = """
2025-10-15 12:02:31 ERROR ConnectionTimeout: Database connection failed after 30s.
2025-10-15 12:02:32 INFO Retrying connection...
2025-10-15 12:02:35 ERROR AuthenticationFailed: Invalid DB credentials.
2025-10-15 12:02:40 INFO Shutting down pipeline gracefully.
"""

reference_summary = "Database connection failed due to timeout and authentication issues."

# Helper: Generate response
def ask_model(prompt, model='mistral'):
    """Send one user message to the Ollama chat endpoint and return the reply text.

    Args:
        prompt: Text sent as the content of a single ``user`` message.
        model: Name of the Ollama model to query. Defaults to ``'mistral'``,
            the value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        The ``content`` field of the response message, with leading and
        trailing whitespace stripped.
    """
    # Relies on the notebook-level `ollama` client created in the setup cell.
    res = ollama.chat(model=model, messages=[{'role': 'user', 'content': prompt}])
    return res['message']['content'].strip()

In [None]:
# -------------------- Prompt Variants ----------------------

# Zero-shot: a bare instruction followed by the raw log, with no worked examples.
zero_shot_prompt = "Summarize the following server log in one sentence:\n" + context
zero_shot_output = ask_model(zero_shot_prompt)

In [None]:
zero_shot_output

In [None]:
# One-shot: show the model a single worked Log -> Summary pair, then the real log.
# NOTE(review): unlike the zero-shot prompt, this one does not ask for a
# one-sentence answer, and the target log is not wrapped in the same
# `Log: ... / Summary:` frame as the example — output length may differ.
one_shot_prompt = f"""
Example:
Log: "2025-10-14 08:01:10 ERROR APIError: Token expired."
Summary: API request failed due to expired token.

Now summarize the following:
{context}
"""
one_shot_output = ask_model(one_shot_prompt)

In [None]:
one_shot_output

In [None]:
# Few-shot: two worked Log -> Summary pairs separated by `---`, then the real log.
# NOTE(review): as with the one-shot prompt, no one-sentence constraint is
# stated here, whereas the zero-shot prompt does state one.
few_shot_prompt = f"""
Examples:
Log: "2025-10-14 08:01:10 ERROR APIError: Token expired."
Summary: API request failed due to expired token.
---
Log: "2025-10-12 22:10:05 ERROR DiskFull: Cannot write to /tmp."
Summary: Disk was full preventing file writes.
---
Now summarize the following:
{context}
"""
few_shot_output = ask_model(few_shot_prompt)

In [None]:
few_shot_output

In [None]:
# Chain-of-Thought (CoT): ask the model to list the errors and causes first,
# then summarize. `cot_prompt` is also reused as the base prompt by the
# self-consistency cell further down.
# NOTE(review): the whole response (reasoning included) is stored in
# `cot_output` and later scored against the one-sentence reference.
cot_prompt = f"""
Let's reason step by step.
1. Identify key errors and their causes.
2. Summarize them concisely.

Logs:
{context}
"""
cot_output = ask_model(cot_prompt)

In [None]:
print(cot_output)

In [None]:
# Self-consistency: sample several reasoning paths and pick one consensus answer (nothing is averaged)
import random

from collections import Counter

N_SC_SAMPLES = 3  # number of reasoning paths to sample


def _mean_token_overlap(index, token_sets):
    """Average Jaccard similarity between one sample's token set and all the others."""
    own = token_sets[index]
    scores = []
    for other_index, other in enumerate(token_sets):
        if other_index == index:
            continue
        union = own | other
        scores.append(len(own & other) / len(union) if union else 1.0)
    return sum(scores) / len(scores) if scores else 1.0


def _pick_consensus(outputs):
    """Pick the answer the samples agree on most.

    Exact-string voting rarely finds duplicates among free-form model answers,
    in which case it just returns the first sample. This helper therefore:
      1. votes on whitespace/case-normalised text, and
      2. if no answer occurs more than once, returns the sample whose tokens
         overlap most with the other samples (ties go to the earliest sample).

    Raises:
        ValueError: if `outputs` is empty.
    """
    if not outputs:
        raise ValueError("no outputs to choose from")

    normalized = [" ".join(text.lower().split()) for text in outputs]
    top_text, top_count = Counter(normalized).most_common(1)[0]
    if top_count > 1:
        # A genuine majority exists: return the first sample that matches it.
        return outputs[normalized.index(top_text)]

    token_sets = [set(text.split()) for text in normalized]
    best_index = max(range(len(outputs)),
                     key=lambda i: _mean_token_overlap(i, token_sets))
    return outputs[best_index]


sc_outputs = []
for i in range(N_SC_SAMPLES):
    # The suffix only labels the attempt; each call is an independent sample.
    sc_prompt = cot_prompt + f"\nReasoning attempt {i+1}:"
    sc_outputs.append(ask_model(sc_prompt))

# choose most frequent / best overlap summary (simple heuristic)
final_sc_output = _pick_consensus(sc_outputs)

In [None]:
print(final_sc_output)

In [None]:
print(sc_outputs)

In [None]:
sc_outputs

In [None]:
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from rouge_score import rouge_scorer

def evaluate(prediction, reference):
    """Score a generated text against one reference with BLEU, ROUGE-1 and ROUGE-L.

    Args:
        prediction: Model output to score.
        reference: Reference text to compare against.

    Returns:
        Tuple ``(bleu, rouge1, rougeL)``: smoothed BLEU computed on whitespace
        tokens, followed by the ROUGE-1 and ROUGE-L F-measures.

    Warns:
        RuntimeWarning: if the BLEU computation raises ``TypeError``; BLEU is
        then reported as ``0.0`` so the rest of the evaluation still runs.
    """
    import warnings

    # corpus_bleu expects, per hypothesis, a list of tokenised references,
    # hence the extra level of nesting around the single reference.
    reference_tokens = [reference.split()]
    prediction_tokens = prediction.split()

    # BLEU with smoother to prevent 0s
    try:
        bleu = corpus_bleu([reference_tokens], [prediction_tokens],
                           smoothing_function=SmoothingFunction().method1)
    except TypeError as exc:
        # Keep the best-effort fallback, but make it visible: a silent 0.0 is
        # indistinguishable from a genuinely bad score.
        warnings.warn(
            f"BLEU computation failed ({exc}); reporting BLEU=0.0",
            RuntimeWarning,
            stacklevel=2,
        )
        bleu = 0.0

    # ROUGE: score(target, prediction) -> reference goes first.
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    rouge_scores = rouge.score(reference, prediction)
    rouge1 = rouge_scores['rouge1'].fmeasure
    rougeL = rouge_scores['rougeL'].fmeasure

    return bleu, rouge1, rougeL

In [None]:
# -------------------- Evaluate ------------------------------
# Score every prompting strategy against the same reference summary.
variant_outputs = {
    "Zero-shot": zero_shot_output,
    "One-shot": one_shot_output,
    "Few-shot": few_shot_output,
    "Chain-of-Thought": cot_output,
    "Self-Consistency": final_sc_output,
}

print("=== Evaluation Results ===\n")
for label, output in variant_outputs.items():
    bleu, rouge1, rougeL = evaluate(output, reference_summary)
    print(f"{label}:\nOutput: {output}\nBLEU={bleu:.3f}, ROUGE-1={rouge1:.3f}, ROUGE-L={rougeL:.3f}\n")

In [None]:
# Docstring-generation check: ask the model for a one-line docstring and score
# its reply against a hand-written reference comment.
reference_comment = "Function to connect to database and return connection object."

prompt = """
# Write a Python function to connect to a database.
# Generate a one-line docstring describing its purpose clearly.

def connect_db():
    pass
"""

pred = ask_model(prompt)
bleu, rouge1, rougeL = evaluate(pred, reference_comment)
print("Model output:", pred)
# ROUGE-L was computed but never shown; report it like the main evaluation cell does.
print(f"BLEU={bleu:.3f}, ROUGE-1={rouge1:.3f}, ROUGE-L={rougeL:.3f}")

In [None]:
# Rephrasing check: the reference is the very sentence the model is asked to
# rephrase, so the scores measure how much of the wording it kept.
reference_sentence = "ETL job completed successfully for customer data pipeline."

prompt = """
Rephrase this ETL status message in the same wording style:
"ETL job completed successfully for customer data pipeline."
"""

pred = ask_model(prompt)
bleu, rouge1, rougeL = evaluate(pred, reference_sentence)
print(pred)
# ROUGE-L was computed but never shown; report it like the main evaluation cell does.
print(f"BLEU={bleu:.3f}, ROUGE-1={rouge1:.3f}, ROUGE-L={rougeL:.3f}")

In [None]:
#pip install langchain langchain-community chromadb sentence-transformers streamlit

In [None]:
from sentence_transformers import SentenceTransformer
# Load the embedding model directly; presumably a smoke test that the model can
# be loaded before the LangChain wrappers use it.
# NOTE(review): `model` is not referenced by any later cell visible here.
model = SentenceTransformer("all-MiniLM-L6-v2")
print("Model loaded OK")

In [None]:
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader

# Read the financial report from a path relative to the notebook's working directory.
loader = TextLoader("reports/fin_report.txt")
docs = loader.load()

# Same model name as the standalone SentenceTransformer load above.
embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# In-memory mode – no persistence directory
# NOTE(review): `vectordb` is rebound to a FAISS store in the next cell, so this
# Chroma store is not the one the later QA cells end up using.
vectordb = Chroma.from_documents(docs, embedding, persist_directory=None)
print("In-memory vector store created successfully!")

In [None]:
# rag_prepare.py
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma, FAISS
from langchain_community.document_loaders import TextLoader

# Load the raw report.
loader = TextLoader("reports/fin_report.txt")
raw_docs = loader.load()

# Split the report into overlapping chunks before embedding. Indexing the file
# as one unsplit document would leave the retriever (k=3 later on) with nothing
# to choose between, and a long text is a poor fit for a sentence-embedding model.
# Chunk sizes are in characters; tune them for the report at hand.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
docs = splitter.split_documents(raw_docs)

# Embed the chunks and build the FAISS index used by the query cells.
embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = FAISS.from_documents(docs, embedding)
print("FAISS index created")

In [None]:
from langchain_community.vectorstores import FAISS

# NOTE(review): this repeats the FAISS build at the end of the previous cell
# (same `docs`, same embedding model), so the documents are embedded twice on a
# full run. It depends on `docs` and `SentenceTransformerEmbeddings` from earlier cells.
embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = FAISS.from_documents(docs, embedding)
print("FAISS index created")

In [None]:
# Persist the FAISS store under ./faiss_index; the query cell and the generated
# Streamlit app both load it back from this same folder name.
vectordb.save_local("faiss_index")
print("FAISS vector store created and saved to ./faiss_index")

In [None]:
# rag_query.py
from langchain.chains import RetrievalQA
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma, FAISS

llm = Ollama(model="mistral")

# Reload the index saved earlier. `embedding` comes from the index-building
# cell and must be the same model that was used to build the index.
# SECURITY: allow_dangerous_deserialization=True unpickles the stored docstore.
# Only load an index this notebook wrote itself; never one from an untrusted source.
vectordb = FAISS.load_local("faiss_index", embedding, allow_dangerous_deserialization=True)
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

# "stuff" chain: the retrieved documents are placed into a single prompt.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

query = "Summarize the risk section of the report"
# Calling the chain object directly (`qa(query)`) is deprecated; invoke() takes
# the inputs as a dict keyed by the chain's input key, which is "query" here.
result = qa.invoke({"query": query})
print(result["result"])

In [None]:
# Ask a question
query = "Summarize the major risk factors mentioned in the report."
# invoke() replaces the deprecated direct call `qa(query)`; the returned dict
# still carries the answer under "result".
result = qa.invoke({"query": query})

print("Query:", query)
print("Answer:\n", result["result"])

In [None]:
# ========================================
# Step 3 – Interactive Streamlit App
# ========================================
# Writes a standalone Streamlit script next to the notebook. The generated app
# builds the QA chain once (cached with st.cache_resource) rather than
# reloading the embedding model, FAISS index and LLM on every button click,
# and uses invoke() in place of the deprecated Chain.run().
with open("rag_faiss_app.py", "w", encoding="utf-8") as f:
    f.write("""
import streamlit as st
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.chains import RetrievalQA


@st.cache_resource
def load_qa_chain():
    # Built once per server process and reused across Streamlit reruns.
    embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    # SECURITY: this unpickles the stored index; only load an index you created yourself.
    vectordb = FAISS.load_local("faiss_index", embedding, allow_dangerous_deserialization=True)
    retriever = vectordb.as_retriever(search_kwargs={"k": 3})
    llm = Ollama(model="mistral")
    return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)


st.title("Financial Report Q&A (Ollama + FAISS + LangChain)")
query = st.text_input("Ask a question about the financial report:")

if st.button("Get Answer") and query:
    with st.spinner("Analyzing..."):
        answer = load_qa_chain().invoke({"query": query})["result"]
    st.write("### Answer:")
    st.write(answer)
""")
