# **Steps to execute:**
1. Don't run the code for extracting man pages, extracting chunks and embeddings and storing in chromadb.
2. The chromadb folder sent must be saved in drive/MyDrive.
3. The code can be run continuously from the rag framework part till the agentic part incorporating text input is finished. The test cases are mentioned in the output of the agentic workflow tested earlier.
4. For testing out speech input execution, in your /content folder, an empty audio file named user_audio.wav must be created before executing.
5. Based on what agent action(main, fix, autocomplete, etc.) you want to perform, the cell recording speech and converting to text must be modified accordingly.
6. Fine tuning of gpt2 and evaluation of fine tuned model using bleu, rouge, bert score can be run subsequently.

In [1]:
'''running this code is not needed as it is for extracting linux man pages which will later be stored in the chromadb'''


import requests
from bs4 import BeautifulSoup

BASE_URL = "https://man7.org/linux/man-pages/"

def get_commands_from_section(section):
    """Extracts command names from a specific man section"""
    section_url = f"{BASE_URL}{section}/"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    response = requests.get(section_url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch section {section}. Status Code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    commands = []

    for link in soup.find_all("a", href=True):
        if link["href"].endswith(".html"):  # Valid command pages
            command_name = link.text.strip()
            command_url = BASE_URL + section + "/" + link["href"]
            commands.append((command_name, command_url))

    return commands

def get_all_linux_commands():
    """Extracts commands from all major sections"""
    sections = ["man1", "man2", "man3", "man4", "man5", "man6", "man7", "man8"]  # Common sections
    all_commands = []

    for section in sections:
        print(f"Fetching commands from {section}...")
        commands = get_commands_from_section(section)
        all_commands.extend(commands)

    return all_commands

# Run the function
linux_commands = get_all_linux_commands()

# Print results
print(f"\nTotal commands extracted: {len(linux_commands)}")
for cmd, url in linux_commands[:10]:  # Show first 10
    print(f"{cmd} -> {url}")

Fetching commands from man1...
Fetching commands from man2...
Fetching commands from man3...
Fetching commands from man4...
Fetching commands from man5...
Fetching commands from man6...
Fetching commands from man7...
Fetching commands from man8...

Total commands extracted: 29527
AS.1.html -> https://man7.org/linux/man-pages/man1/AS.1.html
CLEAR.1.html -> https://man7.org/linux/man-pages/man1/CLEAR.1.html
Firecfg.1.html -> https://man7.org/linux/man-pages/man1/Firecfg.1.html
Firejail.1.html -> https://man7.org/linux/man-pages/man1/Firejail.1.html
Firemon.1.html -> https://man7.org/linux/man-pages/man1/Firemon.1.html
LICENSE.LGPL-2.1.html -> https://man7.org/linux/man-pages/man1/LICENSE.LGPL-2.1.html
LICENSE.LGPL-2.1@@pcp.html -> https://man7.org/linux/man-pages/man1/LICENSE.LGPL-2.1@@pcp.html
LICENSE.LGPL2.1.html -> https://man7.org/linux/man-pages/man1/LICENSE.LGPL2.1.html
LICENSE.LGPL2.1@@numactl.html -> https://man7.org/linux/man-pages/man1/LICENSE.LGPL2.1@@numactl.html
PCPCompat.1.

In [2]:
!pip install flagEmbedding chromadb nltk tiktoken #running this code for all the libraries to be downloaded


Collecting flagEmbedding
  Downloading FlagEmbedding-1.3.4.tar.gz (163 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/163.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/163.8 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting chromadb
  Downloading chromadb-1.0.6-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.9 kB)
Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting datasets>=2.19.0 (from flagEmbedding)
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting ir-datasets (from flagEmbedding)
  Downloading ir_datasets-0.5.10-py3-none-any.whl.metadata (12 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
 

In [3]:
'''running this code is not necessary as the model used here (bge-m3) is used for generating embeddings for the man pages'''

from FlagEmbedding import BGEM3FlagModel

# Load the BGE-M3 embedding model (FP16 for performance boost)
embedding_model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)

def generate_embedding(text):
    """
    Generates an embedding for the given text chunk.

    Args:
        text (str): The text chunk to embed.

    Returns:
        list: List of embedding values.
    """
    # Ensure input is a list (required by encode method)
    if isinstance(text, str):
        text = [text]

    # Generate embeddings
    embeddings = embedding_model.encode(text, max_length=8192)  # Ensures large text compatibility

    # Extract dense embeddings correctly
    return embeddings["dense_vecs"][0]  # Returns first embedding as a list


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

.gitattributes:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

colbert_linear.pt:   0%|          | 0.00/2.10M [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

.DS_Store:   0%|          | 0.00/6.15k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/15.8k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

bm25.jpg:   0%|          | 0.00/132k [00:00<?, ?B/s]

long.jpg:   0%|          | 0.00/485k [00:00<?, ?B/s]

miracl.jpg:   0%|          | 0.00/576k [00:00<?, ?B/s]

mkqa.jpg:   0%|          | 0.00/608k [00:00<?, ?B/s]

others.webp:   0%|          | 0.00/21.0k [00:00<?, ?B/s]

nqa.jpg:   0%|          | 0.00/158k [00:00<?, ?B/s]

long.jpg:   0%|          | 0.00/127k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Constant_7_attr__value:   0%|          | 0.00/65.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/698 [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/725k [00:00<?, ?B/s]

model.onnx_data:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

sparse_linear.pt:   0%|          | 0.00/3.52k [00:00<?, ?B/s]

In [5]:
!pip install -U langchain_google_genai #running code for downloading necessary libraries

Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.16 (from langchain_google_genai)
  Downloading google_ai_generativelanguage-0.6.17-py3-none-any.whl.metadata (9.8 kB)
Downloading langchain_google_genai-2.1.3-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Downloading google_ai_generativelanguage-0.6.17-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: filetype, google-ai-generativelanguage, langchain_google_genai
  Attempting uninstall: google-ai-generativelangu

In [None]:
''' this code is using the bge-m3 embedding model to extract linux commands from man pages manually, splitting the texts into chunks, embedding them and then storing in chroma db.
We've already extracted 2 man pages and couldn't extract more due to resource and time constraints and hence will send over the chromadb file to be used directly as a vector database in the rag and agentic workflow'''

import requests
from bs4 import BeautifulSoup
import chromadb
import shutil
from FlagEmbedding import BGEM3FlagModel  # Import embedding model

BASE_URL = "https://man7.org/linux/man-pages/"

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

CHROMA_DB_PATH = "/content/drive/MyDrive/chroma_db"  # Original database path
LOCAL_DB_PATH = "/content/chroma_db"  # Temporary writable copy

# Copy database to a writable location
shutil.copytree(CHROMA_DB_PATH, LOCAL_DB_PATH, dirs_exist_ok=True)

chroma_client = chromadb.PersistentClient(path=LOCAL_DB_PATH)
collection = chroma_client.get_or_create_collection(name="linux_manuals")

# Load Embedding Model
embedding_model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)

def get_commands_with_manuals():
    """Extracts Linux command names and retrieves their manual texts from all 8 man pages."""
    sections = [f"man{i}" for i in range(1, 3)]  # ['man1', 'man2']
    linux_manuals = {}

    for section in sections:
        print(f" Fetching commands from {section}...")
        commands = get_commands_from_section(section)

        for command_name, command_url in commands:
            manual_text = fetch_manual_text(command_url)
            if manual_text:  # Only store if text is extracted
                linux_manuals[command_name] = manual_text

        print(f" {section} done")

    return linux_manuals

def fetch_manual_text(command_url):
    """Fetches and extracts text from a Linux man page."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    try:
        response = requests.get(command_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return None  # Fail silently

    soup = BeautifulSoup(response.text, "html.parser")
    manual_text = soup.get_text(separator=" ", strip=True)  # Extract readable text
    return manual_text if manual_text.strip() else None

def chunk_text(text, chunk_size=512):
    """Splits text into smaller chunks for embedding."""
    words = text.split()
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]

def generate_embedding(text):
    """Generates an embedding for the given text chunk."""
    embedding_output = embedding_model.encode(text)
    if isinstance(embedding_output, dict) and "dense_vecs" in embedding_output:
        return embedding_output["dense_vecs"].tolist()  # Convert NumPy array to list
    raise ValueError("Unexpected embedding output format from BGEM3FlagModel.")

def store_in_chromadb(command, chunks):
    """Embeds and stores command manual chunks in ChromaDB."""
    for i, chunk in enumerate(chunks):
        embedding = generate_embedding(chunk)  # Convert chunk into vector
        collection.add(
            documents=[chunk],
            embeddings=[embedding],  # Store embedding
            metadatas=[{"command": command}],
            ids=[f"{command}_{i}"]
        )
    print(f" Stored {len(chunks)} chunks for '{command}' in ChromaDB.")

# Extract and store manuals
linux_manuals = get_commands_with_manuals()

for command, manual_text in linux_manuals.items():
    chunks = chunk_text(manual_text)  # Step 1: Chunking
    store_in_chromadb(command, chunks)  # Step 2 & 3: Embedding + Storing

# Copy the updated database back to Google Drive
shutil.copytree(LOCAL_DB_PATH, CHROMA_DB_PATH, dirs_exist_ok=True)

print(" Linux manuals stored successfully in ChromaDB with embeddings!")'''


In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import os
import chromadb

chromadb_path = "/content/drive/MyDrive/chroma_db"
files = os.listdir(chromadb_path)

print("Files in /content/drive/MyDrive/chroma_db:")
for file in files:
    print(file)


Files in /content/drive/MyDrive/chroma_db:
439d5ac7-663d-4d19-8309-d7bd2d3bc91a
c61a04e3-c5d1-4d6d-aca5-7e88408f7366
chroma.sqlite3


In [8]:
import chromadb

CHROMA_DB_PATH = "/content/drive/MyDrive/chroma_db"
chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)

collections = chroma_client.list_collections()
print("Available collections:", [col for col in collections])  # Directly print collection names





Available collections: [Collection(name=linux_manuals)]


In [9]:
collection = chroma_client.get_collection(name="linux_manuals")

# Get a sample of stored documents
documents = collection.peek()  # Retrieves a few stored items
print("Sample documents:", documents)


Sample documents: {'ids': ['AS.1.html_0', 'AS.1.html_1', 'AS.1.html_2', 'AS.1.html_3', 'AS.1.html_4', 'AS.1.html_5', 'AS.1.html_6', 'AS.1.html_7', 'AS.1.html_8', 'AS.1.html_9'], 'embeddings': array([[-0.03994751,  0.01423645, -0.05480957, ..., -0.01207733,
         0.00915527, -0.01890564],
       [-0.04208374,  0.00125599, -0.01429749, ...,  0.00124454,
         0.01154327,  0.00323296],
       [-0.06121826,  0.00611115, -0.02102661, ...,  0.0112915 ,
        -0.00519943, -0.00682831],
       ...,
       [-0.00044489,  0.01210022, -0.03610229, ...,  0.02043152,
        -0.00046515,  0.08892822],
       [ 0.00215721, -0.00169468, -0.03030396, ...,  0.03634644,
        -0.00894928,  0.05047607],
       [-0.01406097, -0.02500916, -0.02325439, ..., -0.01789856,


In [12]:
'''this code is only a rag framework that extracts relevant top 3 linux commands from the chromadb database based on the user query'''

import chromadb

def search_command(query, top_k=3):
    """Searches for the most relevant Linux manual sections based on the query."""

    # Initialize ChromaDB client
    chroma_client = chromadb.PersistentClient(path="/content/drive/MyDrive/chroma_db")
    collection = chroma_client.get_or_create_collection(name="linux_manuals")

    # Generate embedding for the user query
    query_embedding = generate_embedding(query)  # Use the same embedding function

    # Perform similarity search in ChromaDB
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k  # Retrieve top-k most relevant results
    )

    # Check if results exist
    if not results["documents"]:
        return " No relevant manual found."

    # Format and return results
    response = " **Relevant Linux Commands:**\n\n"
    for i, doc in enumerate(results["documents"][0]):
        command = results["metadatas"][0][i]["command"]
        response += f"🔹 **{command}**\n{doc[:500]}...\n\n"  # Display first 500 chars

    return response

# Example usage
user_query = "How do I list files in Linux?"
print(search_command(user_query))



📖 **Relevant Linux Commands:**

🔹 **git-ls-files.1@@git.html**
git-ls-files(1) - Linux manual page man7.org > Linux > man-pages Linux/UNIX system programming training git-ls-files(1) — Linux manual page NAME | SYNOPSIS | DESCRIPTION | OPTIONS | OUTPUT | FIELD NAMES | EXCLUDE PATTERNS | SEE ALSO | GIT | COLOPHON GIT-LS-FILES (1) Git Manual GIT-LS-FILES (1) NAME top git-ls-files - Show information about files in the index and the working tree SYNOPSIS top git ls-files [-z] [-t] [-v] [-f] [-c|--cached] [-d|--deleted] [-o|--others] [-i|--ignored] [-s|--stage] [...

🔹 **git-ls-files.1.html**
git-ls-files(1) - Linux manual page man7.org > Linux > man-pages Linux/UNIX system programming training git-ls-files(1) — Linux manual page NAME | SYNOPSIS | DESCRIPTION | OPTIONS | OUTPUT | FIELD NAMES | EXCLUDE PATTERNS | SEE ALSO | GIT | COLOPHON GIT-LS-FILES (1) Git Manual GIT-LS-FILES (1) NAME top git-ls-files - Show information about files in the index and the working tree SYNOPSIS top git ls-file

In [13]:

query_text = "What does the 'as' command do in Linux?"
query_embedding = embedding_model.encode(query_text)["dense_vecs"].tolist()

results = collection.query(
    query_embeddings=[query_embedding],
    n_results=5  # Adjust as needed
)

print("Search results:", results)




In [14]:
# Check document count
doc_count = collection.count()
print(f"📂 Total documents in collection: {doc_count}")

# Fetch a few stored documents
sample_docs = collection.get(limit=3)
print("📄 Sample Documents:", sample_docs)


📂 Total documents in collection: 20091


In [22]:
!pip install -U langchain langchain-community google-generativeai langchain_google_genai #relevant libraries downloaded


INFO: pip is looking at multiple versions of langchain-google-genai to determine which version is compatible with other requirements. This could take a while.
Collecting langchain_google_genai
  Using cached langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.2-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.1-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.0-py3-none-any.whl.metadata (3.6 kB)
INFO: pip is still looking at multiple versions of langchain-google-genai to determine which version is compatible with other requirements. This could take a while.
  Downloading langchain_google_genai-2.0.11-py3-none-any.whl.metadata (3.6 kB)
  Downloading langchain_google_genai-2.0.10-py3-none-any.whl.metadata (3.6 kB)
Downloading langchain_google_genai-2.0.10-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.5 MB/s[0m eta [3

In [19]:
!chmod -R 777 /content/drive/MyDrive/chroma_db



In [20]:
from langchain_community.llms import HuggingFaceHub
from langchain_community.embeddings import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")


  embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")


model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

In [21]:
import google.generativeai as genai

genai.configure(api_key="AIzaSyBD5dECyML_lefRHqLf6BPmV3h7AtA2YkQ")


In [23]:
"""final code for the agentic workflow to be run"""

import os
from langchain.agents import initialize_agent, Tool
from langchain.agents.agent_types import AgentType
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
from termcolor import colored
from rich import print
import subprocess

os.environ["GOOGLE_API_KEY"] = "AIzaSyBD5dECyML_lefRHqLf6BPmV3h7AtA2YkQ"

# Gemini LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    google_api_key=os.environ["GOOGLE_API_KEY"]
)

#  Vector DB + Retriever
embedding = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-m3", model_kwargs={"device": "cuda"})
db = Chroma(persist_directory="linux_manuals_chroma", embedding_function=embedding)
retriever = db.as_retriever()

#  Tool 1: Search Linux Manual (RAG-based)
@tool
def search_linux_manual(query: str) -> str:
    """Search Linux manual and provide a helpful answer using retrieved documents."""
    docs = retriever.get_relevant_documents(query)
    context = "\n\n".join(doc.page_content for doc in docs[:5])
    prompt = f"""You are a thoughtful Linux assistant. Let’s think step-by-step.

1. Understand the user's query.
2. Search relevant manual entries.
3. Analyze and summarize key concepts.
4. Provide a well-reasoned and helpful answer.

User Query: {query}

Context:
{context}

Answer:"""
    return llm.invoke(prompt).content

# Tool 2: Fix Bash Commands
@tool
def fix_bash_command(command: str) -> str:
    """Fix, correct, or improve a given Linux bash command."""
    prompt = f"""You're a bash expert. Let’s think step-by-step.

1. Identify issues with the command.
2. Suggest a corrected version.
3. Explain why the fix works.

Command: {command}
"""
    return llm.invoke(prompt).content

# Tool 3: ASCII Diagram Generator
@tool
def generate_ascii_diagram(topic: str) -> str:
    """Generate an ASCII-style explanation or diagram from a Linux concept."""
    prompt = f"""You're a Linux tutor. Let’s think step-by-step.

1. Identify what this Linux concept is.
2. Break it down visually or structurally.
3. Generate a simple ASCII explanation or diagram.

Topic: {topic}
"""
    return llm.invoke(prompt).content

# Tool 4: Autocomplete Command
@tool
def autocomplete_command(fragment: str) -> str:
    """Suggest common completions or full commands based on the input."""
    docs = retriever.get_relevant_documents(fragment)
    examples = "\n".join(doc.page_content for doc in docs[:3])
    prompt = f"""You are a Linux command completion assistant. Let’s think step-by-step.

1. Interpret what the user is trying to do.
2. Use common command patterns.
3. Suggest a complete and valid command.

User typed: `{fragment}`

Reference Commands:
{examples}

Your Suggestions:"""
    return llm.invoke(prompt).content

#  Tool 5: Disk Analyzer
@tool(description="Analyzes disk space and returns a summary of disk usage.")
def analyze_disk_space_func() -> str:
    try:
        output = subprocess.check_output(["df", "-h"], text=True)
        return (
            "Let’s think step-by-step:\n"
            "1. We’ll use the `df -h` command to view disk space.\n"
            "2. It shows mounted partitions and their usage.\n"
            "3. Here's the result:\n\n" + output
        )
    except subprocess.CalledProcessError as e:
        return f"❌ Failed to analyze disk space: {str(e)}"

analyze_disk_space = Tool(
    name="analyze_disk_space",
    func=analyze_disk_space_func,
    description="Analyzes disk space and returns a summary of disk usage"
)

# Memory
memory = ConversationBufferMemory(memory_key="chat_history")

# Agents
main_agent = initialize_agent(
    tools=[search_linux_manual],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    memory=memory
)

fix_agent = initialize_agent(
    tools=[fix_bash_command],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

autocomplete_agent = initialize_agent(
    tools=[autocomplete_command],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

disk_agent = initialize_agent(
    tools=[analyze_disk_space],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    memory=memory
)

ascii_agent = initialize_agent(
    tools=[generate_ascii_diagram],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Agent Router with Chain-of-Thought Prompts
def run_assistant(user_query: str, agent_type: str = "main"):
    print(colored(f"🧠 Query: {user_query}", "cyan"))
    cot_prompt = f"Let’s think step-by-step about this task:\n{user_query}"

    if agent_type == "fix":
        return fix_agent.run(cot_prompt)
    elif agent_type == "autocomplete":
        return autocomplete_agent.run(cot_prompt)
    elif agent_type == "disk":
        return disk_agent.run("I want to understand the disk usage step-by-step.")
    elif agent_type == "ascii":
        return ascii_agent.run(cot_prompt)
    else:
        return main_agent.run(cot_prompt)

# CLI Loop
if __name__ == "__main__":
    print("🔧 Linux Assistant is ready. Type 'quit' or 'exit' to end.")

    while True:
        mode = input("Choose agent [main / fix / autocomplete / disk / ascii]: ").lower()

        if mode in ["quit", "exit"]:
            print("👋 Goodbye!")
            break

        if mode not in ["main", "fix", "autocomplete", "disk", "ascii"]:
            print(colored("Invalid agent selection. Please choose from [main, fix, autocomplete, disk, ascii].", "red"))
            continue

        if mode == "disk":
            response = run_assistant("Check disk usage", agent_type=mode)
            print("\n📊 Disk Info:\n", response)
            continue

        query = input("\nEnter your query: ")

        if query.lower() in ["quit", "exit"]:
            print("👋 Goodbye!")
            break

        response = run_assistant(query, mode)
        print(colored(f"🔧 Response:\n{response}", "green"))


  embedding = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-m3", model_kwargs={"device": "cuda"})
  db = Chroma(persist_directory="linux_manuals_chroma", embedding_function=embedding)
  memory = ConversationBufferMemory(memory_key="chat_history")
  main_agent = initialize_agent(


Choose agent [main / fix / autocomplete / disk / ascii]: main

Enter your query: change permissions of directory shreya


  return main_agent.run(cot_prompt)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the Linux command to change directory permissions and how to use it with the directory name "shreya".  I'll use the Linux manual to find the relevant command and its options.

Action: search_linux_manual
Action Input: "chmod directory permissions"[0m

  docs = retriever.get_relevant_documents(query)



Observation: [36;1m[1;3mThe user's query "chmod directory permissions" seeks understanding of how to change permissions on directories using the `chmod` command in Linux.  Let's break down the process step-by-step.

**1. Understanding the User's Query:**

The user wants to learn how to modify the read, write, and execute permissions for a directory.  They likely need to control which users and groups can access, modify, and list the contents of that directory.

**2. Searching Relevant Manual Entries:**

We'll consult the `chmod` man page (`man chmod`).  Key concepts from the manual page include:

* **Permissions:**  Permissions are represented by three sets of three characters, one each for the owner, group, and others. Each character represents read (r), write (w), and execute (x) permissions.  A hyphen (-) indicates the permission is denied.  For example, `rwxr-xr-x` means the owner has read, write, and execute permissions; the group has read and execute; and others have read and 

Choose agent [main / fix / autocomplete / disk / ascii]: fix

Enter your query: git commit file




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The command `git commit file` is incomplete.  It needs a message.  I should use `fix_bash_command` to improve it.
Action: fix_bash_command
Action Input: `git commit file`[0m
Observation: [36;1m[1;3mThe command `git commit file` has several potential issues:

1. **Ambiguity:**  `file` is not a specific file. Git needs a path to a file or a list of files to stage before committing.  Simply typing `file` is ambiguous and will likely result in an error.

2. **Missing staging:**  Git uses a staging area.  Files need to be added to the staging area before they can be committed.  This command attempts to commit without staging.

3. **Missing commit message:** A commit message is mandatory.  It describes the changes made in the commit.


**Corrected Version:**

There are several ways to correct this, depending on the desired outcome:

**A. Committing a single file with a message:**

```bash
git add file.txt  # Replace fil

Choose agent [main / fix / autocomplete / disk / ascii]: disk




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to analyze the disk space to understand the disk usage step-by-step.  I'll use the `analyze_disk_space` tool.
Action: analyze_disk_space
Action Input: '/'  (Assuming I want to analyze the root directory)[0m
Observation: [36;1m[1;3mLet’s think step-by-step:
1. We’ll use the `df -h` command to view disk space.
2. It shows mounted partitions and their usage.
3. Here's the result:

Filesystem      Size  Used Avail Use% Mounted on
overlay         113G   49G   64G  44% /
tmpfs            64M     0   64M   0% /dev
shm             5.7G  4.0K  5.7G   1% /dev/shm
/dev/root       2.0G  1.2G  775M  61% /usr/sbin/docker-init
tmpfs           6.4G  4.3M  6.4G   1% /var/colab
/dev/sda1        92G   69G   24G  75% /kaggle/input
tmpfs           6.4G     0  6.4G   0% /proc/acpi
tmpfs           6.4G     0  6.4G   0% /proc/scsi
tmpfs           6.4G     0  6.4G   0% /sys/firmware
drive           113G   53G   61G  47% /content/dr

Choose agent [main / fix / autocomplete / disk / ascii]: autocomplete

Enter your query: grep




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to understand what the user wants to do with the `grep` command.  The command itself is incomplete. I should use autocomplete to get suggestions.

Action: autocomplete_command
Action Input: grep[0m
Observation: [36;1m[1;3m1. **Interpreting User Intent:** The user has typed `grep`, indicating they likely want to search for a pattern in a file or files.  However, without further input, the command is incomplete.

2. **Common Command Patterns:**  `grep` commands typically follow the pattern: `grep [options] "pattern" [files]`

3. **Suggestions:**  Since we don't know the user's intended pattern or files, I'll offer a few suggestions covering common use cases:

* **Searching for a string in the current directory:** `grep "pattern" *`  (This searches for "pattern" in all files in the current directory. Replace "pattern" with the actual string.)

* **Searching for a string in a specific file:** `grep "pattern" my

Choose agent [main / fix / autocomplete / disk / ascii]: ascii

Enter your query: search for all .csv files in current directory named drive, sort by size and list top 3




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find a way to express this task using Linux commands.  The task involves finding files, sorting them, and selecting the top three.  An ASCII diagram might help visualize the process, but it's not strictly necessary to answer the question.  I'll focus on constructing the command line solution.

Action: None needed at this stage.

Thought: The core commands will be `find`, `sort`, and `head`.  `find` will locate the files, `sort` will order them by size, and `head` will select the top three.  I need to specify the sorting criteria and file type.

Action: None needed at this stage.

Thought: The command will look something like this: `find . -name "drive*.csv" -printf "%s %p\n" | sort -n | head -n 3`.  Let's break it down: `find . -name "drive*.csv"` finds all files starting with "drive" and ending with ".csv" in the current directory. `-printf "%s %p\n"` prints the file size and path. `sort -n` sorts numerica

Choose agent [main / fix / autocomplete / disk / ascii]: exit


In [24]:
'''the above code was for text, the following codes are for audio input processing'''
#downloading important libraries
!pip install -q openai-whisper
!pip install -q pydub
!sudo apt-get install -y ffmpeg


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/800.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m798.7/800.5 kB[0m [31m23.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [27]:
'''it is required to create an initial empty user_audio.wav file in the content/ folder before running the below code'''
'''the audio recorded here is only for 8 seconds which can be increased according to convenience'''

from IPython.display import display, Javascript
from google.colab import output
import base64
import time

# JavaScript to record audio from mic for 4 seconds
RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
  const reader = new FileReader();
  reader.onloadend = () => resolve(reader.result.split(',')[1]);
  reader.readAsDataURL(blob);
});

var record = async () => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const mediaRecorder = new MediaRecorder(stream);
  const audioChunks = [];

  mediaRecorder.ondataavailable = event => {
    if (event.data.size > 0) audioChunks.push(event.data);
  };

  mediaRecorder.start();
  await sleep(8000);  // Record for 8 seconds, can be increased according to convenience
  mediaRecorder.stop();

  await new Promise(resolve => mediaRecorder.onstop = resolve);
  const audioBlob = new Blob(audioChunks);
  const base64Audio = await b2text(audioBlob);
  return base64Audio;
};

record().then(audio => {
  google.colab.kernel.invokeFunction('notebook.save_audio', [audio], {});
});
"""

# Callback to save the audio in user_audio.wav
def save_audio(audio_base64):
    audio_bytes = base64.b64decode(audio_base64)
    with open("user_audio.wav", "wb") as f:

        f.write(audio_bytes)
    print("🎙️ Audio saved as 'user_audio.wav'")

# Register the callback
output.register_callback("notebook.save_audio", save_audio)


# Display recorder
display(Javascript(RECORD))

# Wait for audio to be recorded and saved
time.sleep(15)


<IPython.core.display.Javascript object>

In [28]:
import whisper

# Load whisper model
model = whisper.load_model("base")

# Transcribe the saved audio
result = model.transcribe("user_audio.wav")
query = result["text"]

print("🗣️ You said:", query) #converts speech to text


In [29]:

# Assuming you already defined run_assistant() and tools as in your original code , now we can run our initial workflow with speech input given in above code cells
response = run_assistant(query, agent_type="main")
print("🤖 Assistant Response:\n", response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find a Linux command to rename a file.  The `mv` command is commonly used for this.
Action: search_linux_manual
Action Input: "mv command"[0m
Observation: [36;1m[1;3mThe `mv` command in Linux is used to rename files or directories, and to move files or directories from one location to another.  Let's break down its functionality step-by-step:

**1. Understanding the User's Query:** The user wants to understand the `mv` command, its usage, and its capabilities.

**2. Searching Relevant Manual Entries:**  We would consult the `man mv` page.  Key information gleaned from the manual page includes:

* **Basic Syntax:** `mv [OPTION]... SOURCE... DESTINATION`
* **Renaming:**  If `DESTINATION` is a file, `SOURCE` is renamed to `DESTINATION`.
* **Moving:** If `DESTINATION` is a directory, `SOURCE` is moved into `DESTINATION`.
* **Multiple Sources:** Multiple `SOURCE` files can be moved to a single `DESTINATION` d

In [None]:
#other inputs can be used based on the type of agents you want to use
# Assuming you already defined run_assistant() and tools as in your original code
response = run_assistant(query, agent_type="fix")
print("🤖 Assistant Response:\n", response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to create a bash command that lists files.  A simple `ls` command will work, but I can improve it for clarity and robustness.

Action: fix_bash_command
Action Input: `ls`[0m
Observation: [36;1m[1;3m1. **Issues with the command `ls`:**

The command `ls` itself isn't inherently flawed, but it lacks robustness and clarity for many use cases.  The primary issues are:

* **Lack of error handling:** If `ls` encounters a directory it cannot access (due to permissions, etc.), it will print an error message to standard error, potentially obscuring other output.
* **Ambiguous output:** The output format is simple, but can be difficult to parse programmatically.  It doesn't handle filenames with spaces or special characters consistently across different systems.
* **No control over output:**  The output goes to standard output, which might not be suitable for all scenarios (e.g., piping to another command that expects

In [None]:
#other inputs can be used based on the type of agents you want to use. for eg , to use autocomplete agent , speech input can be given something as : git
# Assuming you already defined run_assistant() and tools as in your original code
response = run_assistant(query, agent_type="autocomplete")
print("🤖 Assistant Response:\n", response)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to use the autocomplete_command function to get suggestions for commands related to listing files.  I'll try a few fragments to see what I get.

Action: autocomplete_command
Action Input: ls[0m
Observation: [36;1m[1;3m1. **Interpreting User Intent:** The user is trying to list the contents of the current directory.

2. **Common Command Patterns:** The `ls` command is already a complete command.  However, it can be enhanced with options for better output.

3. **Suggested Commands:**

* **`ls -l`**: This will list the files in long listing format, showing details like permissions, owner, size, and modification time.  This is a very common enhancement.
* **`ls -lh`**: This combines the long listing format (`-l`) with human-readable sizes (`-h`), making it easier to understand file sizes (e.g., 1K, 234M, 2G).
* **`ls -al`**: This adds the hidden files (starting with '.') to the long listing.  Useful for seeing 

In [None]:
#voice input given here can be disk analysis , for different voice inputs the above code cell recording audio and transcribing it should be run
# Assuming you already defined run_assistant() and tools as in your original code
response = run_assistant(query, agent_type="disk")
print("🤖 Assistant Response:\n", response)


In [1]:
!pip install peft==0.3.0 transformers==4.28.1 accelerate bitsandbytes datasets trl langchain_google_genai langchain_community langchain google-generativeai termcolor rich

Collecting peft==0.3.0
  Downloading peft-0.3.0-py3-none-any.whl.metadata (21 kB)
Collecting transformers==4.28.1
  Downloading transformers-4.28.1-py3-none-any.whl.metadata (109 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.0/110.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Downloading trl-0.16.1-py3-none-any.whl.metadata (12 kB)
Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.22-py3-none-any.whl.metadata (2.4 kB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.28.1)
  Downloading tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting dill<0.3.9,>=0.3

In [2]:
!pip install -U transformers # update the transformers library
!pip install -U peft # update the peft library

Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m79.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m83.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.13.3
    Uninstalling tokenizers-0.13.3:
      Successfully uninstalled tokenizers-0.13.3
  Attempting uninstall: transformers
    Found existing installation: transformers 4.28.1
    Uninstalling transforme

In [4]:

'''since geminiai cant be publicly finetuned, we decided to use gpt2 and finetune it as part of comparative study'''
!pip install transformers peft chromadb

import os
import torch
import chromadb
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig
from torch.utils.data import Dataset

# === Connect to ChromaDB and Load Manpages ===
CHROMA_PATH = "/content/drive/MyDrive/chroma_db"
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = chroma_client.get_collection(name="linux_manuals")

documents = collection.get()
texts = documents["documents"][:500]
commands = [meta["command"] for meta in documents["metadatas"][:500]]


# === Prepare Paired Training Data ===
train_pairs = [{"input": cmd, "output": text} for cmd, text in zip(commands, texts)]

# === Load Tokenizer & Model ===
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Fix for padding
model = GPT2LMHeadModel.from_pretrained("gpt2")

# === Apply LoRA ===
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# === Custom Dataset Class ===
class ManpageDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.examples = []
        for pair in data:
            text = f"{pair['input']}\n{pair['output']}"
            tokens = tokenizer(text, truncation=True, padding="max_length", max_length=max_length, return_tensors="pt")
            tokens["labels"] = tokens["input_ids"].clone()
            self.examples.append(tokens)

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return {k: v.squeeze(0) for k, v in self.examples[idx].items()}

# === Prepare Dataset ===
train_dataset = ManpageDataset(train_pairs, tokenizer)

# === Define Training Arguments ===
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/fine_tuned_linux_model",
    overwrite_output_dir=True,
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    #evaluation_strategy="no",
    fp16=True  # Optional: enable for faster training with supported GPU
)

# === Trainer Setup ===
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# === Train the Model ===
trainer.train()

# === Save Fine-tuned Model ===
model.save_pretrained("/content/drive/MyDrive/fine_tuned_linux_model")
tokenizer.save_pretrained("/content/drive/MyDrive/fine_tuned_linux_model")




No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mpes1202201316[0m ([33mpes1202201316-pes-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
10,3.1711
20,3.2219
30,3.4087
40,3.3751
50,3.2382
60,3.2881
70,3.2348
80,3.1361
90,3.2752
100,3.2414


('/content/drive/MyDrive/fine_tuned_linux_model/tokenizer_config.json',
 '/content/drive/MyDrive/fine_tuned_linux_model/special_tokens_map.json',
 '/content/drive/MyDrive/fine_tuned_linux_model/vocab.json',
 '/content/drive/MyDrive/fine_tuned_linux_model/merges.txt',
 '/content/drive/MyDrive/fine_tuned_linux_model/added_tokens.json')

In [5]:
!pip install --upgrade --force-reinstall torchvision

Collecting torchvision
  Downloading torchvision-0.22.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting numpy (from torchvision)
  Downloading numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch==2.7.0 (from torchvision)
  Downloading torch-2.7.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Downloading pillow-11.2.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (8.9 kB)
Collecting filelock (from torch==2.7.0->torchvision)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting typing-extensions>=4.10.0 (from torch==2.7.0->torchvision)
  Downloading typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 kB)
Collecting sympy>=1.13.3 (from torch==2.7.0->torchvision)
  Downloading sympy-1.13.3-py3-none-any.whl.

In [8]:
#evaluation of the fine tuned model done
import json

# Create the evaluation data in JSON format
evaluation_data = [
    {
        "prompt": "What does the `ls` command do?",
        "expected_response": "The `ls` command lists directory contents."
    },
    {
        "prompt": "Fix the command: cd..",
        "expected_response": "The correct command is: cd .."
    },
    {
        "prompt": "Generate an ASCII diagram of a Linux file structure.",
        "expected_response": "/ (root)\n├── bin\n├── etc\n├── home\n│   └── user\n└── var"
    }
]

# Save the data to a JSON file
with open("/content/evaluation_data.json", "w") as f:
    json.dump(evaluation_data, f, indent=4)

print("Evaluation data saved to 'evaluation_data.json'")


In [9]:
pip install transformers datasets evaluate torch


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Using cached fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached fsspec-2024.12.0-py3-none-any.whl (183 kB)
Installing collected packages: fsspec, evaluate
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.7.0 which is incompatible.
gcsfs 2025.3.2 requires fsspec==2025.3.2, but yo

In [10]:
!pip install rouge_score
!pip install bert_score


Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24987 sha256=0deddd861a8a4efe716db80c4348990414ab777a7712736235de4195c4407841
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [11]:
#finetuning evaluation using bleu, rouge, bert score
import json
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import evaluate

# === Load Evaluation Dataset ===
with open("evaluation_data.json", "r") as f:
    eval_data = json.load(f)

# === Evaluation Functions ===
def get_responses(pipeline_fn, eval_data):
    preds, refs = [], []
    for item in eval_data:
        prompt = item["prompt"]
        expected = item["expected_response"]

        # Use the pipeline to generate responses
        output = pipeline_fn(prompt)[0]["generated_text"]
        preds.append(output)
        refs.append(expected)
    return preds, refs

# === Load Fine-Tuned Model ===
FT_MODEL_PATH = "/content/drive/MyDrive/fine_tuned_linux_model"  # Update with your fine-tuned model path

tokenizer = AutoTokenizer.from_pretrained(FT_MODEL_PATH, local_files_only=True)

# Fine-tuned pipeline (use AutoModelForCausalLM for fine-tuned GPT-style models)
ft_model = AutoModelForCausalLM.from_pretrained(FT_MODEL_PATH, device_map="auto", torch_dtype="auto", local_files_only=True)
ft_pipe = pipeline("text-generation", model=ft_model, tokenizer=tokenizer, max_new_tokens=256)

# === Generate Fine-Tuned Model Outputs ===
print("🔍 Generating fine-tuned model outputs...")
ft_preds, ft_refs = get_responses(ft_pipe, eval_data)

# === Evaluate Fine-Tuned Model ===
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")

# Evaluate BLEU score for the fine-tuned model
print("\n📊 BLEU Score")
print("Fine-Tuned:", bleu.compute(predictions=ft_preds, references=ft_refs))

# Evaluate ROUGE score for the fine-tuned model
print("\n📊 ROUGE Score")
print("Fine-Tuned:", rouge.compute(predictions=ft_preds, references=ft_refs))

# Evaluate BERTScore for the fine-tuned model
print("\n📊 BERTScore")
print("Fine-Tuned:", bertscore.compute(predictions=ft_preds, references=ft_refs, lang="en"))


Device set to use cpu


Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
