In [None]:
pip install faiss-cpu langchain_community huggingface_hub transformers sentence_transformers



In [None]:
import sqlite3
import csv
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
import os
import numpy as np

In [None]:
def load_data_from_sqlite(db_path, query):
    """Run ``query`` against a SQLite database and wrap each row in a Document.

    Args:
        db_path: Path of the SQLite database file to open.
        query: SQL statement to execute; every returned row must have at
            least six columns.

    Returns:
        A list with one ``Document`` per row. ``page_content`` is columns
        2, 3, 4 and 5 of the row joined with single spaces. NULL columns
        contribute an empty string and non-text values are converted with
        ``str`` so a single incomplete row cannot abort the whole load.
    """
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(query).fetchall()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    # Convert each row into a Document object
    return [
        Document(
            page_content=" ".join(
                "" if row[i] is None else str(row[i]) for i in (2, 3, 4, 5)
            )
        )
        for row in rows
    ]

def load_data_from_csv(csv_path):
    """Read a CSV file and wrap each usable row in a Document.

    Every row is treated as data; no header row is skipped.

    Args:
        csv_path: Path of the CSV file. It is decoded as UTF-8 with
            undecodable bytes replaced.

    Returns:
        A list with one ``Document`` per kept row. ``page_content`` is the
        first four cells joined with single spaces. Rows with fewer than
        four cells (including blank lines) and rows containing a NUL
        character are skipped.
    """
    documents = []
    # newline='' is what the csv module expects, so that it handles newlines
    # embedded in quoted fields itself.
    with open(csv_path, mode='r', encoding='utf-8', errors='replace', newline='') as file:
        for row in csv.reader(file):
            # Blank lines come back as [] and malformed lines as short rows;
            # neither has the four columns needed below.
            if len(row) < 4:
                continue
            # Skip rows containing null bytes (b'\x00')
            if any('\x00' in cell for cell in row):
                continue
            documents.append(Document(page_content=" ".join(row[:4])))
    return documents


In [None]:
def load_combined_data(db_path, query, csv_path):
    """Load documents from both sources and return them as one sequence.

    The SQLite documents (from ``db_path`` / ``query``) come first, followed
    by the documents read from ``csv_path``.
    """
    from_sqlite = load_data_from_sqlite(db_path, query)
    from_csv = load_data_from_csv(csv_path)
    return from_sqlite + from_csv


In [None]:
# Define the database path and query
db_path = "/content/courses.sqlite"
query = "SELECT * FROM merged_courses"
csv_path = "/content/taads_data_filtered.csv"
documents = load_combined_data(db_path, query, csv_path)

# Preview a handful of documents instead of printing the whole corpus:
# dumping every document floods the cell output and bloats the notebook.
print("Total number of documents loaded:", len(documents))
for doc in documents[:5]:
    print(doc.page_content)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Bitcoin and Cryptocurrency Technologies University of Colorado System mobile ip  data transmission  internet protocol suite  routing  internet  multicast  dynamic host configuration protocol  internet protocol  tcp congestion control  Computer Networking computer-science computer-security-and-networks In this course, we give an in-depth study of the TCP/IP protocols. We examine the details of how IP enables communications across a collection of networks. We pay particular attention to the hierarchical structure of IP addresses and explain their role in ensuring scalability of the Internet. The role of address prefixes and the uses of masks are explained in details. We review in details about TCP three-way handshake, flow control, and congestion control. Furthermore, we provide an introduction to some Advanced topics, including Multicast, SDN and security
Black-box and White-box Testing DeepLearning.AI natural language  Pa

In [None]:
# Split the loaded documents into overlapping chunks before embedding them.
# chunk_size / chunk_overlap are in the splitter's length units
# (presumably characters by default; confirm against the LangChain docs).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
final_documents = text_splitter.split_documents(documents)

# Verify loading and splitting
print(final_documents[0])
print("Total number of document chunks:", len(final_documents))

page_content='Automotive Technology Management Prof Dr Michael Dornieden Identify relevant market framework conditions of carmakers▪ Realize the economic importance of the automotive industry▪ Know automotive key figures in respect to particular countries▪ Categorize the product portfolio of automotive suppliers▪ Examine and understand future trends of automotive markets andresource requirements of carmakers (e.g. electromobility)▪ Critically evaluate strategic alliances between carmakers currently inpractical company use and develop an understanding of how they canbe improved in order to reach the desired organizational goals▪ Know theoretical foundations of product lifecycle management▪ Characterize the six phases of the Generic Product DevelopmentProcess and demonstrate its application to new vehicle projects▪ Solve complex engineering problems in new vehicle projects by usingvariants of the Generic Product Development Process▪ Illustrate the application of the module 

In [None]:

# Initialize embeddings using HuggingFace
# The BAAI/bge-small-en-v1.5 model is requested on the CPU, and encode_kwargs
# asks for normalized embedding vectors.
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

In [None]:
# Example embedding for verification
# Embed the first chunk on its own so the vector and its shape can be inspected.
embedding_example = np.array(huggingface_embeddings.embed_query(final_documents[0].page_content))
print("Embedding vector for the first document chunk:", embedding_example)
print("Shape of the embedding vector:", embedding_example.shape)

# Create FAISS VectorStore for similarity search
# Every chunk in final_documents is embedded with the model configured above.
vectorstore = FAISS.from_documents(final_documents, huggingface_embeddings)



Embedding vector for the first document chunk: [-1.15054464e-02  7.40906447e-02  3.27483118e-02  2.69325208e-02
  1.56668052e-02  8.30498338e-03  3.04795466e-02  9.25476179e-02
 -8.14083964e-02 -3.81992087e-02  5.89207821e-02 -2.07546912e-02
  5.26157301e-03 -9.79872653e-04  3.52979973e-02  2.60512903e-02
  2.64976528e-02  3.26710311e-03  4.99667190e-02 -5.20252995e-02
 -3.47920810e-03 -5.00219241e-02 -9.06429999e-03 -3.67322415e-02
  3.88614424e-02  9.62221343e-03  1.84531845e-02  4.25992347e-02
 -2.41459999e-02 -1.86382800e-01 -5.42131439e-02  2.78192107e-02
  9.26533993e-03  7.35844150e-02  2.36662049e-02  4.86649647e-02
 -6.78649023e-02 -6.97111432e-03  2.43773144e-02 -4.30075713e-02
 -2.94110912e-04 -5.46175148e-03 -3.52795534e-02 -7.72952214e-02
 -1.65587198e-02 -7.47818779e-03  9.70306806e-03  2.92045972e-03
 -3.71201113e-02 -4.44553569e-02 -7.81841576e-04 -5.36020771e-02
  1.29892752e-02 -4.15704399e-02 -2.72411183e-02  4.80966829e-02
  4.08873148e-02 -1.59821659e-02  5.9119612

In [None]:
# Example Query Using Similarity Search
# NOTE: this rebinds `query`, which held the SQL statement in the data-loading cell.
query = "I want to study mobile systems"
relevant_documents = vectorstore.similarity_search(query)

# Display content of the most relevant document found
print("Most relevant document content:\n", relevant_documents[0].page_content)

# Set up the retriever with similarity search configuration
# (search_kwargs requests k=3 results per lookup).
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
print("Retriever set up successfully:", retriever)

Most relevant document content:
 Mobile Computing &amp; Application Roy Choudhury, R Same as CS 434. 3 undergraduate hours. 4 graduate hours. Prerequisite: ECE 391, CS 241, CS 341 or ECE 310. Introduction to cross-disciplinary ideas and techniques in mobile computing, with an emphasis on how they can be composed to build systems and applications on smartphones, tablets, and wearable devices. Topics of interest include smartphone sensing, energy efficiency, indoor localization, augmented reality, context-awareness, gesture recognition, and data analytics. Various techniques and methods utilized to combine them into functional systems, propose a new system, define the underlying problems, and solve them end to end. Same as CS 434. 3 undergraduate hours. 4 graduate hours. Prerequisite: ECE 391, CS 241, CS 341 or ECE 310.
Retriever set up successfully: tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7b908b8de850> search_kwarg

In [None]:

# Hugging Face API setup: take the token from the environment and prompt for
# it only when it is missing, so a real token already present in the
# environment is never overwritten and no credential is stored in the notebook.
import getpass

if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass.getpass("Hugging Face API token: ")

# Initialize Hugging Face model for question-answering
from langchain_community.llms import HuggingFaceHub

hf = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    model_kwargs={"temperature": 0.8, "max_length": 300}
)

# Execute a query to get a response
# (`query` is the natural-language question set in the similarity-search cell;
# no retrieved context is passed here, the model sees the bare question).
response = hf.invoke(query)
print("Response from Hugging Face model:\n", response)

Response from Hugging Face model:
 I want to study mobile systems and their related software and hardware, especially mobile operating systems like Android and iOS, and mobile applications. I am interested in the design, development, optimization, and testing of mobile systems and applications. I am also interested in the security and privacy aspects of mobile systems, such as protecting user data and preventing unauthorized access. I am a self-taught programmer and have experience with various programming languages such as Java, C++, Python, and Swift. I am currently learning Android development and am planning to enroll in a university program to further my studies in this field. I am also interested in research and have started working on a few projects related to mobile systems.


In [None]:
# Prompt text for the retrieval QA chain. {context} and {question} are the
# template's two placeholders. The trailing "Helpful Answers:" marker is the
# string later cells search for to cut the answer out of the model output.
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """

In [None]:
# Import PromptTemplate from the prompts module; importing it from the
# top-level `langchain` package is deprecated.
from langchain.prompts import PromptTemplate

# Bind the template text to its two input variables.
prompt=PromptTemplate(template=prompt_template,input_variables=["context","question"])

In [None]:
from langchain.chains import RetrievalQA
# Build the question-answering chain from the Hugging Face LLM, the FAISS
# retriever and the custom prompt defined in the earlier cells.
retrievalQA=RetrievalQA.from_chain_type(
    llm=hf,
    chain_type="stuff",
    retriever=retriever,
    # Ask the chain to return the retrieved documents along with the answer.
    return_source_documents=True,
    chain_type_kwargs={"prompt":prompt}
)

In [None]:
# Question to send through the retrieval QA chain (rebinds the global `query`).
query="""tell me about mobile systems"""

In [None]:
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])

res = result['result']

# The output contains the prompt text followed by the answer, so keep only the
# part starting at the "Helpful Answers:" marker. str.find returns -1 when the
# marker is absent; slicing from -1 would keep just the last character, so
# fall back to the full text in that case.
start_index = res.find("Helpful Answers:")
partial_paragraph = res[start_index:] if start_index != -1 else res
print("Partial paragraph:", partial_paragraph)


Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

Mobile Systems (Mobile Systeme) Prof. Dr. Michael Cebulla Students learn about substantial concepts and technologies for the development of smart, mobile applications. One focus area consists in the programming with sensor data. Concepts and technologies for the development of advanced mobile applications. Special focus lies on the contextual dependencies of system behavior and the communication between different components. The following topics are examined:- Location-based Services: application of different localization services with different properties, services for the visualization of geographical data, management of geographical data, geofencing, location-based social networking (lbsn)- Communication in mobile applications: bluetooth, NFC, http etc.- Acquisition of environmental data using sensoric interfaces- Activity Recognition- Track & Trace-appl

In [None]:
!pip install flask flask-cors pyngrok




In [None]:
import os

# Create a static directory if it doesn't exist
os.makedirs("static", exist_ok=True)

# Move the background image into the static directory. The move is guarded so
# that re-running this cell after the file has already been moved does not
# raise; a genuinely missing image is still reported.
background_src = "/content/background.png"
background_dst = "static/background.png"
if os.path.exists(background_src):
    os.rename(background_src, background_dst)
elif not os.path.exists(background_dst):
    raise FileNotFoundError(
        f"{background_src} not found and {background_dst} does not exist yet"
    )


In [None]:
# Save the ngrok auth token to ngrok's configuration file.
# NOTE(review): "NGROK_TOKEN" looks like a placeholder for a real token; confirm.
!ngrok config add-authtoken "NGROK_TOKEN"

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# Import necessary libraries
from flask import Flask, request, jsonify, render_template_string, url_for
from pyngrok import ngrok

# Initialize Flask app (the @app.route decorators below register their handlers on it)
app = Flask(__name__)

# Run a search query through the RetrievalQA chain and return the answer text
def search_query(query):
    """Answer ``query`` with the RetrievalQA chain and return the answer text.

    Args:
        query: The user's question, passed to the chain under the "query" key.

    Returns:
        The chain's ``result`` text starting at the "Helpful Answers:" marker,
        or the whole text when the marker is not present. Returns
        "No results found." when the chain returns an empty result, and an
        "An error occurred: ..." message when the chain raises.
    """
    try:
        result = retrievalQA.invoke({"query": query})
        if not result:
            return "No results found."
        res = result['result']
        # str.find returns -1 when the marker is missing; slicing from -1
        # would return only the final character, so fall back to the full text.
        start_index = res.find("Helpful Answers:")
        return res[start_index:] if start_index != -1 else res
    except Exception as e:
        # Best effort: report the failure as text so the web endpoint can
        # still respond instead of crashing.
        return f"An error occurred: {e}"

# Flask routes
@app.route('/')
def home():
    """Serve the single-page search UI.

    The page's script POSTs the typed query as JSON to ``/search`` and shows
    the returned ``result`` or ``error`` text. The voice button fills the
    query box from ``webkitSpeechRecognition`` when the browser provides it.
    """
    # Inline HTML template for simplicity
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Course Compass</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                text-align: center;
                margin: 150px;
                padding: 20px;background-image: url('static/background.png');
                background-size: cover;
                background-position: center;
            }

            #search-form {
                margin-top: 20px;
            }
            #search-form input[type="text"] {
                width: 50%;
                padding: 10px;
                font-size: 16px;
            }
            #search-form button {
                padding: 10px 20px;
                font-size: 16px;
            }
            #search-form #voice-btn {
                padding: 10px 20px;
                font-size: 16px;
                cursor: pointer;
                margin-left: 10px;
                background-color: #4CAF50;
                color: white;
                border: none;
                border-radius: 5px;
            }
            #search-form #voice-btn:hover {
                background-color: #45a049;
            }
            #results {
                margin-top: 20px;
                padding: 10px;
                border: 1px solid #ccc;
                background-color: #f9f9f9;
                display: inline-block;
                width: 50%;
            }
        </style>
    </head>
    <body>

        <form id="search-form" onsubmit="performSearch(event)">
            <input type="text" id="query" placeholder="Enter your search term">
            <button type="submit">Search</button>
            <button type="button" id="voice-btn" onclick="startVoiceRecognition()">🎤 Voice</button>
        </form>
        <div id="results">
            <strong>Results:</strong>
            <p id="output"></p>
        </div>
        <script>
            async function performSearch(event) {
                event.preventDefault();
                const query = document.getElementById("query").value;
                const response = await fetch('/search', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ query })
                });
                const data = await response.json();
                const outputElement = document.getElementById("output");
                if (data.result) {
                    outputElement.textContent = data.result;
                } else if (data.error) {
                    outputElement.textContent = data.error;
                }
            }

            function startVoiceRecognition() {
                // Check if browser supports Web Speech API
                if (!('webkitSpeechRecognition' in window)) {
                    alert("Sorry, your browser doesn't support speech recognition.");
                    return;
                }

                // Initialize the SpeechRecognition object
                const recognition = new webkitSpeechRecognition();
                recognition.lang = "en-US";  // Set language to English
                recognition.interimResults = false;  // Do not show interim results
                recognition.maxAlternatives = 1;  // Return the most likely result

                // Start recognition
                recognition.start();

                // Handle recognition events
                recognition.onresult = function(event) {
                    const speechResult = event.results[0][0].transcript;  // Extract the speech-to-text result
                    document.getElementById("query").value = speechResult;  // Fill the query input box
                };

                recognition.onerror = function(event) {
                    console.error("Speech recognition error:", event.error);
                    alert("An error occurred during voice recognition. Please try again.");
                };

                recognition.onend = function() {
                    console.log("Speech recognition ended.");
                };
            }
        </script>
    </body>
    </html>
    """
    return render_template_string(html_content)

@app.route('/search', methods=['POST'])
def search():
    """Handle a search request posted as JSON of the form ``{"query": "..."}``.

    Returns:
        ``{"result": <answer text>}`` when a non-empty query is supplied,
        otherwise ``{"error": "Query is missing"}`` with HTTP status 400.
        A missing, malformed or non-object JSON body is treated as a
        missing query rather than causing a server error.
    """
    # silent=True yields None instead of raising when the body is absent or
    # is not valid JSON.
    data = request.get_json(silent=True)
    # The body may also be valid JSON that is not an object (null, a list, ...).
    query = data.get("query", "") if isinstance(data, dict) else ""
    if query:
        result = search_query(query)
        return jsonify({"result": result})
    else:
        return jsonify({"error": "Query is missing"}), 400

# Start Flask app with ngrok
# Open the ngrok tunnel to port 5000 first, then start Flask on that same port
# so the printed public URL forwards to this app.
public_url = ngrok.connect(5000)
print(f"Public URL: {public_url}")
app.run(port=5000)


Public URL: NgrokTunnel: "https://b6c7-34-19-80-120.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:29:24] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:29:24] "GET /static/background.png HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:29:26] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:29:49] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:30:38] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:31:24] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:33:45] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:33:48] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:36:14] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:36:26] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [24/Jan/2025 14:36:51] "POST /search HTTP/1.1" 200 -
INFO:werk