In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

### Loading and Chunking Data

# Load every PDF found under ./PDF; `documents` is the list returned by the loader.
loader = PyPDFDirectoryLoader("./PDF")
documents = loader.load()

# Peek at the source of the last and first loaded documents. The [4:-4] slice
# drops the first and last four characters of the path -- presumably the
# "PDF/" prefix and ".pdf" suffix; TODO confirm against the actual paths.
documents[-1].metadata['source'][4:-4]

documents[0].metadata['source'][4:-4]

len(documents)

# Split the documents into chunks of size 1000 with an overlap of 200
# (units follow the splitter's length function -- characters by default; TODO confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

# `final_document` is the list of chunks used by the embedding/indexing cells below.
final_document = text_splitter.split_documents(documents)
final_document[0]

len(final_document)

#### Embeddings

In [2]:
from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

In [3]:
# Sentence-embedding model used for both indexing and querying.
# NOTE(review): HuggingFaceBgeEmbeddings is the BGE-specific wrapper while the
# model below is MiniLM; the wrapper may prepend a BGE query instruction inside
# embed_query -- TODO confirm, and consider HuggingFaceEmbeddings instead.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-l6-v2"

huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs=dict(device='cpu'),                # run the encoder on CPU
    encode_kwargs=dict(normalize_embeddings=True),  # ask the encoder for normalized vectors
)

In [49]:
def create_embeddings(text):
    """Embed ``text`` with the notebook-level ``huggingface_embeddings`` model.

    Args:
        text: The string to embed.

    Returns:
        Whatever ``huggingface_embeddings.embed_query`` returns for ``text``
        (the embedding vector).
    """
    vector = huggingface_embeddings.embed_query(text)
    return vector

In [5]:
import numpy as np

# Sanity check: embed the second chunk and print the shape of the resulting vector.
print(np.array(create_embeddings(final_document[1].page_content)).shape)

In [6]:
from pymilvus import MilvusClient
import os

In [8]:
# Connection settings are read from the environment so that no endpoint or
# token is stored in the notebook itself. os.getenv yields None when unset.
milvus_endpoint = os.getenv("MILVUS_ENDPOINT")
milvus_api_key = os.getenv("MILVUS_API_KEY")

milvus_client = MilvusClient(uri=milvus_endpoint, token=milvus_api_key)

len(final_document)

In [9]:
class initiate_milvus:
    """Embeds document chunks and inserts them into a Milvus collection.

    Args:
        milvus_client: Client object exposing ``insert(collection_name, data=...)``.
        milvus_collection_name: Name of the target collection.
        embedding_fn: Optional callable ``text -> vector``. When omitted, the
            notebook-level ``create_embeddings`` function is used.
    """

    def __init__(self, milvus_client, milvus_collection_name, embedding_fn=None):
        self.milvus_client = milvus_client
        self.milvus_collection_name = milvus_collection_name
        self.embedding_fn = embedding_fn

    def add_data_to_vectordb(self, content, max_documents=10):
        """Embed and insert the chunks in ``content``.

        Args:
            content: Sequence of chunks; each must expose ``page_content``.
            max_documents: Upper bound on how many leading chunks are uploaded.
                Defaults to 10 (the previously hard-coded amount); pass
                ``None`` to upload every chunk.
        """
        # Resolve the global helper lazily so an injected embedding_fn never needs it.
        embed = self.embedding_fn if self.embedding_fn is not None else create_embeddings
        # Slicing (rather than indexing 0..9) tolerates inputs shorter than the limit.
        selected = content if max_documents is None else content[:max_documents]
        for document in selected:
            text = document.page_content
            self.insert_embedding(embed(text), text)

    def insert_embedding(self, embedding, text):
        """Insert one row holding ``embedding`` ('vector') and ``text`` ('text')."""
        row = {
            'vector': embedding,
            'text': text,
        }

        self.milvus_client.insert(self.milvus_collection_name, data=[row])
 

In [10]:
# Name of the Milvus collection that stores the embedded chunks.
milvus_collection_name = 'NetworkingProjectData'
indexer = initiate_milvus(milvus_client, milvus_collection_name)


#### Data uploaded 
# NOTE(review): every execution of this line inserts rows again; whether that
# creates duplicates depends on the collection's primary-key setup -- TODO confirm.
indexer.add_data_to_vectordb(final_document)

In [11]:
class SearchEngine:
    """Retrieves stored text passages from a Milvus collection by vector search.

    Args:
        milvus_client: Client object exposing ``search(...)``.
        milvus_collection_name: Name of the collection to search.
    """

    def __init__(self, milvus_client, milvus_collection_name):
        self.milvus_client = milvus_client
        self.milvus_collection_name = milvus_collection_name

    def query_milvus(self, embedding, result_count=3):
        """Return the ``text`` field of the best matches for ``embedding``.

        Args:
            embedding: Query vector; sent to the client as a one-element batch.
            result_count: Maximum number of matches requested (default 3).

        Returns:
            ``{'list_of_knowledge_base': [...]}`` with one text per match; the
            list is empty when the search returns no result set.
        """
        result = self.milvus_client.search(
            collection_name=self.milvus_collection_name,
            data=[embedding],
            limit=result_count,
            output_fields=["text"])

        # One query vector was sent, so only the first result set is relevant.
        # Guard against an empty response instead of failing on result[0].
        matches = result[0] if result else []

        return {
            'list_of_knowledge_base': [match['entity']['text'] for match in matches],
        }


In [12]:
# Search helper bound to the same client and collection used for indexing.
# `x` is referenced by the retrieval cells further down.
x = SearchEngine(milvus_client,milvus_collection_name)

#### Prompting the LLM with Retrieved Context

In [13]:
# Example user question; it is embedded with the same model used for the stored chunks.
query = "Do I have an interference issue where my access point is on channel 116 with channel width of 80MHz and channel utilization of 18% and a signal strength of -58.0 dBm and there is a nearby access point on on channel 116 with a channel width of 20MHz and an unknown channel utilization and a signal strength of -63.0 dBm."
query_embed = np.array(create_embeddings(query))
# Display the vector's shape as a quick dimensionality check.
(query_embed).shape

(384,)

In [14]:
# Retrieve the stored passages closest to the query embedding.
Query_Search = x.query_milvus(query_embed)

In [15]:
# Display the retrieved passages (dict with key 'list_of_knowledge_base').
Query_Search

{'list_of_knowledge_base': ['Review Questions  427\n16. What are some problems that can occur when an access point is transmitting at full power? \n(Choose all that apply.)\nA. Hidden node\nB. Co-channel interference\nC. Mismatched power between the AP and the clients\nD. Intersymbol interference\n17. Why would a WLAN network administrator consider disabling the two lowest rates on an \n802.11b/g access point? (Choose all that apply.)\nA. Medium contention\nB. Adjacent channel interference\nC. Hidden node\nD. Intersymbol interference\nE. All of the above\n18. Which type of interference is caused by destructive multipath?\nA. Intersymbol interference\nB. All-band interference\nC. Narrowband interference\nD. Wideband interference\nE. Physical interference\n19. In a multiple-channel architecture (MCA) design, what is the greatest number of nonover-\nlapping channels that can be deployed in the 2.4 GHz ISM band?\nA. 3\nB. 12\nC. 11\nD. 14\nE. 4\n20. What factors should be taken into consid

In [16]:
from langchain_community.llms import Ollama
from langchain_community.chat_models import ChatOllama

In [17]:
# Chat model served by Ollama; temperature=0 is passed to the model.
ollama_model = ChatOllama(model='mistral:latest', temperature=0)

# Build the prompt as ONE string: adjacent f-string literals inside the
# parentheses are concatenated by Python. Separating them with commas would
# instead produce a 3-element tuple, which is not a prompt string.
prompt = (
    f"Try to give a precise and to the point response of the user query and do answer all the questions there can be many questions: {query}. "
    f"Never produce answer based on your knowledge but use the knowledge base that is provided to you: {Query_Search}. "
    f"Don't repeat or keep words that are not human readable in the response"
)

In [18]:
# Sample co-channel-interference log record, expressed as a Python dict
# (None/True instead of JSON null/true). Key order is kept as-is because the
# dict is later interpolated into a prompt via its string representation.
channel_details = {
    "channel": 116,
    "frequency": 5580,
    "htMode": "ABOVE",
    "vhtMode": "WIDTH_80_MHZ",
    "heMode": "UNKNOWN",
    "band": 5.0,
}

interference_thresholds = {
    "enabled": True,
    "signalStrength": -85,
    "bssidsHavingBssLoad": 2,
    "averageChannelUtilization": 40,
    "criticalOverlappingBssidsOn2400MHz": 4,
    "criticalOverlappingBssidsOn5000MHz": 2,
    "criticalOverlappingBssidsOn6000MHz": 2,
    "warningOverlappingBssidsOn2400MHz": 3,
    "warningOverlappingBssidsOn5000MHz": 1,
    "warningOverlappingBssidsOn6000MHz": 1,
    "downlinkThroughputThresholds": None,
}

json_data_example = {
    "type": "CO_CHANNEL_INTERFERENCE",
    "bssid": "a8:bd:27:87:e6:10",
    "ssid": "#momogoboom5",
    "band": 5.0,
    "criticality": "GOOD",
    "target": None,
    "timestamp": 1704284031210,
    "message": None,
    "averageChannelUtilization": None,
    "interferingBssids": None,
    "channelDetails": channel_details,
    "thresholds": interference_thresholds,
}

In [19]:
import json

# The prompt is assembled from two pieces whose text is kept exactly as written
# (including whitespace), because it is sent verbatim to the model.

# Part 1: static role/behaviour instructions (no interpolation needed).
role_instructions = """
    I'll provide you with some JSON data and User Query.
    Act as if you are working as a tech lead in a networking firm. And based on these records and the JSON logs
    you've to respond to the query and solve it. I have provided you with 1 sample Answer how it should look like: 
    """

# Part 2: the log record, the user query, the retrieved passages, and one worked sample answer.
task_with_sample_answer = f"""
    Given the JSON data representing an hour's worth of Wi-Fi test data and the user query:
    The JSON Logs are {json_data_example} and the query is {query}. 
    The ideal answer should go in a phased manner. Where you would work with every part of the question and take its insights from the knowledge base which is located here {Query_Search}. 
    

    The answer should somewhat look like this, The flow of solving a question, Please remember this is just an example and you shouldn't repeat all queries with the same answer this is just a sample answer. Use the knowledge base to solve the query that is given to you: 
    > From the provided information, it appears that there might be an interference issue between your access point and the nearby access point on the same channel (channel 116).
    >
    > The wide channel width (80MHz) of your access point might be causing interference, especially with the nearby access point operating on a narrower channel width (20MHz). Although the signal strength of your access point is stronger (-58.0 dBm) compared to the nearby one (-63.0 dBm), the difference might not be sufficient to eliminate interference, especially if both devices are close enough to each other.
    > 
    > The channel utilization of 18% on your access point indicates that there is some activity on that channel, although it's not excessively high. However, without knowing the channel utilization of the nearby access point, it's challenging to determine the extent of interference caused.
    > 
    > To address this:
    > 
    > Change Channel or Channel Width: Consider changing the channel of your access point to a less congested one or reducing the channel width to minimize interference with the nearby access point. You can use Wi-Fi analyzer tools to identify less crowded channels.
    > 
    > Evaluate Nearby Access Point's Utilization: If possible, try to gather information about the channel utilization of the nearby access point. If it's significantly high, it could contribute to interference. Adjust your channel or settings accordingly.
    > 
    > Optimize Signal Strength: While your access point has a stronger signal, adjusting the positioning or antenna orientation might further optimize its signal strength and quality within your desired coverage area.
    > 
    > Monitor and Test: Regularly monitor the network performance after making changes to assess whether the interference has reduced and the overall performance has improved.
    > 
    > Remember, mitigating interference often involves a combination of adjusting settings, channel selection, and monitoring network performance over time to find the most effective configuration for your specific environment.
 
   """

prompt = role_instructions + task_with_sample_answer
    



In [20]:
# Send the assembled prompt to the chat model and keep the returned message.
response = ollama_model.invoke(prompt)


In [21]:
# Display the text of the model's reply.
response.content

" Based on the given JSON data and user query, here's an example answer:\n\n> From the provided information, it seems that there might be a co-channel interference issue between your access point and the nearby one with the same channel (channel 116).\n>\n> The wide channel width (80MHz) of your access point could potentially cause interference, especially when in close proximity to a device using a narrower channel width (20MHz). Although your access point has a stronger signal (-58.0 dBm), the difference might not be enough to eliminate interference entirely.\n>\n> The channel utilization of 18% on your access point indicates some activity, but without knowing the channel utilization of the nearby access point, it's difficult to determine the extent of interference caused.\n>\n> To address this issue:\n>\n> Change Channel or Channel Width: Consider changing the channel of your access point to a less congested one or reducing the channel width to minimize interference with the nearby 

### Second Approach: Extract Key Log Features First, Then Retrieve

In [22]:
def convert_json_values_to_python(json_str):
    """Parse a JSON document into the equivalent Python object.

    Args:
        json_str: Text containing a JSON document.

    Returns:
        The object produced by ``json.loads`` (JSON ``null``/``true``/``false``
        become ``None``/``True``/``False``).
    """
    return json.loads(json_str)

In [64]:
# Raw JSON text of a sample COVERAGE log record (JSON null/true literals);
# it is parsed into a Python dict in the next cell.
json_new ="""
        {
          "type": "COVERAGE",
          "bssid": "a8:bd:27:87:e6:10",
          "ssid": "#momogoboom5",
          "band": 5.0,
          "criticality": "GOOD",
          "target": null,
          "timestamp": 1704284031210,
          "signalStrength": -58.0,
          "stickyFactor": 0,
          "bestAlternativeSignalStrength": null,
          "thresholds":
          {
            "enabled": true,
            "signalStrengthThresholds":
            {
              "enabled": true,
              "thresholds":
              {
                "critical": -73.0,
                "warning": -70.0
              },
              "targetHost": null,
              "band": null
            },
            "roamingThresholds":
            {
              "enabled": true,
              "thresholds":
              {
                "critical": 2.0,
                "warning": 1.0
              },
              "targetHost": null,
              "band": null,
              "signalStrengthRangeMin": -90,
              "signalStrengthRangeMax": -20,
              "roamingConfigDelta": 7,
              "roamingConfigThreshold": -70
            },
            "bestAlternativeSignalStrengthThresholds":
            {
              "enabled": true,
              "thresholds":
              {
                "critical": 0.0,
                "warning": 0.0
              },
              "targetHost": null,
              "band": null
            }
          }
        }"""

In [65]:
# Parse the raw JSON text into a Python dict and display it.
new_json_parsed = convert_json_values_to_python(json_new)
new_json_parsed

{'type': 'COVERAGE',
 'bssid': 'a8:bd:27:87:e6:10',
 'ssid': '#momogoboom5',
 'band': 5.0,
 'criticality': 'GOOD',
 'target': None,
 'timestamp': 1704284031210,
 'signalStrength': -58.0,
 'stickyFactor': 0,
 'bestAlternativeSignalStrength': None,
 'thresholds': {'enabled': True,
  'signalStrengthThresholds': {'enabled': True,
   'targetHost': None,
   'band': None},
  'roamingThresholds': {'enabled': True,
   'targetHost': None,
   'band': None,
   'signalStrengthRangeMin': -90,
   'signalStrengthRangeMax': -20,
   'roamingConfigDelta': 7,
   'roamingConfigThreshold': -70},
  'bestAlternativeSignalStrengthThresholds': {'enabled': True,
   'targetHost': None,
   'band': None}}}

In [66]:
# Prompt asking the model to name the log fields that matter most; the reply
# is later embedded and used as the retrieval query. The parsed log dict is
# interpolated via its Python string representation.
# NOTE: this rebinds `prompt`, replacing the prompt built in the earlier cells.
prompt = (
    f"""
    You are one of the most wisest extractor in the world, your job is to find
    important features from the json logs of a file to find which features should be used to
    extracted to formulate the knowledge base. What I mean is Using the logs find the number of 
    features that are very important to find the answer to any query that can arise from the logs.
    the logs are: {new_json_parsed}. Only give name of topics that should be referred to no extra word should be used
    
    """
)

In [67]:
# Ask the model for the list of important log features.
infer = ollama_model.invoke(prompt)

In [68]:
# Print the model's feature list as plain text.
print(infer.content)

 Based on the provided JSON log, the following features are important for querying and extracting knowledge:

1. bssid
2. ssid
3. band
4. criticality
5. signalStrength
6. thresholds (signalStrengthThresholds, roamingThresholds, bestAlternativeSignalStrengthThresholds)
7. timestamp.


In [69]:
# Use the model-generated feature list as the retrieval query.
query_2 = infer.content
# NOTE: rebinds `query_embed`, replacing the embedding of the first question.
query_embed = np.array(create_embeddings(query_2))
# Display the vector's shape as a quick dimensionality check.
(query_embed).shape

(384,)

In [72]:
# Retrieve the stored passages closest to the feature-list embedding.
knowledge_base = x.query_milvus(query_embed)

In [73]:
# Generic user question answered from the parsed log and the retrieved passages.
new_query = ("Find if anything is wrong with my network, I'll provide the logs.")

In [74]:
# Final prompt: role instructions followed by the parsed log, the user question
# and the retrieved passages (adjacent string literals are concatenated).
# NOTE(review): the text mentions "1 sample Answer", but no sample answer is
# included in this prompt -- TODO confirm whether that is intended.
prompt2 = (
    f"""
    I'll provide you with some JSON data and User Query.     Structure the answer in a very professional manner. It should seem that you are a Certified Network Professional who was all the knowledge about Networking. You should be professional
    Even if you dont know the answer simply say I dont Know about it never halucinate
    Act as if you are working as a tech lead in a networking firm. And based on these records and the JSON logs
    you've to respond to the query and solve it. I have provided you with 1 sample Answer how it should look like: 
    """
    f"""
    Given the JSON data representing an hour's worth of Wi-Fi test data and the user query:
    The JSON Logs are {new_json_parsed} and the query is {new_query}. 
    The ideal answer should go in a phased manner. Where you would work with every part of the question and take its insights from the knowledge base which is located here {knowledge_base}. 

    
    """

)

In [75]:
# Send the final prompt to the chat model (rebinds `response` from the earlier run).
response = ollama_model.invoke(prompt2)

In [76]:
# Print the model's answer as plain text.
print(response.content)

 Based on the provided JSON data and user query, I'll walk you through a comprehensive analysis to identify any potential issues with your network.

Firstly, let me provide some context about the JSON logs. The data represents an hour's worth of Wi-Fi test data, which includes various parameters such as BSSID, SSID, band, signal strength, sticky factor, best alternative signal strength, thresholds, and criticality.

Now, let's focus on your query: "Find if anything is wrong with my network, I'll provide the logs."


1. Reviewing the logs:
The first step is to examine the JSON data and look for any entries with a criticality level other than 'GOOD'. In this case, all the log entries have a 'criticality' level of 'GOOD'. This indicates that there are no major issues with your network based on the provided data.

2. Signal strength:
Another important factor to consider is signal strength. The 'signalStrength' field in the JSON data represents the RSSI (Received Signal Strength Indicator) 