# Installing the Libraries
This section installs all the necessary libraries that are required for running the notebook.

In [None]:
# %pip installs into the environment of the running kernel, so the packages
# are importable from the cells below.
%pip install graphrag nest_asyncio httpx langchain-ollama 


Initialize Indexing for creating the configuration files. 
--------------------------------------------------------
- `--init`: Creates the necessary configuration files for indexing.
- `--root .`: Sets the root directory to the current directory.

In [None]:
# --init creates the indexing configuration files; --root . targets the current directory.
!python -m graphrag.index --init  --root . 


Run the indexing process. 
--------------------------------------------------------
Runs the indexing process on the root directory (`.`), after the `--init` step above has created the configuration files.

In [None]:
# Run the indexing process against the current directory (--root .).
!python -m graphrag.index --root . 

# Running a Query with Graphrag
This section runs a query with Graphrag.

- `--root .`: Sets the root directory to the current directory.
- `--method local`: Indicates that the method to be used is local.

In [None]:
!python3 -m graphrag.query --root . --method local  "Who can be granted temporary protection? 


# Counting Characters Until a Marker in Query Output

In [None]:
import os
import subprocess
import sys

import numpy as np
import pandas as pd


def count_characters_until_marker(output, marker="SUCCESS: Local Search Response:"):
    """
    Measure the length of the prefix of ``output`` that ends with ``marker``.

    Only the first occurrence of ``marker`` is considered.

    Args:
        output (str): Text to scan.
        marker (str): Substring that terminates the counted prefix.

    Returns:
        int: Offset just past the first occurrence of ``marker`` in ``output``,
             or -1 when ``marker`` does not occur at all.
    """
    try:
        # str.index raises ValueError where str.find would return -1.
        marker_start = output.index(marker)
    except ValueError:
        return -1
    return marker_start + len(marker)
    
# Run one reference query and measure how many characters of CLI output come
# before (and including) the success marker. The result, char_count, is the
# offset that get_len_cat() adds to every answer-length budget.
#
# sys.executable is used instead of a hard-coded 'python3' so the query runs in
# the same interpreter/environment as this kernel (where %pip installed graphrag).
result = subprocess.run(
    [sys.executable, '-m', 'graphrag.query', '--root', '.', '--method', 'local',
     "Who can be granted temporary protection?"],
    stdout=subprocess.PIPE,
)
# errors='replace' keeps a stray non-UTF-8 byte in the CLI output from aborting the cell.
output = result.stdout.decode('utf-8', errors='replace')

# Count the characters till the end of the marker
char_count = count_characters_until_marker(output)
if char_count == -1:
    # Without the marker there is no measured offset; do not let the -1 sentinel
    # leak into the length arithmetic downstream.
    print(f"Warning: marker not found in query output (exit code {result.returncode}); using 0 as offset")
    char_count = 0
print(f"Number of characters until the end of marker: {char_count}")

# Creating Queries with Starting Prompts



In [None]:

def get_len_cat(text):
    """Return the character budget for an answer.

    The budget is 1.5 times ``len(text)`` plus the module-level ``char_count``,
    rounded to an integer with the built-in ``round``.
    """
    scaled_length = 1.5 * len(text)
    return round(scaled_length + char_count)
 
def create_query_with_start_prompt(row):
    """Build the query text for one row, prefixed with its start prompt if any.

    Args:
        row: Mapping-like row providing 'answer', 'question' and
            'start_with_prompt'.

    Returns:
        str: ``start_with_prompt`` (omitted when null) followed by the question
        and a sentence stating the maximum answer length from ``get_len_cat``.
    """
    max_chars = get_len_cat(row['answer'])
    length_hint = ' The answer should be maximum ' + str(max_chars) + ' characters.'
    start_prompt = row['start_with_prompt']
    # A null start prompt contributes nothing to the query.
    prefix = '' if pd.isnull(start_prompt) else str(start_prompt)
    return prefix + row['question'] + length_hint

def create_query_without_start_prompt(row):
    """Build the query text for one row, ignoring any start prompt.

    Args:
        row: Mapping-like row providing 'answer' and 'question'.

    Returns:
        str: The question followed by a sentence stating the maximum answer
        length from ``get_len_cat``.
    """
    max_chars = get_len_cat(row['answer'])
    length_hint = ' The answer should be maximum ' + str(max_chars) + ' characters.'
    return row['question'] + length_hint


# Reading test data

In [None]:

# Load the semicolon-separated test data.
# The path is assembled with os.path.join so it resolves on every OS; the
# previous backslash-separated literal only worked on Windows.
test_data_path = os.path.join("hyperrag", "MSgraphrag", "testdata", "migri_faq.csv")
df = pd.read_csv(test_data_path, sep=";")
# Preview the first 10 rows.
df.head(10)


# Querying Graphrag for Each Row and Adding the Result to the DataFrame

In [None]:

# Text that the graphrag CLI prints immediately before the answer.
start_marker = "SUCCESS: Local Search Response:"

# Answers are collected in a list and attached to df in one assignment after
# the loop, instead of growing a new column cell-by-cell while iterating.
answers = []
for index, row in df.iterrows():
    query = create_query_without_start_prompt(row)
    print(f"Query: {query}")
    # sys.executable runs the CLI in this kernel's environment instead of
    # whatever 'python3' happens to be on PATH.
    result = subprocess.run(
        [sys.executable, '-m', 'graphrag.query', '--root', '.', '--method', 'local', query],
        stdout=subprocess.PIPE,
    )
    # errors='replace' keeps one undecodable byte from aborting the whole run.
    output = result.stdout.decode('utf-8', errors='replace')
    if start_marker in output:
        # Everything after the last marker is the answer text.
        extracted_text = output.split(start_marker)[-1].strip()
    else:
        print(f"Error: '{start_marker}' not found in output")
        print(f"graphrag.query exit code: {result.returncode}")
        extracted_text = None
    answers.append(extracted_text)
    print(extracted_text)

# add the extracted text to the dataframe as a new column "ms_graphgraph_answer"
df["ms_graphgraph_answer"] = answers


In [None]:
# Save the df with the new column into a semicolon-separated CSV.
# os.path.join keeps the path valid on every OS; the file name itself is
# unchanged so existing consumers of the output still find it.
output_csv_path = os.path.join("hyperrag", "MSgraphrag", "testdata", "migi_faq_with_ms_graphgraph.csv")
df.to_csv(output_csv_path, sep=";", index=False)

In [None]:
# Preview the first 10 rows of df.
df.head(10)

In [None]:
# Take each row in the dataframe, create a query, get the answer from graphrag, then store it as a new column in the dataframe.

# The query is run like this:  !python3 -m graphrag.query --root . --method local "Who can be granted temporary protection?"

import subprocess
import json
import nest_asyncio
nest_asyncio.apply()

def query_graphrag(query, marker="SUCCESS: Local Search Response:"):
    """Run one local-search query through the graphrag CLI and return the answer.

    The CLI writes plain text rather than JSON: log lines, then ``marker``,
    then the answer. The answer is therefore taken as everything after the
    last occurrence of ``marker``.

    Args:
        query (str): Question passed to ``graphrag.query``.
        marker (str): Text that precedes the answer in the CLI output.

    Returns:
        str | None: The stripped answer text, or None when ``marker`` is
        missing from the output (for example when the CLI call failed).
    """
    # sys.executable runs the CLI in this kernel's environment instead of
    # whatever 'python3' happens to be on PATH.
    result = subprocess.run(
        [sys.executable, '-m', 'graphrag.query', '--root', '.', '--method', 'local', query],
        stdout=subprocess.PIPE,
    )
    # errors='replace' keeps an undecodable byte from raising here.
    output = result.stdout.decode('utf-8', errors='replace')
    if marker not in output:
        print(f"Error: '{marker}' not found in output (exit code {result.returncode})")
        return None
    # get the answer from the result
    return output.split(marker)[-1].strip()


# df['query'] = df.apply(create_query_without_start_prompt, axis=1)
# df['answer'] = df['query'].apply(lambda x: query_graphrag(x))
# Query graphrag once per question and keep the answers in 'ms_graphrag_answer'.
questions = df['question']
df['ms_graphrag_answer'] = questions.apply(query_graphrag)


