In [2]:
#r "nuget: Azure.AI.OpenAI, *-*"
#r "nuget: Azure, *-*"
#r "nuget: Azure.Identity, *-*"
#r "nuget: dotenv.net, *-*"
#r "nuget: Microsoft.DotNet.Interactive.AIUtilities, *-*"
#r "nuget: Microsoft.ML.Tokenizers, *-*"
#r "nuget: Microsoft.SemanticKernel.Core, *-*"
#r "nuget: Neo4j.Driver, *-*"

using Microsoft.DotNet.Interactive;
using Microsoft.DotNet.Interactive.AIUtilities;
using dotenv.net;
using Azure.AI.OpenAI;
using Azure;
using Azure.Identity;
using OpenAI.Chat;
using System;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.IO;
using Microsoft.SemanticKernel.Text;
using Microsoft.ML.Tokenizers;
using Neo4j.Driver;


Loading extensions from `C:\Users\haley\.nuget\packages\skiasharp\2.88.6\interactive-extensions\dotnet\SkiaSharp.DotNet.Interactive.dll`

Loading extension script from `C:\Users\haley\.nuget\packages\microsoft.dotnet.interactive.aiutilities\1.0.0-beta.24229.4\interactive-extensions\dotnet\extension.dib`

In [4]:
DotEnv.Load();

var envVars = DotEnv.Read();

AzureOpenAIClient client = new(new Uri(envVars["AZURE_OPENAI_ENDPOINT"]), 
    new AzureKeyCredential(envVars["AZURE_OPENAI_API_KEY"]));

var embeddings = envVars["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"];
var llm = envVars["AZURE_OPENAI_CHAT_DEPLOYMENT"];

In [33]:
public record DocunentMetadata(string id, string source);
public record ChunkMetadata(string id, string name, int sequence, string documentId, string text);
public record TripletRow(string head, string head_type, string relation, string tail, string tail_type);
public class EntityMetadata
{
    public string name { get; set; }
    public string type { get; set; }
    public string id { get; set; }
    public string text { get; set; }
    public Dictionary<string, ChunkMetadata> mentionedInChunks {get; set;} = new Dictionary<string, ChunkMetadata>();
}


In [105]:
ChatClient chatClient = client.GetChatClient(llm);
string fileName = "input/essay.txt";
string fileText = File.ReadAllText(fileName);

DocunentMetadata documentMetatdata = new (Guid.NewGuid().ToString("N"), fileName);

var tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o");
#pragma warning disable SKEXP0050
var lines = TextChunker.SplitPlainTextLines(fileText, 500, text => tokenizer.CountTokens(text));
var paragraphs = TextChunker.SplitPlainTextParagraphs(lines, 500, 100, null, text => tokenizer.CountTokens(text));

string entityTypes = "BLOG_POST,BOOK,MOVIE,PRESENTATION,EVENT,ORGANIZATION,PERSON,PLACE,PRODUCT,REVIEW,ACTION";
string relationTypes = "INTRODUCED,USED_FOR,WRITTEN_IN,PART_OF,LOCATED_IN,GIVEN,LIVES_IN,TRAVELED_TO";

Dictionary<ChunkMetadata, List<TripletRow>> chunks = new Dictionary<ChunkMetadata, List<TripletRow>>();
int maxTripletsPerChunk = 10;
for (int i = 0; i < paragraphs.Count; i++)
{
    string text = paragraphs[i];

    ChunkMetadata chunkMetadata = new (Guid.NewGuid().ToString("N"), $"DocumentChunk{i}", i, documentMetatdata.id, text);

	string prompt =  $@"Please extract up to {maxTripletsPerChunk} knowledge triplets from the provied text.
    Each triplet should be in the form of (head, relation, tail) with their respective types.
    ######################
    ONTOLOGY:
    Entity Types: {entityTypes}
    Relation Types: {relationTypes}
    
    Use these entity types and relation types as a starting point, introduce new types if necessary based on the context.
    
    GUIDELINES:
    - Output in JSON format: [{{""head"": """", ""head_type"": """", ""relation"": """", ""tail"": """", ""tail_type"": """"}}]
    - Use the full form for entities (ie., 'Artificial Intelligence' instead of 'AI')
    - Keep entities and relation names concise (3-5 words max)
    - Break down complex phrases into multiple triplets
    - Ensure the knowledge graph is coherent and easily understandable
    ######################
    EXAMPLE:
    Text: Jason Haley, chief engineer of Jason Haley Consulting, wrote a new blog post titled 'Study Notes: GraphRAG - Property Grids' about creating a property grid RAG system using Semantic Kernel. 
    Output:
    [{{""head"": ""Jason Haley"", ""head_type"": ""PERSON"", ""relation"": ""SOFTWARE_DEVELOPER"", ""tail"": ""Jason Haley Consulting"", ""tail_type"": ""COMPANY""}},
     {{""head"": ""Jason Haley Consulting."", ""head_type"": ""COMPANY"", ""relation"": ""EMPLOYES"", ""tail"": ""Jason Haley"", ""tail_type"": ""PERSON""}},
     {{""head"": ""Study Notes: GraphRAG - Property Grids"", ""head_type"": ""BLOG_POST"", ""relation"": ""WRITTEN_BY"", ""tail"": ""Jason Haley"", ""tail_type"": ""PERSON""}},
     {{""head"": ""property grid RAG system"", ""head_type"": ""SOFTWARE_SYSTEM"", ""relation"": ""USES"", ""tail"": ""Semantic Kernel"", ""tail_type"": ""TECHNOLOGY""}}]
    ######################
    Text: {text}
    ######################
    Output:";

	ChatCompletion completion = chatClient.CompleteChat(
    	[
        	new UserChatMessage(prompt),
    	]);

	Console.WriteLine($"{completion.Role}: {completion.Content[0].Text}");
    List<TripletRow> rows =  JsonSerializer.Deserialize<List<TripletRow>>(completion.Content[0].Text.Replace("```json", "").Replace("```","").Replace("'", "").Trim());
    
    chunks.Add(chunkMetadata, rows);
}

Console.WriteLine($"Number of chunks: {chunks.Count}");

Assistant: ```json
[
    {"head": "Blog Post 1", "head_type": "CREATIVE_WORK", "relation": "TITLE", "tail": "Personal Update: Learning AI", "tail_type": "CREATIVE_WORK"},
    {"head": "Learning AI", "head_type": "BOOK", "relation": "WRITTEN_BY", "tail": "Jason", "tail_type": "PERSON"},
    {"head": "Four Stages of Competence", "head_type": "CONCEPT", "relation": "USED_FOR", "tail": "Learning AI", "tail_type": "BOOK"},
    {"head": "Four Stages of Competence", "head_type": "CONCEPT", "relation": "STAGE 1", "tail": "Unconscious Incompetence", "tail_type": "CONCEPT"},
    {"head": "Four Stages of Competence", "head_type": "CONCEPT", "relation": "STAGE 2", "tail": "Conscious Incompetence", "tail_type": "CONCEPT"},
    {"head": "Four Stages of Competence", "head_type": "CONCEPT", "relation": "STAGE 3", "tail": "Conscious Competence", "tail_type": "CONCEPT"},
    {"head": "Four Stages of Competence", "head_type": "CONCEPT", "relation": "STAGE 4", "tail": "Unconscious Competence", "tail_type"

In [81]:
public class Utilities
{    
    public static EntityMetadata PopulateEntityMetadata(ChunkMetadata chunkMetadata, TripletRow triplet, EntityMetadata entityMetadata, bool isHead = true)
    {
        entityMetadata.id = Guid.NewGuid().ToString("N");

        if (isHead)
        {
            entityMetadata.name = CreateName(triplet.head);
            entityMetadata.type = triplet.head_type;
            entityMetadata.text = triplet.head;
        }
        else
        {
            entityMetadata.name = CreateName(triplet.tail);
            entityMetadata.type = triplet.tail_type;
            entityMetadata.text = triplet.tail;
        }

        entityMetadata.mentionedInChunks.Add(chunkMetadata.id, chunkMetadata);
        
        return entityMetadata;
    }

    public static string CreateName(string text)
    {
        if (string.IsNullOrEmpty(text))
            return text;

        // Split the text into words
        string[] words = text.Split(new[] { ' ', '-', '_' }, StringSplitOptions.RemoveEmptyEntries);

        StringBuilder nameText = new StringBuilder();
        
        foreach (string word in words)
        {
            // Capitalize the first letter and make the rest lowercase
            var lword = word;
            if (char.IsDigit(word[0]))
            {
                lword = "_" + word;
            }

            nameText.Append(lword.ToLower());
        }
        return System.Text.RegularExpressions.Regex.Replace(nameText.ToString(), "[^a-zA-Z0-9_]", "");
    }
}

In [91]:
Dictionary<string,EntityMetadata> entities = new Dictionary<string,EntityMetadata>();

foreach (ChunkMetadata key in chunks.Keys)
{
    List<TripletRow> triplets = chunks[key];
    foreach (var triplet in triplets)
    {
        EntityMetadata entity;
        string pcHead = Utilities.CreateName(triplet.head);
        if (entities.ContainsKey(pcHead)) 
        {
            entity = entities[pcHead];
            if (!entity.mentionedInChunks.ContainsKey(key.id))
            {
                entity.mentionedInChunks.Add(key.id, key);
            }
        }
        else
        {
            entity = new EntityMetadata();   
            entities.Add(pcHead, Utilities.PopulateEntityMetadata(key, triplet, entity, true));
        }      

        string pcTail = Utilities.CreateName(triplet.tail);
        if (entities.ContainsKey(pcTail)) 
        {
            entity = entities[pcTail];
            if (!entity.mentionedInChunks.ContainsKey(key.id))
            {
                entity.mentionedInChunks.Add(key.id, key);
            }
        }
        else
        {
            entity = new EntityMetadata();   
            entities.Add(pcTail, Utilities.PopulateEntityMetadata(key, triplet, entity, false));
        }
    }
}

Console.WriteLine($"Unique entity count: {entities.Count}");

Unique entity count: 107


In [87]:
foreach(var key in entities.Keys)
{
    var e = entities[key];
    Console.WriteLine($"{key} Mentioned In {e.mentionedInChunks.Count} chunks");
}

jason Mentioned In 8 chunks
ai Mentioned In 1 chunks
michaelwashington Mentioned In 1 chunks
simplerag Mentioned In 1 chunks
azuresearchopenaidemoc Mentioned In 1 chunks
azuresearchopenaidemo Mentioned In 1 chunks
familyofragdemos Mentioned In 1 chunks
azuresearchopenaidemocsharp Mentioned In 1 chunks
azuresearchopenaijavascript Mentioned In 1 chunks
azuresearchopenaidemojava Mentioned In 1 chunks
azuresearchopenaidemojavascripttypescript Mentioned In 1 chunks
azuresearchopenaidemopython Mentioned In 2 chunks
hacktogethertheaichatapphack Mentioned In 1 chunks
azurevectorsearchaiassistant Mentioned In 1 chunks
bostoncodecamp_36 Mentioned In 1 chunks
talk Mentioned In 1 chunks
billwilder Mentioned In 2 chunks
virtualbostonazuremeetup Mentioned In 1 chunks
aiminiworkshop Mentioned In 1 chunks
mslearningpathapl_2005 Mentioned In 1 chunks
globalazurebootcamp Mentioned In 1 chunks
post_12 Mentioned In 1 chunks
semantickernelhelloworldpluginspart_2 Mentioned In 1 chunks
nativefunction Mention

In [93]:

List<string> entityCypherText = new List<string>(); // Document, DocumentChunk and Entity

entityCypherText.Add($"MERGE (Document1:DOCUMENT {{ id: '{documentMetatdata.id}', name:'Document1', type:'DOCUMENT', source: '{documentMetatdata.source}'}})"); 

foreach (var chunk in chunks.Keys)
{
    entityCypherText.Add($"MERGE (DocumentChunk{chunk.sequence}:DOCUMENT_CHUNK {{ id: '{chunk.id}', name: '{chunk.name}', type: 'DOCUMENT_CHUNK', documentId: '{chunk.documentId}', sequence: '{chunk.sequence}', text: \"{chunk.text.Replace("\"", "'")}\"}})");
    entityCypherText.Add($"MERGE (Document1)-[:CONTAINS]->(DocumentChunk{chunk.sequence})");
}

HashSet<string> types = new HashSet<string>();
foreach(var entity in entities.Keys)
{
    var labels = entities[entity];
    var pcEntity = entity;
    entityCypherText.Add($"MERGE ({pcEntity}:ENTITY {{ name: '{pcEntity}', type: '{labels.type}', id: '{labels.id}', text: '{labels.text}'}})");

    if (!types.Contains(labels.type))
    {
        types.Add(labels.type);
    }

    foreach(var key in labels.mentionedInChunks.Keys)
    {
        var documentChunk = labels.mentionedInChunks[key];
        entityCypherText.Add($"MERGE ({pcEntity})-[:MENTIONED_IN]->(DocumentChunk{documentChunk.sequence})");
    }
}

HashSet<string> relationships = new HashSet<string>();
foreach (ChunkMetadata key in chunks.Keys)
{
    List<TripletRow> triplets = chunks[key];
    foreach (var triplet in triplets)
    {
        var pcHead = Utilities.CreateName(triplet.head);
        var pcTail = Utilities.CreateName(triplet.tail);
        entityCypherText.Add($"MERGE ({pcHead})-[:{triplet.relation.Replace(" ", "_").Replace("-","_")}]->({pcTail})");

        string headRelationship = $"MERGE (DocumentChunk{key.sequence})-[:MENTIONS]->({pcHead})";
        if (!relationships.Contains(headRelationship))
        {
            relationships.Add(headRelationship);
            entityCypherText.Add(headRelationship);
        }
        
        string tailRelationship = $"MERGE (DocumentChunk{key.sequence})-[:MENTIONS]->({pcTail})";
        if (!relationships.Contains(tailRelationship))
        {
            relationships.Add(tailRelationship);
            entityCypherText.Add(tailRelationship);
        }
    }
}

In [86]:
foreach(var t in entityCypherText)
{
    Console.WriteLine(t);
}

MERGE (Document1:DOCUMENT { id: 'f6e5d401d98c43eb808bf2d819ae0df6', name:'Document1', type:'DOCUMENT', source: 'input/essay.txt'})
MERGE (DocumentChunk0:DOCUMENT_CHUNK { id: 'dd5b6e0011c748b1a0fcee1a066f7514', name: 'DocumentChunk0', type: 'DOCUMENT_CHUNK', documentId: 'f6e5d401d98c43eb808bf2d819ae0df6', sequence: '0', text: "Post 1 (Personal Update) Learning AI
This is the first of many blog posts I plan to make this year, stay tuned (please subscribe) for more soon. Learning AI Currently I am working my way through the four stages of competence with the topic of AI. This quarter (Q1 of 2024), I’m currently working on moving from stage 2 to stage 3 in the four stages of competence. For reference, those stages are: Unconscious incompetence Conscious incompetence Conscious competence Unconscious competence Last year I moved from stage 1 to stage 2: In the beginning of last year (2023) I had my head buried in the sand while all the other leaders in my industry were actively learning how 

In [78]:
foreach(var t in types.Keys)
{
    Console.WriteLine(t);
}

PERSON
TECHNOLOGY
CONCEPT
PRODUCT
EVENT
MISCELLANEOUS
ORGANIZATION
LOCATION


In [5]:
IAuthToken token = AuthTokens.Basic(
                envVars["NEO4J_USER"],
                envVars["NEO4J_PASSWORD"]
            );
IDriver driver = GraphDatabase.Driver(envVars["NEO4J_URI"], token);

QueryConfig config = new QueryConfig();


In [50]:
Console.WriteLine(entityCypherText.ToArray().Length);

467


In [94]:

using (var session = driver.AsyncSession())
{
    StringBuilder all = new StringBuilder();
    all.AppendJoin(Environment.NewLine, entityCypherText.ToArray());
    await driver.ExecutableQuery(all.ToString()).WithConfig(config).ExecuteAsync();
}



In [95]:
string createVectorIndex = @"CREATE VECTOR INDEX CHUNK_EMBEDDING IF NOT EXISTS
                            FOR (c:DOCUMENT_CHUNK) ON c.embedding
                            OPTIONS {indexConfig: {
                           `vector.dimensions`: 1536,
                            `vector.similarity_function`: 'cosine'
                            }}";

await driver.ExecutableQuery(createVectorIndex).WithConfig(config).ExecuteAsync();

In [96]:

string createFulltextIndex = @"CREATE FULLTEXT INDEX ENTITY_TEXT IF NOT EXISTS 
                                FOR (n:ENTITY) ON EACH [n.text]";
await driver.ExecutableQuery(createFulltextIndex).WithConfig(config).ExecuteAsync();

In [97]:

string populateEmbeddings = $@"
                            MATCH (n:DOCUMENT_CHUNK) WHERE n.text IS NOT NULL
                            WITH n, genai.vector.encode(
                                n.text,
                                'AzureOpenAI',
                                {{
                                    token: $token,
                                    resource: $resource,
                                    deployment: $deployment
                                }}) AS vector
                            CALL db.create.setNodeVectorProperty(n, 'embedding', vector)
                            ";
await driver.ExecutableQuery(populateEmbeddings)
    .WithParameters(new() { 
        {"token", envVars["AZURE_OPENAI_API_KEY"]}, 
        {"resource", envVars["AZURE_OPENAI_RESOURCE"]}, 
        {"deployment", envVars["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"]}})
    .WithConfig(config)
    .ExecuteAsync();

## Retrieval

In [6]:
//string questionText = "what are the blog post titles that are about Semantic Kernel?";
string questionText = "How many blog post did Jason write about Semantic Kernel and what is their titles?";

In [7]:
ChatClient chatClient = client.GetChatClient("chat");

int maxSynonyms = 10;

string prompt = $@"
Given a user question, generate synonyms or related keywords up to {maxSynonyms} in total, considering possible cases of capitalization, pluralization, and common expressions. Provide all synonyms/keywords separated by '~' symbols in a single line format: 'synonym1~synonyms2~...'.

QUERY: {questionText}
######################
KEYWORDS:
";
ChatCompletion completion = chatClient.CompleteChat(
    [
        new UserChatMessage(prompt),
    ]);

Console.WriteLine($"{completion.Role}: {completion.Content[0].Text}");

Assistant: How many blog posts did Jason write about Semantic Kernel^Jason's blog entries about Semantic Kernel^Semantic Kernel blog posts by Jason^Jason's Semantic Kernel articles^number of Jason's Semantic Kernel posts^titles of Jason's Semantic Kernel posts^Jason's blog entries on Semantic Kernel titles^Jason's posts about Semantic Kernel^Semantic Kernel articles by Jason^blog titles by Jason on Semantic Kernel


In [10]:
var synonyms = completion.Content[0].Text.Split("~");


var uniqueNodes = new Dictionary<string, string>();
foreach(var synonym in synonyms)
{
    Console.WriteLine(synonym);
    string cypher = $@"
                        CALL db.index.fulltext.queryNodes(""ENTITY_TEXT"", ""{synonym}"")
                        YIELD node AS e1
                        MATCH (e1)-[r]-(e2:ENTITY)
                        RETURN e1.id, e1.type, e1.text, e2.name, e2.type, e2.text, type(r)
                    ";

    var textSearchResult = await driver.ExecutableQuery(cypher)
                    .WithConfig(config)
                    .ExecuteAsync();
    if (textSearchResult.Result.Count() > 0)
    {
        foreach(var r in textSearchResult.Result)
        {
            var tripletText = $"{r["e1.text"]} -> {r["type(r)"]} -> {r["e2.text"]}";
            if (!uniqueNodes.ContainsKey(tripletText))
            {
                uniqueNodes.Add(tripletText,tripletText);
            }   
        }
    }
}

Console.WriteLine("");
Console.WriteLine($"{uniqueNodes.Count} Unique nodes with matches:");
foreach(var key in uniqueNodes.Keys)
{
    Console.WriteLine($"{key}");
}

How many blog posts did Jason write about Semantic Kernel^Jason's blog entries about Semantic Kernel^Semantic Kernel blog posts by Jason^Jason's Semantic Kernel articles^number of Jason's Semantic Kernel posts^titles of Jason's Semantic Kernel posts^Jason's blog entries on Semantic Kernel titles^Jason's posts about Semantic Kernel^Semantic Kernel articles by Jason^blog titles by Jason on Semantic Kernel


Error: Neo4j.Driver.ClientException: Failed to invoke procedure `db.index.fulltext.queryNodes`: Caused by: org.apache.lucene.queryparser.classic.TokenMgrError: Lexical error at line 1, column 59.  Encountered: '74' (74),
   at Neo4j.Driver.Internal.MessageHandling.ResponsePipelineError.EnsureThrown()
   at Neo4j.Driver.Internal.Result.ResultCursorBuilder.NextRecordAsync()
   at Neo4j.Driver.Internal.Result.ResultCursor.MoveNextAsync()
   at Neo4j.Driver.Internal.Driver.<>c__DisplayClass29_0.<<GetRowsAsync>g__Process|0>d.MoveNext()
--- End of stack trace from previous location ---
   at Neo4j.Driver.Internal.Driver.<>c__DisplayClass29_0.<<GetRowsAsync>g__Process|0>d.MoveNext()
--- End of stack trace from previous location ---
   at Neo4j.Driver.Internal.Driver.<>c__DisplayClass37_0`1.<<TransformCursor>g__TransformCursorImpl|0>d.MoveNext()
--- End of stack trace from previous location ---
   at Neo4j.Driver.Internal.Driver.Work[T](Query q, IAsyncQueryRunner x, Func`3 process, CancellationToken cancellationToken)
   at Neo4j.Driver.Internal.AsyncSession.<>c__DisplayClass52_0`1.<<RunTransactionAsync>b__1>d.MoveNext()
--- End of stack trace from previous location ---
   at Neo4j.Driver.Internal.AsyncSession.<>c__DisplayClass52_0`1.<<RunTransactionAsync>b__1>d.MoveNext()
--- End of stack trace from previous location ---
   at Neo4j.Driver.Internal.AsyncRetryLogic.RetryAsync[T](Func`1 runTxAsyncFunc)
   at Neo4j.Driver.Internal.Logging.DriverLoggerUtil.TryExecuteAsync[T](ILogger logger, Func`1 func, String message)
   at Neo4j.Driver.Internal.Driver.ExecuteQueryAsyncInternal[T](Query query, QueryConfig config, CancellationToken cancellationToken, Func`3 cursorProcessor)
   at Neo4j.Driver.Internal.Driver.ExecuteQueryAsyncInternal[T](Query query, QueryConfig config, CancellationToken cancellationToken, Func`3 cursorProcessor)
   at Neo4j.Driver.Internal.Driver.GetRowsAsync(Query query, QueryConfig config, Action`1 streamProcessor, CancellationToken cancellationToken)
   at Neo4j.Driver.Internal.ReducedExecutableQuery`3.ExecuteAsync(CancellationToken token)
   at Submission#12.<<Initialize>>d__0.MoveNext()
--- End of stack trace from previous location ---
   at Microsoft.CodeAnalysis.Scripting.ScriptExecutionState.RunSubmissionsAsync[TResult](ImmutableArray`1 precedingExecutors, Func`2 currentExecutor, StrongBox`1 exceptionHolderOpt, Func`2 catchExceptionOpt, CancellationToken cancellationToken)

In [101]:
string question = $@"
                    WITH genai.vector.encode(
                        $question,
                        'AzureOpenAI',
                        {{
                            token: $token,
                            resource: $resource,
                            deployment: $deployment
                        }}) AS question_embedding
                    CALL db.index.vector.queryNodes(
                        'CHUNK_EMBEDDING',
                        $top_k, 
                        question_embedding
                        ) YIELD node AS chunk, score 
                    RETURN chunk.id, chunk.text, score
                    ";

var chunkResult = await driver.ExecutableQuery(question)
                .WithParameters(new() { 
                    {"question", questionText},
                    {"token", envVars["AZURE_OPENAI_API_KEY"]}, 
                    {"resource", envVars["AZURE_OPENAI_RESOURCE"]}, 
                    {"deployment", envVars["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT"]},
                    {"top_k", 5}})
                .WithConfig(config)
                .ExecuteAsync();

In [102]:
Console.WriteLine(JsonSerializer.Serialize(chunkResult, new JsonSerializerOptions {
             WriteIndented = true
         }));

StringBuilder chunkTexts = new StringBuilder();
foreach(var r in chunkResult.Result)
{
    chunkTexts.AppendLine($"Document: {{ text: {r["chunk.text"].ToString()} }}");
}

Console.WriteLine(chunkTexts.ToString());

{
  "Keys": [
    "chunk.id",
    "chunk.text",
    "score"
  ],
  "Result": [
    {
      "chunk.id": "6961feeb0bf14914a7b8bbecfd6566e1",
      "chunk.text": "Post 17 Semantic Kernel Hello World WebSearchEnginePlugin\nA couple of weeks ago I thought I\u2019d written my last of these blogs, mainly due to me getting more in depth with Semantic Kernel. However, after I watched Will Velida\u2019s video Using Bing Search API in the Semantic Kernel SDK \u2026 I couldn\u2019t help but wonder what the API calls were behind the scenes. Will does a great job at explaining how to use the plugin and the Bing resource needed to make calls to the search API, so I won\u2019t get into that part of it - I want to focus on the usefulness and API calls made by the plugin.\nPosted byJason Monday, June 10, 2024\n\nPost 18 Demo Review: Chat Copilot\nDemo Review: Chat Copilot This is the fifth C# demo in The RAG Demo Chronicles (Blog Series) and has the most extensive use of Semantic Kernel out of all the d

In [109]:
ChatClient chatClient = client.GetChatClient("chat");

string prompt = $@"Question: {questionText}
                ######################
                Answer:";



string sysprompt = @"Be brief in your answers.
                    Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
                    For tabular information return it as an html table. Do not return markdown format. If the question is not in English, answer in the language used in the question.";

ChatCompletion completion = chatClient.CompleteChat(
    [
        new SystemChatMessage(sysprompt),
        new UserChatMessage(prompt),
    ]);

Console.WriteLine($"{completion.Role}: {completion.Content[0].Text}");

Assistant: I don't know.


In [None]:
ChatClient chatClient = client.GetChatClient("chat");

string context = $@"Structured data:
    {string.Join(Environment.NewLine, uniqueNodes.Keys.ToArray())}
Unstructured data:
{chunkTexts.ToString()}
";

//Console.WriteLine(context);

string prompt = $@"Answer the question based only on the following context:
			    {context}
                ######################
                Question: {questionText}
                ######################
                Answer:";


string sysprompt = @"Be brief in your answers.
                    Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
                    For tabular information return it as an html table. Do not return markdown format. If the question is not in English, answer in the language used in the question.";

ChatCompletion completion = chatClient.CompleteChat(
    [
        new SystemChatMessage(sysprompt),
        new UserChatMessage(prompt),
    ]);

Console.WriteLine($"{completion.Role}: {completion.Content[0].Text}");

In [131]:
string tuple_delimiter = "^";
string completion_delimiter = "<COMPLETE>";
string record_delimiter = "<EOR>";
string entity_types = """organization"", ""person"", ""geo"", ""event""";

ChatClient chatClient = client.GetChatClient(llm);

string fileText = File.ReadAllText("input/essay.txt");

var tokenizer = Tiktoken.CreateTiktokenForModel("gpt-4o");
#pragma warning disable SKEXP0050
var lines = TextChunker.SplitPlainTextLines(fileText, 500, text => tokenizer.CountTokens(text));
var paragraphs = TextChunker.SplitPlainTextParagraphs(lines, 500, 100, null, text => tokenizer.CountTokens(text));

int max_knowledge_triplets = 10;
for (int i = 0; i < 2; i++)
{
    string input_text = paragraphs[i];
    
string prompt = $@"
-Goal-
Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.

-Steps-
1. Identify all entities. For each identified entity, extract the following information:
- entity_name: Name of the entity, capitalized
- entity_type: One of the following types: [{entity_types}]
- entity_description: Comprehensive description of the entity's attributes and activities
Format each entity as (""entity""{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>

2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
For each pair of related entities, extract the following information:
- source_entity: name of the source entity, as identified in step 1
- target_entity: name of the target entity, as identified in step 1
- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
- relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
 Format each relationship as (""relationship""{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_strength>)

3. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.

4. When finished, output {completion_delimiter}

######################
-Examples-
######################
Example 1:

Entity_types: [person, technology, mission, organization, location]
Text:
while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.

Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. �If this tech can be understood..."" Taylor said, their voice quieter, ""It could change the game for us. For all of us.�

The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.

It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths
################
Output:
(""entity""{tuple_delimiter}""Alex""{tuple_delimiter}""person""{tuple_delimiter}""Alex is a character who experiences frustration and is observant of the dynamics among other characters.""){record_delimiter}
(""entity""{tuple_delimiter}""Taylor""{tuple_delimiter}""person""{tuple_delimiter}""Taylor is portrayed with authoritarian certainty and shows a moment of reverence towards a device, indicating a change in perspective.""){record_delimiter}
(""entity""{tuple_delimiter}""Jordan""{tuple_delimiter}""person""{tuple_delimiter}""Jordan shares a commitment to discovery and has a significant interaction with Taylor regarding a device.""){record_delimiter}
(""entity""{tuple_delimiter}""Cruz""{tuple_delimiter}""person""{tuple_delimiter}""Cruz is associated with a vision of control and order, influencing the dynamics among other characters.""){record_delimiter}
(""entity""{tuple_delimiter}""The Device""{tuple_delimiter}""technology""{tuple_delimiter}""The Device is central to the story, with potential game-changing implications, and is revered by Taylor.""){record_delimiter}
(""relationship""{tuple_delimiter}""Alex""{tuple_delimiter}""Taylor""{tuple_delimiter}""Alex is affected by Taylor's authoritarian certainty and observes changes in Taylor's attitude towards the device.""{tuple_delimiter}7){record_delimiter}
(""relationship""{tuple_delimiter}""Alex""{tuple_delimiter}""Jordan""{tuple_delimiter}""Alex and Jordan share a commitment to discovery, which contrasts with Cruz's vision.""{tuple_delimiter}6){record_delimiter}
(""relationship""{tuple_delimiter}""Taylor""{tuple_delimiter}""Jordan""{tuple_delimiter}""Taylor and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy truce.""{tuple_delimiter}8){record_delimiter}
(""relationship""{tuple_delimiter}""Jordan""{tuple_delimiter}""Cruz""{tuple_delimiter}""Jordan's commitment to discovery is in rebellion against Cruz's vision of control and order.""{tuple_delimiter}5){record_delimiter}
(""relationship""{tuple_delimiter}""Taylor""{tuple_delimiter}""The Device""{tuple_delimiter}""Taylor shows reverence towards the device, indicating its importance and potential impact.""{tuple_delimiter}9){completion_delimiter}
#############################
Example 2:

Entity_types: [person, technology, mission, organization, location]
Text:
They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols�it demanded a new perspective, a new resolve.

Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.

Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence� the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly
#############
Output:
(""entity""{tuple_delimiter}""Washington""{tuple_delimiter}""location""{tuple_delimiter}""Washington is a location where communications are being received, indicating its importance in the decision-making process.""){record_delimiter}
(""entity""{tuple_delimiter}""Operation: Dulce""{tuple_delimiter}""mission""{tuple_delimiter}""Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and activities.""){record_delimiter}
(""entity""{tuple_delimiter}""The team""{tuple_delimiter}""organization""{tuple_delimiter}""The team is portrayed as a group of individuals who have transitioned from passive observers to active participants in a mission, showing a dynamic change in their role.""){record_delimiter}
(""relationship""{tuple_delimiter}""The team""{tuple_delimiter}""Washington""{tuple_delimiter}""The team receives communications from Washington, which influences their decision-making process.""{tuple_delimiter}7){record_delimiter}
(""relationship""{tuple_delimiter}""The team""{tuple_delimiter}""Operation: Dulce""{tuple_delimiter}""The team is directly involved in Operation: Dulce, executing its evolved objectives and activities.""{tuple_delimiter}9){completion_delimiter}
#############################
Example 3:

Entity_types: [person, role, technology, organization, event, location, concept]
Text:
their voice slicing through the buzz of activity. ""Control may be an illusion when facing an intelligence that literally writes its own rules,"" they stated stoically, casting a watchful eye over the flurry of data.

""It's like it's learning to communicate,"" offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. ""This gives talking to strangers' a whole new meaning.""

Alex surveyed his team�each face a study in concentration, determination, and not a small measure of trepidation. ""This might well be our first contact,"" he acknowledged, ""And we need to be ready for whatever answers back.""

Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable�a collective introspection about their role in this grand cosmic play, one that could rewrite human history.

The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation
#############
Output:
(""entity""{tuple_delimiter}""Sam Rivera""{tuple_delimiter}""person""{tuple_delimiter}""Sam Rivera is a member of a team working on communicating with an unknown intelligence, showing a mix of awe and anxiety.""){record_delimiter}
(""entity""{tuple_delimiter}""Alex""{tuple_delimiter}""person""{tuple_delimiter}""Alex is the leader of a team attempting first contact with an unknown intelligence, acknowledging the significance of their task.""){record_delimiter}
(""entity""{tuple_delimiter}""Control""{tuple_delimiter}""concept""{tuple_delimiter}""Control refers to the ability to manage or govern, which is challenged by an intelligence that writes its own rules.""){record_delimiter}
(""entity""{tuple_delimiter}""Intelligence""{tuple_delimiter}""concept""{tuple_delimiter}""Intelligence here refers to an unknown entity capable of writing its own rules and learning to communicate.""){record_delimiter}
(""entity""{tuple_delimiter}""First Contact""{tuple_delimiter}""event""{tuple_delimiter}""First Contact is the potential initial communication between humanity and an unknown intelligence.""){record_delimiter}
(""entity""{tuple_delimiter}""Humanity's Response""{tuple_delimiter}""event""{tuple_delimiter}""Humanity's Response is the collective action taken by Alex's team in response to a message from an unknown intelligence.""){record_delimiter}
(""relationship""{tuple_delimiter}""Sam Rivera""{tuple_delimiter}""Intelligence""{tuple_delimiter}""Sam Rivera is directly involved in the process of learning to communicate with the unknown intelligence.""{tuple_delimiter}9){record_delimiter}
(""relationship""{tuple_delimiter}""Alex""{tuple_delimiter}""First Contact""{tuple_delimiter}""Alex leads the team that might be making the First Contact with the unknown intelligence.""{tuple_delimiter}10){record_delimiter}
(""relationship""{tuple_delimiter}""Alex""{tuple_delimiter}""Humanity's Response""{tuple_delimiter}""Alex and his team are the key figures in Humanity's Response to the unknown intelligence.""{tuple_delimiter}8){record_delimiter}
(""relationship""{tuple_delimiter}""Control""{tuple_delimiter}""Intelligence""{tuple_delimiter}""The concept of Control is challenged by the Intelligence that writes its own rules.""{tuple_delimiter}7){completion_delimiter}
#############################
-Real Data-
######################
Entity_types: {entity_types}
Text: {input_text}
######################
Output:";

    ChatCompletion completion = chatClient.CompleteChat(
    	[
        	new UserChatMessage(prompt),
    	]);

	Console.WriteLine($"{completion.Role}: {completion.Content[0].Text}");
}

Assistant: ("entity"^"Jason"^"person"^"Jason is the author of the blog posts and is actively learning AI, moving through the four stages of competence.")<EOR>
("entity"^"Michael Washington"^"person"^"Michael Washington is mentioned as a demo creator for RAG (Retrieval Augmented Generation) applications using various technologies.")<EOR>
("entity"^"AI"^"technology"^"Artificial Intelligence, abbreviated as AI, is the primary focus of learning and development in these blog posts.")<EOR>
("entity"^"GenAI"^"technology"^"Generative AI, abbreviated as GenAI, is a key technology being integrated into business applications.")<EOR>
("entity"^"ChatGPT"^"technology"^"ChatGPT is highlighted as a significant AI tool that leaders in the industry are using.")<EOR>
("entity"^"Blazor"^"technology"^"Blazor is a framework mentioned in the context of building RAG applications.")<EOR>
("entity"^"SQL Server"^"technology"^"SQL Server is frequently mentioned as a relational database tool utilized in RAG applic