In [41]:
import os  
import re  
import base64  
import json  
from dotenv import load_dotenv  
from azure.identity import DefaultAzureCredential, get_bearer_token_provider  
from azure.search.documents import SearchClient  
from azure.search.documents.models import QueryType, VectorizedQuery, QueryAnswerType, QueryCaptionType  
from openai import AsyncAzureOpenAI  
from backend.auth.auth_utils import get_authenticated_user_details  
from backend.security.ms_defender_utils import get_msdefender_user_json  
from pydantic import BaseModel, Field  
from typing import List, Dict, Optional
import instructor
# import nest_asyncio
# nest_asyncio.apply()
  
load_dotenv()  
  
class AzureSearchService:
    """Answer questions from two Azure AI Search indexes (wiki and video) with citations.

    The constructor reads its configuration from environment variables and builds
    one ``SearchClient`` per index plus an ``AsyncAzureOpenAI`` client.
    ``process_query`` retrieves chunks from both indexes, asks the chat model for
    an answer with citations, and turns every citation into a link record.
    """

    # Returned without calling the chat model when retrieval finds nothing; the
    # wording matches the fallback sentence the system prompt asks the model to use.
    NO_ANSWER_MESSAGE = "There is no answer available"
    # Number of top-ranked chunks placed into the prompt context.
    MAX_CONTEXT_CHUNKS = 3
    # File extensions treated as direct video files by ``is_video_link``.
    VIDEO_EXTENSIONS = frozenset(['mp4', 'mkv', 'avi', 'mov', 'wmv', 'flv', 'webm'])

    def __init__(self):
        """Read settings from the environment and create the search and OpenAI clients."""
        self.service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
        self.wiki_index = os.getenv("AZURE_SEARCH_INDEX_WIKI")
        self.video_index = os.getenv("AZURE_SEARCH_INDEX_VIDEO")
        self.api_version = os.environ.get("AZURE_OPENAI_PREVIEW_API_VERSION")
        self.aoai_api_key = os.environ.get("AZURE_OPENAI_KEY")
        self.embedding_model = os.environ.get("AZURE_OPENAI_EMBEDDING_NAME")
        self.chat_model = os.environ.get("AZURE_OPENAI_MODEL")
        self.credential = DefaultAzureCredential()
        self.ad_token_provider = get_bearer_token_provider(self.credential, "https://cognitiveservices.azure.com/.default")
        # Fall back to an endpoint derived from the resource name when no explicit endpoint is set.
        self.azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") or f"https://{os.environ.get('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
        self.default_headers = {"x-ms-useragent": "GitHubSampleWebApp/AsyncAzureOpenAI/1.0.0"}
        self.wiki_search_client = SearchClient(self.service_endpoint, self.wiki_index, self.credential)
        self.video_search_client = SearchClient(self.service_endpoint, self.video_index, self.credential)
        # Empty request headers / payload are passed to the project helpers here.
        self.authenticated_user_details = get_authenticated_user_details({})
        self.conversation_id = None
        self.user_json = get_msdefender_user_json(self.authenticated_user_details, {}, self.conversation_id)
        self.openai_client = AsyncAzureOpenAI(
            api_version=self.api_version,
            api_key=self.aoai_api_key,
            azure_ad_token_provider=self.ad_token_provider,
            azure_endpoint=self.azure_endpoint,
            default_headers=self.default_headers,
        )

    @staticmethod
    def convert_timestamp_to_seconds(timestamp_str):
        """Convert the first ``H:M:S`` timestamp found in a string to seconds.

        Each field may have one or two digits, so ``"00:1:44"`` (the form used in
        the system prompt's own example) is accepted as well as ``"00:01:44"``.

        Returns:
            int: total seconds, or 0 when the input is not a string or contains
            no timestamp (a message is printed in that case).
        """
        match = None
        if isinstance(timestamp_str, str):
            match = re.search(r'(\d{1,2}):(\d{1,2}):(\d{1,2})', timestamp_str)
        if match:
            hours, minutes, seconds = map(int, match.groups())
            return hours * 3600 + minutes * 60 + seconds
        print("No valid timestamp found in the string")
        return 0

    @staticmethod
    def extract_integer(value):
        """Return ``value`` if it is an int, else the first run of digits in a string.

        Raises:
            ValueError: if ``value`` is neither an int nor a string containing digits.
        """
        if isinstance(value, int):
            return value
        if isinstance(value, str):
            match = re.search(r'\d+', value)
            if match:
                return int(match.group())
        raise ValueError("Input must be an integer or a string representing a single integer.")

    @staticmethod
    def is_video_link(url):
        """Tell whether ``url`` points directly at a video file.

        Only the last path segment is inspected, with any query string or fragment
        removed first, and the extension is compared case-insensitively. Anything
        that is not a string, or has no ``.`` in its last path segment, is not a
        video link.
        """
        if not isinstance(url, str):
            return False
        path = re.split(r'[?#]', url, maxsplit=1)[0]
        filename = path.rsplit('/', 1)[-1]
        if '.' not in filename:
            return False
        extension = filename.rsplit('.', 1)[-1].lower()
        return extension in AzureSearchService.VIDEO_EXTENSIONS

    @staticmethod
    def generate_base64_encoded_string(start_time_in_seconds):
        """Build the ``&nav=<base64>`` suffix carrying a playback start time.

        The payload is a JSON object with fixed ``referralInfo`` values and
        ``playbackOptions.startTimeInSeconds`` set to the given value.
        """
        data = {
            "referralInfo": {
                "referralApp": "StreamWebApp",
                "referralView": "ShareDialog-Link",
                "referralAppPlatform": "Web",
                "referralMode": "view"
            },
            "playbackOptions": {
                "startTimeInSeconds": start_time_in_seconds
            }
        }
        json_string = json.dumps(data)
        base64_encoded = base64.b64encode(json_string.encode('utf-8')).decode('utf-8')
        return "&nav=" + base64_encoded

    @staticmethod
    def clean_url(url):
        """Drop an existing ``nav=`` parameter (and everything after it) from ``url``.

        Trailing ``?`` / ``&`` separators left behind are removed too. An empty or
        missing URL yields an empty string instead of raising.
        """
        if not url:
            return ""
        without_nav = re.sub(r'([?&]nav=).*', '', url)
        return without_nav.rstrip('?&')

    @staticmethod
    def remove_duplicates(lst):
        """Return the items of ``lst`` without repeats, keeping first occurrences in order.

        Items are compared by their tuple form, so every item must be an iterable
        of hashable values.
        """
        seen = set()
        unique_lst = []
        for item in lst:
            item_tuple = tuple(item)
            if item_tuple not in seen:
                seen.add(item_tuple)
                unique_lst.append(item)
        return unique_lst

    async def generate_embeddings(self, query, model):
        """Return the embedding vector of ``query`` produced by the deployment ``model``."""
        embeddings_response = await self.openai_client.embeddings.create(model=model, input=query)
        return embeddings_response.data[0].embedding

    async def _retrieve_chunks(self, query):
        """Search both indexes and return the best-ranked chunks, tagged by source."""
        vector_query = VectorizedQuery(
            vector=await self.generate_embeddings(query, self.embedding_model),
            k_nearest_neighbors=3, fields="text_vector",
        )

        tagged_results = []
        sources = (("wiki", self.wiki_search_client), ("video", self.video_search_client))
        for container, client in sources:
            hits = client.search(
                search_text=query,
                vector_queries=[vector_query],
                select=["title", "chunk", "url_metadata"],
                query_type=QueryType.SEMANTIC,
                semantic_configuration_name="semantic",
                query_caption=QueryCaptionType.EXTRACTIVE,
                query_answer=QueryAnswerType.EXTRACTIVE,
                top=3,
            )
            # Tag while iterating each source so the label never depends on
            # comparing result dictionaries with each other.
            for hit in hits:
                hit['container'] = container
                tagged_results.append(hit)

        # A missing (None) reranker score sorts as 0 instead of raising on comparison.
        ranked = sorted(
            tagged_results,
            key=lambda hit: hit["@search.reranker_score"] or 0,
            reverse=True,
        )
        return ranked[:self.MAX_CONTEXT_CHUNKS]

    @staticmethod
    def _build_system_prompt(selected_chunks):
        """Render the system prompt with one numbered ``documents`` section per chunk."""
        # Built from however many chunks exist, so fewer than three is not an error.
        context_str = "\n\n".join(
            f"**documents: {number}**\n{chunk['chunk']}"
            for number, chunk in enumerate(selected_chunks, start=1)
        )
        # NOTE(review): the instruction list jumps from 5 to 7 and describes the
        # timestamps as HH:MM while the examples use HH:MM:SS; the wording is kept
        # as-is because it is the text sent to the model.
        return f"""\
Context information is below.
---------------------
{context_str}
---------------------
INSTRUCTIONS:
1. You are an assistant who helps users answer their queries.
2. Answer the user's question from the above Context. The Context is provided in the form of multiple documents, each identified by a document number. If a document is a transcript, it also includes timestamps in the format HH:MM on each line above the text.
3. Give answer in step by step format.
4. Keep your answer concise and solely on the information given in the Context above.
5. Always provide the answer with all relevant citations, ensuring that each citation includes the corresponding timestamp and document number used to generate the response. Provide the citation in the following format only at the end of the whole answer not in between.
    - For transcript, use: [timestamp, documents number]. for example [["00:11:00", 1], ["00:1:44", 2]]
    - For non transcript, use: ["", documents number]. for example [["", 3],["", 1], ["", 2]]
7. Do not create or derive your own answer. If the answer is not directly available in the context, just reply stating, 'There is no answer available'
"""

    @classmethod
    def _build_video_link(cls, url_metadata, start_time):
        """Return ``url_metadata`` extended so playback starts at ``start_time`` seconds."""
        if not url_metadata:
            return url_metadata
        if cls.is_video_link(url_metadata):
            return url_metadata + f"#t={start_time}"
        base_url = cls.clean_url(url_metadata)
        nav_param = cls.generate_base64_encoded_string(start_time)
        if '?' not in base_url:
            # No query string yet: the first parameter must start with '?', not '&'.
            nav_param = '?' + nav_param[1:]
        return base_url + nav_param

    def _build_citation_fields(self, citations, selected_chunks):
        """Map ``[timestamp, document number]`` citations to link records.

        Citations that are too short, whose document number cannot be parsed, or
        whose number does not refer to one of ``selected_chunks`` are skipped.
        """
        fields_mapping = []
        for citation in self.remove_duplicates(citations):
            if len(citation) < 2:
                continue
            try:
                index = self.extract_integer(citation[1]) - 1
            except ValueError:
                continue
            # Document numbers are 1-based; 0 or a too-large number must not wrap
            # around to another chunk or raise IndexError.
            if not 0 <= index < len(selected_chunks):
                continue

            chunk = selected_chunks[index]
            url_metadata = chunk['url_metadata']
            start_time = None
            if chunk['container'] == 'video':
                start_time = self.convert_timestamp_to_seconds(citation[0])
                timestamp_link = self._build_video_link(url_metadata, start_time)
            else:
                timestamp_link = url_metadata

            fields_mapping.append({
                "content_fields": chunk['chunk'],
                "title_field": chunk['title'],
                "url_field": timestamp_link,
                "filepath_field": None,
                "vector_fields": None,
                "start_time": start_time
            })
        return fields_mapping

    async def process_query(self, query):
        """Answer ``query`` from the indexed content.

        Returns:
            tuple: ``(fields_mapping, answer)`` where ``fields_mapping`` is a list
            of citation records (content, title, URL, start time) and ``answer``
            is the model's answer text. When retrieval returns no chunks the
            result is ``([], NO_ANSWER_MESSAGE)`` and the chat model is not called.
        """
        selected_chunks = await self._retrieve_chunks(query)
        if not selected_chunks:
            return [], self.NO_ANSWER_MESSAGE

        # Structured response requested from the chat model through instructor.
        class AnswerCitation(BaseModel):
            citation: List[List] = Field(description="Include all the citations")
            answer: str = Field(description="only include Answer, do not include citations in this")

        instructor_client = instructor.from_openai(self.openai_client)

        user_info = await instructor_client.chat.completions.create(
            model=self.chat_model,
            response_model=AnswerCitation,
            messages=[{"role": "system", "content": self._build_system_prompt(selected_chunks)},
                      {"role": "user", "content": query}],
            user=self.user_json
        )

        fields_mapping = self._build_citation_fields(user_info.citation, selected_chunks)
        return fields_mapping, user_info.answer
  
# Example usage  
async def main():
    """Run one sample query through AzureSearchService and print both results."""
    service = AzureSearchService()
    citations, reply = await service.process_query("Registering a PDM Link Server")
    print("Fields Mapping:", citations)
    print("Answer:", reply)

# Run the example
import asyncio
asyncio.run(main())


Fields Mapping: []
Answer: There is no answer available


In [44]:
# Two separate placeholder records; the values are literal strings, not lookups.
pp = [
    {"URL": "i['url_field']", "FileName": "i['title_field']"}
    for _ in range(2)
]


In [40]:
# Display the citation records.
# NOTE(review): in the class-based cell above `fields_mapping` is only a local
# variable inside main(), so this cell appears to rely on a module-level
# `fields_mapping` left in the kernel by another cell - confirm before a
# Restart & Run All.
fields_mapping

  'title_field': 'PDMLink101.docx',
  'url_field': 'https://microsoft.sharepoint.com/:v:/t/collearninganddevelopmentlnd/EVyZnBbaPUtPuuTINZ9xWnQBxAXl3-Ivlwo_dXKTX9OgBA?e=3gRIzo&nav=eyJyZWZlcnJhbEluZm8iOiB7InJlZmVycmFsQXBwIjogIlN0cmVhbVdlYkFwcCIsICJyZWZlcnJhbFZpZXciOiAiU2hhcmVEaWFsb2ctTGluayIsICJyZWZlcnJhbEFwcFBsYXRmb3JtIjogIldlYiIsICJyZWZlcnJhbE1vZGUiOiAidmlldyJ9LCAicGxheWJhY2tPcHRpb25zIjogeyJzdGFydFRpbWVJblNlY29uZHMiOiAxODMwfX0=',
  'filepath_field': None,
  'vector_fields': None,
  'start_time': 1830},
 {'content_fields': "going to go a little fast.\n00:37:52 Speaker 1\nI'm back to an empty workspace now. Let's go back to the PDM link training product in the browser view.\n00:37:59 Speaker 1\nHere's my search remembered, and this is the part that I want. I call this the easy button sometimes, but just to the left of the file name column there is this square within a square icon that says open in Creole. This is a shortcut that you get for every single piece of CAD. This column is the 

In [10]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [11]:
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from azure.search.documents import SearchClient
from azure.search.documents.models import QueryType, VectorizedQuery,QueryAnswerType, QueryCaptionType
from openai import AzureOpenAI, AsyncAzureOpenAI
from backend.auth.auth_utils import get_authenticated_user_details
from backend.security.ms_defender_utils import get_msdefender_user_json
import re
import base64
import json

# --- Azure AI Search settings (read from the environment) ---
service_endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
wiki_index = os.getenv("AZURE_SEARCH_INDEX_WIKI")
video_index = os.getenv("AZURE_SEARCH_INDEX_VIDEO")

# --- Azure OpenAI settings (read from the environment) ---
api_version = os.getenv("AZURE_OPENAI_PREVIEW_API_VERSION")
aoai_api_key = os.getenv("AZURE_OPENAI_KEY")
embedding_model = os.getenv("AZURE_OPENAI_EMBEDDING_NAME")
chat_model = os.getenv("AZURE_OPENAI_MODEL")

# --- Credentials ---
credential = DefaultAzureCredential()
ad_token_provider = get_bearer_token_provider(
    credential,
    "https://cognitiveservices.azure.com/.default",
)

# Use the explicit endpoint when it is set; otherwise derive one from the resource name.
azure_endpoint = (
    os.getenv("AZURE_OPENAI_ENDPOINT")
    or f"https://{os.environ.get('AZURE_OPENAI_RESOURCE')}.openai.azure.com/"
)
default_headers = {"x-ms-useragent": "GitHubSampleWebApp/AsyncAzureOpenAI/1.0.0"}

# --- Search clients: one per index, sharing endpoint and credential ---
wiki_search_client = SearchClient(service_endpoint, wiki_index, credential)
video_search_client = SearchClient(service_endpoint, video_index, credential)

# --- User context passed along with chat requests ---
authenticated_user_details = get_authenticated_user_details({})
conversation_id = None
user_json = get_msdefender_user_json(authenticated_user_details, {}, conversation_id)

# --- Async Azure OpenAI client ---
openai_client = AsyncAzureOpenAI(
    api_version=api_version,
    api_key=aoai_api_key,
    azure_ad_token_provider=ad_token_provider,
    azure_endpoint=azure_endpoint,
    default_headers=default_headers,
)


In [21]:
async def generate_embeddings(query, model):
    """Return the embedding of ``query`` from the embeddings deployment ``model``.

    Uses the module-level ``openai_client`` and returns the ``embedding`` of the
    first item in the response data.
    """
    response = await openai_client.embeddings.create(input=query, model=model)
    return response.data[0].embedding

def convert_timestamp_to_seconds(timestamp_str):
    """Convert the first ``H:M:S`` timestamp found in a string to seconds.

    Each field may have one or two digits, so ``"00:1:44"`` (the form used in the
    system prompt's own example) is accepted as well as ``"00:01:44"``.

    Args:
        timestamp_str: text expected to contain a timestamp.

    Returns:
        int: total seconds, or 0 when the input is not a string or contains no
        timestamp (a message is printed in that case).
    """
    match = None
    # Guard against None / non-string citations instead of letting re.search raise.
    if isinstance(timestamp_str, str):
        match = re.search(r'(\d{1,2}):(\d{1,2}):(\d{1,2})', timestamp_str)
    if match:
        hours, minutes, seconds = map(int, match.groups())
        return hours * 3600 + minutes * 60 + seconds
    print("No valid timestamp found in the string")
    return 0

def extract_integer(value):
    """Return ``value`` unchanged if it is an int, else the first digit run of a string.

    Raises:
        ValueError: when ``value`` is neither an int nor a string containing digits.
    """
    if isinstance(value, int):
        return value

    digits = re.search(r'\d+', value) if isinstance(value, str) else None
    if digits is None:
        raise ValueError("Input must be an integer or a string representing a single integer.")
    return int(digits.group())


def is_video_link(url):
    """Tell whether ``url`` points directly at a video file.

    Only the last path segment is inspected, after removing any query string or
    fragment, and the extension is compared case-insensitively. A value that is
    not a string, or whose last path segment has no ``.``, is not a video link.

    Args:
        url: the link to classify.

    Returns:
        bool: True when the file extension is one of the known video extensions.
    """
    video_extensions = {'mp4', 'mkv', 'avi', 'mov', 'wmv', 'flv', 'webm'}
    if not isinstance(url, str):
        return False
    # "video.mp4?download=1" and "video.mp4#t=10" must still be recognised.
    path = re.split(r'[?#]', url, maxsplit=1)[0]
    filename = path.rsplit('/', 1)[-1]
    if '.' not in filename:
        # Without a dot there is no extension (e.g. the bare string "mp4").
        return False
    extension = filename.rsplit('.', 1)[-1].lower()
    return extension in video_extensions



def generate_base64_encoded_string(start_time_in_seconds):
    """Build the ``&nav=<base64>`` URL suffix that carries a playback start time.

    The encoded payload is a JSON object with fixed ``referralInfo`` values and
    ``playbackOptions.startTimeInSeconds`` set to ``start_time_in_seconds``.
    """
    referral_info = {
        "referralApp": "StreamWebApp",
        "referralView": "ShareDialog-Link",
        "referralAppPlatform": "Web",
        "referralMode": "view",
    }
    payload = {
        "referralInfo": referral_info,
        "playbackOptions": {"startTimeInSeconds": start_time_in_seconds},
    }
    encoded_bytes = base64.b64encode(json.dumps(payload).encode('utf-8'))
    return "&nav=" + encoded_bytes.decode('utf-8')

def clean_url(url):
    """Drop an existing ``nav=`` parameter (and everything after it) from ``url``.

    Trailing ``?`` / ``&`` separators left behind are removed as well, so a new
    parameter can be appended cleanly.

    Args:
        url: the link to clean; may be empty or None.

    Returns:
        str: the cleaned URL, or an empty string for empty/None input.
    """
    if not url:
        # Nothing to clean; indexing the last character would raise on "".
        return ""
    without_nav = re.sub(r'([?&]nav=).*', '', url)
    # The stripped value must be returned; a bare slice expression has no effect.
    return without_nav.rstrip('?&')

def remove_duplicates(lst):
    """Return the items of ``lst`` without repeats, first occurrences in order.

    Two items count as duplicates when their tuple forms are equal, so every
    item must be an iterable of hashable values.
    """
    first_seen = {}
    for entry in lst:
        # setdefault keeps the first entry stored under each key.
        first_seen.setdefault(tuple(entry), entry)
    return list(first_seen.values())


In [22]:
# Question used for every retrieval and chat step in the cells below.
query = "how to get cad into your workspace"
# query = "Registering a PDM Link Server"
# Vector part of the hybrid query: embed the question (top-level await, so this
# cell needs a notebook/IPython kernel) and search the "text_vector" field.
vector_query = VectorizedQuery(vector= await generate_embeddings(query, embedding_model), 
                               k_nearest_neighbors=3, fields="text_vector",)

# Text + vector search with semantic ranking against the wiki index; the result
# iterator is materialised into a list so it can be reused in later cells.
results_wiki = list(wiki_search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["title", "chunk", "url_metadata"],
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="semantic",
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE,
    top=3,
))


In [23]:
# Same search as for the wiki index, run against the video index; reuses the
# module-level `query` and `vector_query` defined in an earlier cell.
results_video = list(video_search_client.search(
    search_text=query,
    vector_queries=[vector_query],
    select=["title", "chunk", "url_metadata"],
    query_type=QueryType.SEMANTIC,
    semantic_configuration_name="semantic",
    query_caption=QueryCaptionType.EXTRACTIVE,
    query_answer=QueryAnswerType.EXTRACTIVE,
    top=3,
))

In [24]:
# semantic_answers = results_wiki.get_answers()
# for answer in semantic_answers:
#     if answer.highlights:
#         print(f"Semantic Answer: {answer.highlights}")
#     else:
#         print(f"Semantic Answer: {answer.text}")
#     print(f"Semantic Answer Score: {answer.score}\n")

# for result in results_wiki:
#     print(f"Title: {result['title']}")
#     print(f"Reranker Score: {result['@search.reranker_score']}")
#     print(f"URL: {result['url_metadata']}")
#     captions = result["@search.captions"]
#     if captions:
#         caption = captions[0]
#         if caption.highlights:
#             print(f"Caption: {caption.highlights}\n")
#         else:
#             print(f"Caption: {caption.text}\n")

In [25]:
# results_wiki = list(wiki_search_client.search(search_text=None, vector_queries= [query_embbeding], semantic_configuration_name='semantic', top=3, query_type=QueryType.SEMANTIC))
# results_video = list(video_search_client.search(search_text=None,vector_queries= [query_embbeding],semantic_configuration_name='semantic', top=3, query_type=QueryType.SEMANTIC))

In [26]:
# Merge the hits of both indexes into one list (wiki first, then video).
results = results_wiki + results_video
filter_result = []  # not used in the visible cells; kept in case another cell reads it

# Tag every hit with the index it came from. Iterating each source list directly
# avoids `d in results_wiki` membership tests, which compare dictionaries by
# value and could mislabel a video hit that happens to equal a wiki hit.
for source_name, source_results in (("wiki", results_wiki), ("video", results_video)):
    for d in source_results:
        d['container'] = source_name

# Rank all hits by semantic reranker score, best first. A missing (None) score
# is treated as 0 so the sort cannot fail on a None comparison.
sorted_data = sorted(results, key=lambda x: x["@search.reranker_score"] or 0, reverse=True)
# selected_container = sorted_data[0]['container']
# Keep at most the three best chunks for the prompt context.
selected_chunks = sorted_data[:3]
# for i in sorted_data:
#     if i['container'] == selected_container and len(selected_chunks)<2:
#         selected_chunks.append(i)


# Render one numbered "documents" section per selected chunk. Building the text
# from the list (instead of indexing positions 0-2) keeps this cell working when
# the search returned fewer than three chunks.
context_str = "\n" + "\n\n".join(
    f"**documents: {number}**\n\n{chunk['chunk']}"
    for number, chunk in enumerate(selected_chunks, start=1)
) + "\n"

# if selected_container == "video":
#     citation_prompt = "Context is a transcript which has timestamp in the form HH:MM:SS in each line above texts. Once you provide the answer, include the all the citations based on each of the timestamp and documents number [timestap, document number] at the end which are used to generate the answer. citation format is [['00:11:00', 1], ['00:1:44', 2]]"
# else:
#     citation_prompt = "Context is provided in the form of multiple documents. Once you provide the answer, include the citation based on the documents number at the end. citation format is [1,2]"

# System prompt for the chat call: embeds `context_str` and tells the model to
# answer only from that context and to return citations as
# [timestamp, document number] pairs.
# NOTE(review): the instruction list jumps from 5 to 7, instruction 2 describes
# the timestamps as HH:MM while the examples use HH:MM:SS, and the example
# "00:1:44" has a one-digit minutes field - confirm the intended format before
# editing, since this text is sent to the model verbatim.
RAG_SYSTEM_PROMPT = f"""\
Context information is below.
---------------------
{context_str}
---------------------
INSTRUCTIONS:
1. You are an assistant who helps users answer their queries.
2. Answer the user's question from the above Context. The Context is provided in the form of multiple documents, each identified by a document number. If a document is a transcript, it also includes timestamps in the format HH:MM on each line above the text.
3. Give answer in step by step format.
4. Keep your answer concise and solely on the information given in the Context above.
5. Always provide the answer with all relevant citations, ensuring that each citation includes the corresponding timestamp and document number used to generate the response. Provide the citation in the following format only at the end of the whole answer not in between.
    - For transcript, use: [timestamp, documents number]. for example [["00:11:00", 1], ["00:1:44", 2]]
    - For non transcript, use: ["", documents number]. for example [["", 3],["", 1], ["", 2]]
7. Do not create or derive your own answer. If the answer is not directly available in the context, just reply stating, 'There is no answer available'
"""


In [27]:
import instructor
from pydantic import BaseModel, Field
from typing import List, Dict, Optional


# Structured response model passed as `response_model` to the instructor client.
# NOTE(review): the class name looks like a typo of "Answer_Citation"; renaming
# it also requires updating the `response_model=` argument in this cell.
# NOTE(review): the docstring and the Field descriptions below are presumably
# part of the schema shown to the model - treat them as runtime text.
class Asnwer_Cittion(BaseModel):
    """ 
    Asnwer and Citations
    """
    # Citations as a list of lists; the system prompt asks for
    # [timestamp, document number] pairs.
    citation: List[List] = Field(
        description="Include all the citations")
        
    # Answer text without the citations.
    answer: str = Field(description="only include Answer, do not include citations in this")
    


# Wrap the async Azure OpenAI client with instructor so the chat call below can
# take a `response_model`.
instructor_client = instructor.from_openai(openai_client)

# Extract structured data from natural language
# Top-level await: this cell needs a notebook/IPython kernel. The result is read
# in later cells through `user_info.answer` and `user_info.citation`.
user_info = await instructor_client.chat.completions.create(
    model=chat_model,
    response_model=Asnwer_Cittion,
    messages=[{"role": "system", "content": RAG_SYSTEM_PROMPT},
              {"role": "user", "content": query}],
    user = user_json
)


In [28]:
# user_info._raw_response

In [29]:
# Print the answer text so its line breaks render as written.
print(user_info.answer)

To get CAD into your workspace, follow these steps:

1. **Checkout Method:**
   - Locate the part you want to work on in the Windchill system.
   - Check out the part to your workspace by selecting it and clicking on the checkout option. This will lock the part for your modifications and prevent others from checking it out simultaneously [00:30:30, 1].

2. **Open in Creo Method:**
   - Locate the CAD part in Windchill.
   - Click the 'Open in Creo' button (a square within a square icon to the left of the file name). This action will automatically add the part to your workspace and open it in Creo [00:38:20, 2], [00:38:51, 2]. This is often used for read-only users.

3. **Navigating Different Products:**
   - If the part you need is in a different product, navigate to the list of products you have access to via the 'Products' breadcrumb link.
   - Find the product, locate the CAD part, and then use the 'Open in Creo' button to add it to your workspace [00:17:48, 3].


In [30]:
# Show the raw citation list returned with the answer.
user_info.citation

[['00:30:30', 1], ['00:38:20', 2], ['00:38:51', 2], ['00:17:48', 3]]

In [34]:
# Turn every distinct [timestamp, document number] citation into a link record.
fields_mapping = []
for i in remove_duplicates(user_info.citation):
    # Document numbers in citations are 1-based; convert to a list index.
    index = extract_integer(i[1]) - 1
    if not 0 <= index < len(selected_chunks):
        # A number outside the selected chunks would raise IndexError or, for 0,
        # silently wrap around to the last chunk - skip such citations.
        continue

    url_metadata = selected_chunks[index]['url_metadata']
    title = selected_chunks[index]['title']
    container = selected_chunks[index]['container']
    content_columns = selected_chunks[index]['chunk']

    # Reset per citation: non-video citations have no start time, and must not
    # inherit the value computed for a previous video citation.
    start_time = None
    if container == 'video':
        start_time = convert_timestamp_to_seconds(i[0])
        if is_video_link(url_metadata):
            # Direct video file: a media fragment selects the start position.
            timestamp_link = url_metadata+f"#t={start_time}"
        else:
            decoded_timestamp = generate_base64_encoded_string(start_time)
            url = clean_url(url_metadata)
            if '?' not in url:
                # No query string yet: the first parameter must start with '?'.
                decoded_timestamp = '?' + decoded_timestamp[1:]
            timestamp_link = url+decoded_timestamp
    else:
        timestamp_link = url_metadata

    fields_mapping.append({
            "content_fields": content_columns,
            "title_field": title,
            "url_field": timestamp_link,
            "filepath_field": None,
            "vector_fields": None,
            "start_time": start_time
        })


In [35]:
# Display the citation records built by the loop in the previous cell.
fields_mapping

  'title_field': 'PDMLink101.docx',
  'url_field': 'https://microsoft.sharepoint.com/:v:/t/collearninganddevelopmentlnd/EVyZnBbaPUtPuuTINZ9xWnQBxAXl3-Ivlwo_dXKTX9OgBA?e=3gRIzo&nav=eyJyZWZlcnJhbEluZm8iOiB7InJlZmVycmFsQXBwIjogIlN0cmVhbVdlYkFwcCIsICJyZWZlcnJhbFZpZXciOiAiU2hhcmVEaWFsb2ctTGluayIsICJyZWZlcnJhbEFwcFBsYXRmb3JtIjogIldlYiIsICJyZWZlcnJhbE1vZGUiOiAidmlldyJ9LCAicGxheWJhY2tPcHRpb25zIjogeyJzdGFydFRpbWVJblNlY29uZHMiOiAxODMwfX0=',
  'filepath_field': None,
  'vector_fields': None,
  'start_time': 1830},
 {'content_fields': "going to go a little fast.\n00:37:52 Speaker 1\nI'm back to an empty workspace now. Let's go back to the PDM link training product in the browser view.\n00:37:59 Speaker 1\nHere's my search remembered, and this is the part that I want. I call this the easy button sometimes, but just to the left of the file name column there is this square within a square icon that says open in Creole. This is a shortcut that you get for every single piece of CAD. This column is the 

In [None]:
[{'content_fields': "working on that part. I can expect changes to be checked back in, maybe by that user in the near future.\n00:30:21 Speaker 1\nSo check out and add to workspaces.\n00:30:23 Speaker 1\nWhat this page?\n00:30:24 Speaker 1\nIs we talked about all those same.\n00:30:25 Speaker 1\nDetails I'm going to say OK.\n00:30:30 Speaker 1\nAnd now my workspace has that same copy of the same part, but it has a new status icon.\n00:30:36 Speaker 6\nIf I can mouse over it.\n00:30:38 Speaker 1\nIt says checked out by you. That means my username has control of this object and no other user in wind chill right now could go check it out at the same time.\n00:30:49 Speaker 1\nThat's how we throttle a ton of users across a bunch of different countries and time zones actively working in the same.\n00:30:56 Speaker 1\nCAD database at the same.\n00:30:58 Speaker 1\nTime. If you want to change a part and you have read write access to it, your first step should be take control of the part so somebody else can't do it while you're trying to modify the same part. So.\n00:31:09 Speaker 1\nCheck that part out.\n00:31:10 Speaker 1\nThis is what the checkout looks like.\n00:31:13 Speaker 1\nIf I've lost anybody, let.\n00:31:14 Speaker 1\nMe know.\n00:31:15 Speaker 1\nThe next thing I'm going to do is I'm going to throw this copy out of my workspace and show you another way to get CAD into your workspace. That might be a little bit simpler for a read only user.\n00:31:25 Speaker 1\nSo take that same object. I'm going to throw it out of my workspace. Remember, it's checked out to me.\n00:31:32 Speaker 1\nIn throwing that out of my workspace, it needs to cancel my checkout and it will. It will tell me I have a little warning here that says this is checked out by you. I'm going to cancel your checkout if.\n00:31:41 Speaker 1\nYou throw it out of your workspace.\n00:31:43 Speaker 1\nThat's that's what I want to do. 
I'll get another prompt that tells me the same thing.\n00:31:48 Speaker 1",
  'title_field': 'PDMLink101.docx',
  'url_field': 'https://microsoft.sharepoint.com/:v:/t/collearninganddevelopmentlnd/EVyZnBbaPUtPuuTINZ9xWnQBxAXl3-Ivlwo_dXKTX9OgBA?e=3gRIzo?nav=eyJyZWZlcnJhbEluZm8iOiB7InJlZmVycmFsQXBwIjogIlN0cmVhbVdlYkFwcCIsICJyZWZlcnJhbFZpZXciOiAiU2hhcmVEaWFsb2ctTGluayIsICJyZWZlcnJhbEFwcFBsYXRmb3JtIjogIldlYiIsICJyZWZlcnJhbE1vZGUiOiAidmlldyJ9LCAicGxheWJhY2tPcHRpb25zIjogeyJzdGFydFRpbWVJblNlY29uZHMiOiAxODMwfX0=',
  'filepath_field': None,
  'vector_fields': None,
  'start_time': 1830},
 {'content_fields': "working on that part. I can expect changes to be checked back in, maybe by that user in the near future.\n00:30:21 Speaker 1\nSo check out and add to workspaces.\n00:30:23 Speaker 1\nWhat this page?\n00:30:24 Speaker 1\nIs we talked about all those same.\n00:30:25 Speaker 1\nDetails I'm going to say OK.\n00:30:30 Speaker 1\nAnd now my workspace has that same copy of the same part, but it has a new status icon.\n00:30:36 Speaker 6\nIf I can mouse over it.\n00:30:38 Speaker 1\nIt says checked out by you. That means my username has control of this object and no other user in wind chill right now could go check it out at the same time.\n00:30:49 Speaker 1\nThat's how we throttle a ton of users across a bunch of different countries and time zones actively working in the same.\n00:30:56 Speaker 1\nCAD database at the same.\n00:30:58 Speaker 1\nTime. If you want to change a part and you have read write access to it, your first step should be take control of the part so somebody else can't do it while you're trying to modify the same part. So.\n00:31:09 Speaker 1\nCheck that part out.\n00:31:10 Speaker 1\nThis is what the checkout looks like.\n00:31:13 Speaker 1\nIf I've lost anybody, let.\n00:31:14 Speaker 1\nMe know.\n00:31:15 Speaker 1\nThe next thing I'm going to do is I'm going to throw this copy out of my workspace and show you another way to get CAD into your workspace. That might be a little bit simpler for a read only user.\n00:31:25 Speaker 1\nSo take that same object. I'm going to throw it out of my workspace. Remember, it's checked out to me.\n00:31:32 Speaker 1\nIn throwing that out of my workspace, it needs to cancel my checkout and it will. It will tell me I have a little warning here that says this is checked out by you. I'm going to cancel your checkout if.\n00:31:41 Speaker 1\nYou throw it out of your workspace.\n00:31:43 Speaker 1\nThat's that's what I want to do. 
I'll get another prompt that tells me the same thing.\n00:31:48 Speaker 1",
  'title_field': 'PDMLink101.docx',
  'url_field': 'https://microsoft.sharepoint.com/:v:/t/collearninganddevelopmentlnd/EVyZnBbaPUtPuuTINZ9xWnQBxAXl3-Ivlwo_dXKTX9OgBA?e=3gRIzo?nav=eyJyZWZlcnJhbEluZm8iOiB7InJlZmVycmFsQXBwIjogIlN0cmVhbVdlYkFwcCIsICJyZWZlcnJhbFZpZXciOiAiU2hhcmVEaWFsb2ctTGluayIsICJyZWZlcnJhbEFwcFBsYXRmb3JtIjogIldlYiIsICJyZWZlcnJhbE1vZGUiOiAidmlldyJ9LCAicGxheWJhY2tPcHRpb25zIjogeyJzdGFydFRpbWVJblNlY29uZHMiOiAxODU4fX0=',
  'filepath_field': None,
  'vector_fields': None,
  'start_time': 1858},
 {'content_fields': "going to go a little fast.\n00:37:52 Speaker 1\nI'm back to an empty workspace now. Let's go back to the PDM link training product in the browser view.\n00:37:59 Speaker 1\nHere's my search remembered, and this is the part that I want. I call this the easy button sometimes, but just to the left of the file name column there is this square within a square icon that says open in Creole. This is a shortcut that you get for every single piece of CAD. This column is the same for all the CAD you'll see. That's a shortcut to do that.\n00:38:20 Speaker 1\nOpen in Creole for this part right here. So once you find the part, I'm going to click this now open in Creole with one click. This is in my workspace, and I'm looking at that same CAD.\n00:38:30 Speaker 1\nThis is probably the only way that you will be using to open CAD from read only products, so those of you that mentioned you're on the DFX team or you're somewhere where you're not actively developing or modifying CAD that add to or that open and creo button will be how you open CAD and and you can see it's immediately up on your screen.\n00:38:51 Speaker 1\nYou could spin it around and analyze it however you want, but that's the most common way to get CAD into your workspace, especially as a read only user.\n00:38:59 Speaker 1\nHave I lost you so far?\n00:39:01 Speaker 1\nIf so, please ask the question. Otherwise, I really hope everybody's following along and just maybe healthfully bored. OK, so now that I have the cat in my workspace, well, how do I go about modifying it in some way? And then if I'm happy with that modification, check it back in for everybody else to see.\n00:39:21 Speaker 1\nSo that requires read write access to the product. In this case I do have read write access to where this part is and so I can modify this part if I want.\n00:39:30 Speaker 1\nNow this is not checked out to me right now. Remember I said as soon as you start modifying the part.",
  'title_field': 'PDMLink101.docx',
  'url_field': 'https://microsoft.sharepoint.com/:v:/t/collearninganddevelopmentlnd/EVyZnBbaPUtPuuTINZ9xWnQBxAXl3-Ivlwo_dXKTX9OgBA?e=3gRIzo?nav=eyJyZWZlcnJhbEluZm8iOiB7InJlZmVycmFsQXBwIjogIlN0cmVhbVdlYkFwcCIsICJyZWZlcnJhbFZpZXciOiAiU2hhcmVEaWFsb2ctTGluayIsICJyZWZlcnJhbEFwcFBsYXRmb3JtIjogIldlYiIsICJyZWZlcnJhbE1vZGUiOiAidmlldyJ9LCAicGxheWJhY2tPcHRpb25zIjogeyJzdGFydFRpbWVJblNlY29uZHMiOiAyMjcyfX0=',
  'filepath_field': None,
  'vector_fields': None,
  'start_time': 2272},
 {'content_fields': "going to go a little fast.\n00:37:52 Speaker 1\nI'm back to an empty workspace now. Let's go back to the PDM link training product in the browser view.\n00:37:59 Speaker 1\nHere's my search remembered, and this is the part that I want. I call this the easy button sometimes, but just to the left of the file name column there is this square within a square icon that says open in Creole. This is a shortcut that you get for every single piece of CAD. This column is the same for all the CAD you'll see. That's a shortcut to do that.\n00:38:20 Speaker 1\nOpen in Creole for this part right here. So once you find the part, I'm going to click this now open in Creole with one click. This is in my workspace, and I'm looking at that same CAD.\n00:38:30 Speaker 1\nThis is probably the only way that you will be using to open CAD from read only products, so those of you that mentioned you're on the DFX team or you're somewhere where you're not actively developing or modifying CAD that add to or that open and creo button will be how you open CAD and and you can see it's immediately up on your screen.\n00:38:51 Speaker 1\nYou could spin it around and analyze it however you want, but that's the most common way to get CAD into your workspace, especially as a read only user.\n00:38:59 Speaker 1\nHave I lost you so far?\n00:39:01 Speaker 1\nIf so, please ask the question. Otherwise, I really hope everybody's following along and just maybe healthfully bored. OK, so now that I have the cat in my workspace, well, how do I go about modifying it in some way? And then if I'm happy with that modification, check it back in for everybody else to see.\n00:39:21 Speaker 1\nSo that requires read write access to the product. In this case I do have read write access to where this part is and so I can modify this part if I want.\n00:39:30 Speaker 1\nNow this is not checked out to me right now. Remember I said as soon as you start modifying the part.",
  'title_field': 'PDMLink101.docx',
  'url_field': 'https://microsoft.sharepoint.com/:v:/t/collearninganddevelopmentlnd/EVyZnBbaPUtPuuTINZ9xWnQBxAXl3-Ivlwo_dXKTX9OgBA?e=3gRIzo?nav=eyJyZWZlcnJhbEluZm8iOiB7InJlZmVycmFsQXBwIjogIlN0cmVhbVdlYkFwcCIsICJyZWZlcnJhbFZpZXciOiAiU2hhcmVEaWFsb2ctTGluayIsICJyZWZlcnJhbEFwcFBsYXRmb3JtIjogIldlYiIsICJyZWZlcnJhbE1vZGUiOiAidmlldyJ9LCAicGxheWJhY2tPcHRpb25zIjogeyJzdGFydFRpbWVJblNlY29uZHMiOiAyMzQxfX0=',
  'filepath_field': None,
  'vector_fields': None,
  'start_time': 2341},
 {'content_fields': "refresh.\n00:17:29 Speaker 1\nThis list I have a piece of CAD in here from the PDM link training product. I get a little more workspace room. Here you see the location for this object. It's coming from the PDM link training product.\n00:17:42 Speaker 1\nWell, what happens if I?\n00:17:43 Speaker 1\nWanna go take a look apart at a part from a different product so.\n00:17:48 Speaker 1\nThis breadcrumb links called products will take you to a list of all the products that you have any kind of access to. Doesn't matter if it's read, write or just read. Only this list of products is any product that you have access to and you notice we have over 1000 products in wind chill. If I was looking for Surface Pro.\n00:18:07 Speaker 1\nThree, for example.\n00:18:10 Speaker 1\nI'll just type Surface Pro and in the description column all the release surface products. If you have access to them, you can see the description column defines what they are. So this guy right here is Surface Pro 3. Product CAD code was GP 79. So if I click this product from the name column.\n00:18:29 Speaker 1\nI'll go into the CAD from the Surface Pro 3 cabinet from GP79. Notice I'm in a product called GP 79.\n00:18:38 Speaker 1\nAnd if from here I find the piece of cat I'm interested in opening this in Creole or adding it to your workspace will add it to whatever your current active workspace is, no matter if that workspace is on a current product or the training product, or a read only product.\n00:18:55 Speaker 1\nThis is how you navigate to any product you have access to and then open CAD from that product. I'm just gonna randomly pick one of these parts. I hope I pick one that isn't grabbing a bunch of other stuff, but I'm going to choose open and Creole to this random part from Surface Pro 3. 
It's going to put a copy in my workspace and then open the copy.\n00:19:18 Speaker 1\nAnd as soon as that's done, I'll take you into my workspace to.\n00:19:20 Speaker 1\nShow.\n00:19:20 Speaker 1",
  'title_field': 'PDMLink201.docx',
  'url_field': 'https://microsoft.sharepoint.com/:v:/t/collearninganddevelopmentlnd/EVBUq6yXQ1JLkXnYRDSZfmMB_3-XsmdgUvXnRP8AhQmP3w?e=5rn5CN?nav=eyJyZWZlcnJhbEluZm8iOiB7InJlZmVycmFsQXBwIjogIlN0cmVhbVdlYkFwcCIsICJyZWZlcnJhbFZpZXciOiAiU2hhcmVEaWFsb2ctTGluayIsICJyZWZlcnJhbEFwcFBsYXRmb3JtIjogIldlYiIsICJyZWZlcnJhbE1vZGUiOiAidmlldyJ9LCAicGxheWJhY2tPcHRpb25zIjogeyJzdGFydFRpbWVJblNlY29uZHMiOiAxMDQ5fX0=',
  'filepath_field': None,
  'vector_fields': None,
  'start_time': 1049}]

In [None]:
# Stream link to the "PDMLink 101 Training Class" recording (Mechanical Engineering shared documents):
# https://microsoft.sharepoint.com/teams/collearninganddevelopmentlnd/_layouts/15/stream.aspx?id=%2Fteams%2Fcollearninganddevelopmentlnd%2FShared%20Documents%2FMechanical%20Engineering%2FPDMLink%20101%20Training%20Class%20%2D%20Wednesday%208%5F24%20%40%201%5F00pm%2Emp4&ga=1&referrer=StreamWebApp%2EWeb&referrerScenario=AddressBarCopied%2Eview%2E7c13af20%2Dc463%2D44d5%2D8199%2D241902524cfb

In [116]:
# Print the assembled RAG system prompt so the injected context documents can be inspected.
print(RAG_SYSTEM_PROMPT)

Context information is below.
---------------------

**documents: 1**

Registering a PDM Link Server

Upon first installing Creo, the only files available are those on your computer. If access to the
Windchill PDMLink server is required you will need to set up a server in the Server Manager. You will
learn how to achieve a functional connection between Creo and PDMLink.

Requirements for a Successful Connection
Steps to Register
About ME Systems & Design

In order to successfully connect, you must:

Have an active connection to the Microsoft internal network.
Have an account in the Windchill PDMLink server system.
Have the right to view the Product(s) of interest.

To register a server for the first time:

1. In the Creo Parametric menu select File > Manage Session > Server Management.
2. Select Server > Register New Server... or right-click in the Servers window and select Register

New Server...
3. In the Register New Server dialog that appears, enter the following:

Name: PDMLink (o

In [14]:
# Sign in with the Azure Developer CLI.
# NOTE(review): presumably this is what lets DefaultAzureCredential obtain tokens in this kernel — confirm.
!azd auth login

Retrieving subscriptions...
Logged in to Azure.



To update to the latest version, run:
winget upgrade Microsoft.Azd


In [13]:
from langchain_openai import AzureChatOpenAI
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from backend.auth.auth_utils import get_authenticated_user_details
from backend.security.ms_defender_utils import get_msdefender_user_json
from quart import request
# from llama_index.llms.azure_openai import AzureOpenAI

In [14]:
# --- Azure OpenAI connection settings, all read from the environment ---
api_version = os.environ.get("AZURE_OPENAI_PREVIEW_API_VERSION")
aoai_api_key = os.environ.get("AZURE_OPENAI_KEY")
ad_token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

# Prefer the explicit endpoint; otherwise derive it from the resource name.
# Fail fast when neither is configured, instead of silently producing the
# unusable URL "https://None.openai.azure.com/".
endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
if not endpoint:
    _aoai_resource = os.environ.get("AZURE_OPENAI_RESOURCE")
    if not _aoai_resource:
        raise ValueError(
            "Azure OpenAI endpoint is not configured: set AZURE_OPENAI_ENDPOINT "
            "or AZURE_OPENAI_RESOURCE."
        )
    endpoint = f"https://{_aoai_resource}.openai.azure.com/"

default_headers = {"x-ms-useragent": "GitHubSampleWebApp/AsyncAzureOpenAI/1.0.0"}

# User context for Microsoft Defender, built from an empty request-header dict
# and no conversation id. It is computed here but not passed to the chat client
# in this cell.
authenticated_user_details = get_authenticated_user_details({})
conversation_id = None
user_json = get_msdefender_user_json(authenticated_user_details, {}, conversation_id)

# LangChain chat client used by the cells that follow.
llm = AzureChatOpenAI(
    deployment_name=os.environ.get("AZURE_OPENAI_MODEL"),
    openai_api_version=api_version,
    openai_api_key=aoai_api_key,
    azure_ad_token_provider=ad_token_provider,
    default_headers=default_headers,
    azure_endpoint=endpoint,
    include_response_headers=True,
    streaming=True,
    temperature=0.15,
    # Ask the service to include token usage in streamed responses.
    model_kwargs={"stream_options": {"include_usage": True}},
)

In [45]:
from llama_index.core.llms import ChatMessage  # retained so the name stays available to any other cell that uses it

# `llm` is a LangChain AzureChatOpenAI, which has no llama_index-style `chat`
# method; calling `llm.chat(...)` only worked while `llm` was bound to a
# llama_index client in an earlier kernel state. Use `invoke` with
# (role, content) tuples so the cell runs top-to-bottom on a fresh kernel.
messages = [
    ("system", "You are a pirate with colorful personality."),
    ("human", "Hello"),
]

response = llm.invoke(messages)
# Print only the reply text rather than the full message repr.
print(response.content)

assistant: Ahoy there, matey! Welcome aboard! What brings ye to these treacherous waters today? Lookin' for treasure, a tale, or perhaps a bit o' both? Arrr!


In [6]:
import nest_asyncio
# Patch asyncio so an event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel already runs its own loop — confirm.
nest_asyncio.apply()

In [5]:
# Smoke-test the chat model with a minimal system + human exchange.
messages = [
    ("system", "You are a helpful assistant that helps user."),
    ("human", "How are you?"),
]
ai_msg = llm.invoke(messages)

# The APIM request-id lookup stays disabled: the recorded response_metadata for
# this call contains no 'headers' entry, so the lookup below would raise KeyError.
# apim_request_id = ai_msg.response_metadata['headers']['apim-request-id']

# Leave the message as the last expression so the notebook renders it.
ai_msg


AIMessage(content="I'm just a computer program, so I don't have feelings, but I'm here and ready to help you! How can I assist you today?", additional_kwargs={}, response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_80a1bad4c7'}, id='run-3f724057-5589-4b85-9fd8-311050c8c8b3-0', usage_metadata={'input_tokens': 24, 'output_tokens': 28, 'total_tokens': 52})

In [40]:
from backend.utils import (
    format_as_ndjson,
    format_stream_response,
    format_non_streaming_response,
    convert_to_pf_format,
    format_pf_non_streaming_response,
)