In [8]:
import os

import requests

# Credentials come from the environment so real secrets are never saved inside
# the notebook. The placeholder strings are only used when a variable is unset.
tenant_id = os.environ.get("AZURE_TENANT_ID", "your-tenant-id")
client_id = os.environ.get("AZURE_CLIENT_ID", "your-client-id")
client_secret = os.environ.get("AZURE_CLIENT_SECRET", "your-client-secret")

# The token is requested for Microsoft Graph, which is the API the later cells
# call; this is not a SharePoint hostname.
resource = "https://graph.microsoft.com"

token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"

# Client-credentials grant: the app authenticates as itself, with no user involved.
payload = {
    "grant_type": "client_credentials",
    "client_id": client_id,
    "client_secret": client_secret,
    "resource": resource,
}

# Without a timeout, requests waits indefinitely on an unresponsive endpoint.
response = requests.post(token_url, data=payload, timeout=30)

# Start from None so a token left over from a previous run of this cell is
# never silently reused after a failed request.
access_token = None
if response.status_code == 200:
    access_token = response.json().get("access_token")

if access_token:
    # The bearer token itself is deliberately not printed: cell outputs are
    # stored in the notebook file and would leak the credential when shared.
    print("Access token acquired.")
else:
    print("Error:", response.status_code, response.text)

Error: 400 {"error":"invalid_request","error_description":"AADSTS900023: Specified tenant identifier 'your-tenent-id' is neither a valid DNS name, nor a valid external domain. Trace ID: 8d023769-45e5-4782-ba0a-c3cda2c85400 Correlation ID: 56284c0e-511b-4310-bc7e-90c2d98c19d5 Timestamp: 2025-03-03 20:47:25Z","error_codes":[900023],"timestamp":"2025-03-03 20:47:25Z","trace_id":"8d023769-45e5-4782-ba0a-c3cda2c85400","correlation_id":"56284c0e-511b-4310-bc7e-90c2d98c19d5","error_uri":"https://login.microsoftonline.com/error?code=900023"}


In [3]:
# Every Graph call in this cell authenticates with the bearer token and asks for JSON.
headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

# Graph endpoint that enumerates SharePoint sites.
graph_api_url = "https://graph.microsoft.com/v1.0/sites"

response = requests.get(graph_api_url, headers=headers)

if response.status_code != 200:
    # Surface the HTTP status and raw body so the failure can be diagnosed.
    print("Error:", response.status_code, response.text)
else:
    data = response.json()

    # Dump the raw payload so the available fields can be inspected.
    print("Full API Response:", data)

    print("\nSharePoint Sites:")
    for site in data.get("value", []):
        # Fall back to readable placeholders when a field is absent.
        site_name = site.get("displayName", "Unknown Site")
        site_id = site.get("id", "No ID Available")
        print(f"- {site_name} (ID: {site_id})")

Full API Response: {'@odata.context': 'https://graph.microsoft.com/v1.0/$metadata#sites', 'value': [{'createdDateTime': '2025-02-26T21:09:34Z', 'id': 'senecazz.sharepoint.com,d699f9b4-d438-4725-b214-b6ce8f8ecae4,3cb11767-f403-4e85-926a-630ddfd8abde', 'name': 'Project Test', 'webUrl': 'https://senecazz.sharepoint.com/sites/ProjectTest', 'displayName': 'Project Test', 'isPersonalSite': False, 'siteCollection': {'hostname': 'senecazz.sharepoint.com'}, 'root': {}}, {'createdDateTime': '2025-02-26T21:05:59Z', 'id': 'senecazz-my.sharepoint.com,d1ea8356-480e-4a63-ba8d-57cdc7cec472,be7fe72c-68a8-4246-ab61-81424ab7909b', 'name': 'Vipin Nandal', 'webUrl': 'https://senecazz-my.sharepoint.com/personal/vipinnandal_senecazz_onmicrosoft_com', 'displayName': 'Vipin Nandal', 'isPersonalSite': True, 'siteCollection': {'hostname': 'senecazz-my.sharepoint.com'}, 'root': {}}, {'createdDateTime': '2025-02-26T20:55:02Z', 'id': 'senecazz.sharepoint.com,a8e50567-bbb8-4005-b0e3-befe21dc0558,894754f5-859f-43be-b

In [None]:
site_id = "your-site-id"  # Placeholder: substitute the Site ID of the target site

# Bearer-token auth plus a JSON response for the Graph request below.
headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

# Graph endpoint that returns the drive belonging to the chosen site.
graph_api_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive"

response = requests.get(graph_api_url, headers=headers)

if response.status_code != 200:
    # Show the HTTP status and raw body so the failure can be diagnosed.
    print("Error:", response.status_code, response.text)
else:
    data = response.json()
    # The drive's identifier is the top-level "id" field of the response.
    drive_id = data.get("id")
    print(f"Drive ID for 'Project Test': {drive_id}")

Drive ID for 'Project Test': b!tPmZ1jjUJUeyFLbOj47K5GcXsTwD9IVOkmpjDd_Yq97LTtIsTRVCQ442jQCQ8Nfn


In [None]:
drive_id = "your-drive-id"  # Placeholder: substitute the Drive ID of the target library
graph_api_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children"

headers = {
    "Authorization": f"Bearer {access_token}",
    "Accept": "application/json"
}

# Collected entries; each is a dict with the item's "name", "url" and "id".
documents = []

# Graph returns large collections one page at a time and puts the URL of the
# next page in "@odata.nextLink". Keep following it until it is absent,
# otherwise only the first page of the folder would be listed.
next_url = graph_api_url
header_printed = False

while next_url:
    # The timeout stops the cell from hanging forever on an unresponsive server.
    response = requests.get(next_url, headers=headers, timeout=30)

    if response.status_code != 200:
        # Stop paging on the first failure; entries gathered so far are kept.
        print("Error:", response.status_code, response.text)
        break

    data = response.json()

    if not header_printed:
        print("All Documents in 'Shared Documents':")
        header_printed = True

    for file in data.get("value", []):
        doc = {
            "name": file["name"],
            "url": file["webUrl"],
            "id": file["id"],
        }
        documents.append(doc)
        print(f"- {doc['name']} (URL: {doc['url']})")

    # None when this was the last page, which ends the loop.
    next_url = data.get("@odata.nextLink")


All Documents in 'Shared Documents':
- Test (URL: https://senecazz.sharepoint.com/sites/ProjectTest/Shared%20Documents/Test)
- assignment-5.pdf (URL: https://senecazz.sharepoint.com/sites/ProjectTest/Shared%20Documents/assignment-5.pdf)
- Machine Learning Deployment Assignment.pdf (URL: https://senecazz.sharepoint.com/sites/ProjectTest/Shared%20Documents/Machine%20Learning%20Deployment%20Assignment.pdf)
- Project_1.pdf (URL: https://senecazz.sharepoint.com/sites/ProjectTest/Shared%20Documents/Project_1.pdf)
- Worksheet5_Sobel.pdf (URL: https://senecazz.sharepoint.com/sites/ProjectTest/Shared%20Documents/Worksheet5_Sobel.pdf)


In [9]:
import faiss
import numpy as np
import pickle
import ollama


# Build a FAISS inner-product index over one embedding per listed document and
# persist both the index and the matching metadata list to disk.

# An empty listing cannot be indexed; fail with a clear message rather than
# the opaque error np.vstack raises on an empty list.
if not documents:
    raise ValueError(
        "No documents to index. Run the document listing cell successfully first."
    )

doc_embeddings = []
doc_metadata = []  # Row i of the index corresponds to doc_metadata[i]

for doc in documents:
    text_to_embed = f"Document: {doc['name']} - URL: {doc['url']}"
    # Generate embedding using Ollama Llama 3.2
    embedding = ollama.embeddings("llama3.2", text_to_embed)["embedding"]
    doc_embeddings.append(np.array(embedding, dtype=np.float32))
    doc_metadata.append(doc)  # Save metadata in the same order as the vectors

# Stack into a (num_documents, embedding_dim) matrix.
doc_embeddings = np.vstack(doc_embeddings)

# L2-normalise every row so the inner product ranks by cosine similarity
# instead of favouring vectors that merely have a large magnitude. The query's
# own length scales all scores equally, so ranking is correct even when the
# query vector is not normalised.
norms = np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
norms[norms == 0] = 1.0  # Leave all-zero rows untouched instead of dividing by zero
doc_embeddings = np.ascontiguousarray(doc_embeddings / norms, dtype=np.float32)

# Take the dimension from the vectors the model actually returned rather than
# hard-coding it, so a model variant with a different width still works.
embedding_dim = doc_embeddings.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(doc_embeddings)

# Save the FAISS index to a file
faiss.write_index(index, "sharepoint_index.faiss")

# Save the document metadata (doc_metadata) to a pickle file
with open("doc_metadata.pkl", "wb") as f:
    pickle.dump(doc_metadata, f)

print("✅ FAISS index and document metadata saved!")

✅ FAISS index and document metadata saved!


In [10]:
import gradio as gr

# Restore the persisted search state: the FAISS index and the pickled metadata list.
# Any failure is reported and leaves the "no index" state (index=None, empty
# metadata) behind, so the search function can answer with an error message
# instead of crashing.
try:
    index = faiss.read_index("sharepoint_index.faiss")
    with open("doc_metadata.pkl", "rb") as metadata_file:
        doc_metadata = pickle.load(metadata_file)
except Exception as err:
    print("Error loading index or metadata:", err)
    index, doc_metadata = None, []
else:
    print("Loaded FAISS index and document metadata successfully.")

def search_documents(query, k=3):
    """
    Return the top-k documents matching a natural-language query as Markdown.

    The query is embedded with Ollama Llama 3.2 and looked up in the
    module-level FAISS ``index``; hits are mapped back to ``doc_metadata``
    by row position and rendered as clickable Markdown links.

    Args:
        query: Free-text search string.
        k: Maximum number of results to return (default 3).

    Returns:
        A Markdown string. This is a bullet list of ``[name](url)`` links
        under a "Search Results" heading, or a short message when the query
        is empty, no index is loaded, or nothing matched.
    """
    # Nothing to embed for an empty or whitespace-only query.
    if not query or not query.strip():
        return "Please enter a search query."

    if index is None or len(doc_metadata) == 0:
        return "Error: No index available. Please rebuild the index first."

    # Get embedding for the query as a single-row float32 matrix.
    result = ollama.embeddings("llama3.2", query)
    query_vector = np.array(result["embedding"], dtype=np.float32).reshape(1, -1)

    md_output = ["**Search Results**"]

    # Never ask for more neighbours than the index holds: FAISS pads the
    # missing slots with -1, which would otherwise be read as
    # doc_metadata[-1] and wrongly return the last document.
    top_k = min(k, index.ntotal)
    if top_k > 0:
        _, indices = index.search(query_vector, top_k)
        for idx in indices[0]:
            # Skip padding (-1) and any row without a metadata entry.
            if idx < 0 or idx >= len(doc_metadata):
                continue
            doc = doc_metadata[idx]
            # Create a clickable link in Markdown
            md_output.append(f"- [{doc['name']}]({doc['url']})")

    if len(md_output) == 1:
        md_output.append("No matching documents found.")

    # Join results into a single Markdown string
    return "\n".join(md_output)

# Build the input and output widgets first, then hand them to the Interface.
query_box = gr.Textbox(lines=2, label="Search Query")
# A Markdown component renders the returned links as clickable.
results_view = gr.Markdown(label="Search Results")

iface = gr.Interface(
    fn=search_documents,
    inputs=query_box,
    outputs=results_view,
    title="SharePoint Natural Language Search",
    description="Enter a natural language query to search documents in the SharePoint site using Ollama Llama 3.2 and FAISS.",
)

iface.launch()

Loaded FAISS index and document metadata successfully.
* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


