In [None]:
!pip3 install aperturedb



In [None]:
!pip3 install langchain_community
!pip3 install langchain_google_genai
!pip3 install langchain_ollama



In [None]:
import requests
import google.generativeai as palm
from dotenv import load_dotenv
import os
from langchain_community.vectorstores import ApertureDB
import logging
import sys
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from google.colab import userdata
from langchain.embeddings import HuggingFaceEmbeddings

from langchain_ollama import OllamaEmbeddings



In [None]:
# Read the notebook's configuration through `userdata.get` from google.colab
# (imported above). Despite the earlier wording, nothing is read from a .env
# file in this cell.
# GITHUB_REPO and GITHUB_TOKEN are passed to fetch_github_issues further down.
GITHUB_REPO = userdata.get("GITHUB_REPO")
GITHUB_TOKEN = userdata.get("GITHUB_TOKEN")
# NOTE(review): GOOGLE_API_KEY is not referenced by any other visible cell.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
#APERTUREDB_HOST = userdata.get("APERTUREDB_HOST")


In [None]:

# Name of the descriptor set handed to the ApertureDB vector store below.
DESCRIPTOR_SET = "github_issue_embeddings"


# Model identifier passed to HuggingFaceEmbeddings.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # Example model

# Initialize Hugging Face embeddings for MODEL_NAME (a sentence-transformers
# MiniLM model id, not a Llama model).
embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)

# Vector store used by load_issues_into_aperturedb to add documents.
# NOTE(review): no connection settings are passed here; presumably they are
# picked up from the environment/ApertureDB configuration -- confirm.
vectorstore = ApertureDB(embeddings=embeddings, descriptor_set=DESCRIPTOR_SET)



In [None]:
# Function to fetch issues from GitHub, one page at a time
def fetch_github_issues(repo, token, state="closed", per_page=100, max_pages=10, timeout=30):
    """Fetch issues for ``repo`` from the GitHub REST API, page by page.

    Args:
        repo: Repository in ``owner/name`` form; interpolated into the URL.
        token: GitHub token sent in the ``Authorization`` header.
        state: Value of the ``state`` query parameter (default ``"closed"``).
        per_page: Requested page size. GitHub caps this at 100, so the value
            is clamped to the range 1..100 before use.
        max_pages: Maximum number of pages to request (default 10).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with the decoded JSON items of every fetched page, in order.
        NOTE: per GitHub's REST documentation this endpoint also returns pull
        requests; they are not filtered out here.

    Raises:
        Exception: If a response has a status code other than 200.
    """
    url = f"https://api.github.com/repos/{repo}/issues"
    headers = {"Authorization": f"token {token}"}
    # Asking for more than 100 is silently reduced by the server, which would
    # break the short-page check below, so clamp explicitly.
    per_page = max(1, min(int(per_page), 100))
    issues = []

    for page in range(1, max_pages + 1):
        response = requests.get(
            url,
            headers=headers,
            params={"page": page, "state": state, "per_page": per_page},
            timeout=timeout,
        )
        if response.status_code != 200:
            # Error bodies are usually JSON, but fall back to raw text so the
            # original failure is not masked by a decoding error.
            try:
                detail = response.json()
            except ValueError:
                detail = response.text
            raise Exception(f"GitHub API error: HTTP {response.status_code}: {detail}")

        page_issues = response.json()
        if not page_issues:
            break

        issues.extend(page_issues)
        # A page shorter than requested is the last one; skip the extra
        # request that would only return an empty list.
        if len(page_issues) < per_page:
            break

    return issues

In [None]:
# Function to load issues into ApertureDB
def load_issues_into_aperturedb(issues):
    """Turn GitHub issues into documents and add them to ``vectorstore``.

    Each usable issue becomes one ``Document`` whose text is the title,
    followed by a blank line and the body when a body exists. The issue's
    ``id`` and ``number`` are stored as metadata.

    Args:
        issues: Iterable of issue dicts. The keys read are ``id``, ``title``,
            ``body`` and ``number``.

    Returns:
        None. A summary of what was loaded is printed.
    """
    document_list = []
    skipped = 0
    for issue in issues:
        issue_id = issue.get("id")
        title = issue.get("title")

        # A document needs both an identity and some text; records lacking
        # either are skipped instead of being embedded as the string "None".
        if issue_id is None or title is None:
            skipped += 1
            continue

        # The GitHub API can return "body": null, in which case a .get()
        # default is never used; normalise a missing/empty body explicitly.
        body = issue.get("body") or ""
        full_text = f"{title}\n\n{body}" if body else title

        document_list.append(
            Document(
                page_content=full_text,
                metadata={
                    "id": issue_id,
                    "number": issue.get("number"),
                },
            )
        )

    if skipped:
        print(f"Skipped {skipped} issues without an id or title")

    if not document_list:
        # Nothing to embed; avoid calling the vector store with an empty batch.
        print("No documents to load")
        return

    response = vectorstore.add_documents(document_list)
    # Report counts rather than dumping every returned id into the output.
    print(f"Loaded {len(response)} of {len(document_list)} documents")
    if len(response) != len(document_list):
        print(
            "Failed to load some documents: "
            f"expected {len(document_list)} ids, got {len(response)}"
        )

In [None]:

# Fetch the issues for GITHUB_REPO and keep them in `issues` for the loading
# cell that follows.
print("Fetching issues from GitHub...")

issues = fetch_github_issues(GITHUB_REPO, GITHUB_TOKEN)

# Both messages below report the same count, len(issues).
print(f"No of issues for that repository {len(issues)}")

# The load itself is done by a later cell, not here.
print(f"Fetched {len(issues)} issues. Loading into ApertureDB...")


Fetching issues from GitHub...
No of issues for that repository 1000
Fetched 1000 issues. Loading into ApertureDB...


In [None]:
# Add the fetched issues to the vector store.
load_issues_into_aperturedb(issues)

# NOTE(review): this message is printed unconditionally; the call above
# returns nothing, so a partial load is only visible in that call's own output.
print("All issues loaded successfully!")



['0a495ac8-b221-4704-adf0-a51d6f773271', 'b2798b26-04b0-47aa-8e07-b00d8d1d1db3', '9fa5a979-480b-4252-882e-4c6256cf8584', '361ffc3d-19a5-4cf2-abec-f5f6cedf1b9e', '03122d70-998c-42d5-a85e-bdc670343718', '1b90fe6b-f5a6-4367-8829-94611c635fff', '8a4406af-b24c-469c-b5da-d84c4d198447', 'a568bca6-9800-4591-b57c-1dea009893d8', 'e99b83e1-6c82-4f69-9e3b-3161f3ebefc9', '5ed9f17e-73ad-4f5f-8728-86a1c5177344', '3ead15d6-7fe7-42f8-b0df-16057a6036f2', 'a37cfb39-0b12-46af-b31d-032810c8543f', 'd7b11a09-bf1f-4820-9978-c22c74364350', '72889071-bddb-496a-ae6d-25518ff13c29', 'b3c00950-77b8-469b-a4c7-34c2bd581b57', '1a25d673-6980-4096-9d04-11adfd1d9282', '63563303-e793-4bab-b036-2f3e6990b777', '05cae946-a992-4db1-85c0-4298a1115e0f', '9c1dada8-11d8-4d32-9705-163dca63588f', '27f9fbc9-450f-4e03-af41-638a525b650d', '1a02a975-4434-40f8-9745-494811686218', 'a1dac586-96a0-4b5c-af11-bd7f0f3b8ac0', '7497febc-81e0-4233-bf66-a0c5e0ccc888', 'a2e911cd-544d-43fc-9b6b-f267c0055ae6', '0e5557b4-5bde-4439-b092-8619c6efbd69',

In [None]:
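# Optional cleanup: uncomment to call delete_vectorstore for DESCRIPTOR_SET.
# NOTE(review): presumably this removes every embedding stored in that set -- confirm before running.
#vectorstore.delete_vectorstore(DESCRIPTOR_SET)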