Skip to content

Commit a2d2710

Browse files
committed
Gitlab support for issues, tickets GUI, UI improvements
1 parent b17d7dd commit a2d2710

File tree

13 files changed

+246
-162
lines changed

13 files changed

+246
-162
lines changed

app/data_source/api/basic_document.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,16 @@ class DocumentType(Enum):
99
MESSAGE = "message"
1010
COMMENT = "comment"
1111
PERSON = "person"
12-
GIT_ISSUE = "git_issue"
12+
ISSUE = "issue"
1313
GIT_PR = "git_pr"
1414

1515

16+
class DocumentStatus(Enum):
    """Status values that can be attached to a document (e.g. an issue).

    Each member's value is the plain string form of the status.
    """

    OPEN = "open"
    IN_PROGRESS = "in_progress"
    CLOSED = "closed"
20+
21+
1622
class FileType(Enum):
1723
GOOGLE_DOC = "doc"
1824
DOCX = "docx"
@@ -35,8 +41,8 @@ def from_mime_type(cls, mime_type: str):
3541

3642
@dataclass
3743
class BasicDocument:
38-
id: Union[int, str]
39-
data_source_id: int
44+
id: Union[int, str] # row id in database
45+
data_source_id: int # data source id in database
4046
type: DocumentType
4147
title: str
4248
content: str
@@ -46,6 +52,7 @@ class BasicDocument:
4652
location: str
4753
url: str
4854
file_type: FileType = None
55+
status: DocumentStatus = None
4956

5057
@property
5158
def id_in_data_source(self):

app/data_source/sources/gitlab/__init__.py

Whitespace-only changes.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import logging
2+
from datetime import datetime
3+
4+
import requests
5+
from typing import Dict, List, Optional
6+
7+
from data_source.api.base_data_source import BaseDataSource, BaseDataSourceConfig, ConfigField, HTMLInputType
8+
from data_source.api.basic_document import BasicDocument, DocumentType, DocumentStatus
9+
from data_source.api.exception import InvalidDataSourceConfig
10+
from queues.index_queue import IndexQueue
11+
12+
GITLAB_BASE_URL = "https://gitlab.com/api/v4"
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
class GitlabConfig(BaseDataSourceConfig):
18+
access_token: str
19+
20+
21+
def gitlab_status_to_doc_status(status: str) -> Optional[DocumentStatus]:
    """Translate a GitLab issue state string into a ``DocumentStatus``.

    ``"opened"`` maps to ``DocumentStatus.OPEN`` and ``"closed"`` to
    ``DocumentStatus.CLOSED``. Any other value is logged as a warning and
    ``None`` is returned.
    """
    if status == "closed":
        return DocumentStatus.CLOSED

    if status == "opened":
        return DocumentStatus.OPEN

    # Not a state we know how to map; report it and leave the status unset.
    logger.warning(f"[!] Unknown status {status}")
    return None
29+
30+
31+
class GitlabDataSource(BaseDataSource):
    """Data source that feeds GitLab issues into the index queue.

    Every project returned by ``/projects?membership=true`` is scanned, and
    each issue updated since the last index time is queued as a
    ``BasicDocument`` of type ``DocumentType.ISSUE``.
    """

    # Page size requested from paginated GitLab list endpoints.
    _PER_PAGE = 100

    def _iter_pages(self, url: str, params: Optional[Dict] = None):
        """Yield each page (a list of dicts) of a paginated GitLab list endpoint.

        :param url: endpoint URL without pagination query parameters.
        :param params: extra query parameters sent with every page request.
        :raises requests.RequestException: on transport errors or a non-2xx response.
        :raises ValueError: if a response body is not valid JSON.
        """
        page = 1
        while True:
            query = dict(params or {}, per_page=self._PER_PAGE, page=page)
            response = self._session.get(url, params=query)
            response.raise_for_status()
            items: List[Dict] = response.json()
            yield items

            # A short page means there is nothing left to fetch.
            if len(items) < self._PER_PAGE:
                return

            page += 1

    def _feed_project_issues(self, project: Dict):
        """Queue every issue of ``project`` updated since the last index time.

        All pages of the project's issue list are walked, not just the first.

        :param project: a project dict as returned by the GitLab projects API;
            only its ``"id"`` key is read here.
        :raises requests.RequestException: if fetching a page of issues fails.
        """
        issues_url = f"{GITLAB_BASE_URL}/projects/{project['id']}/issues"

        for issues_page in self._iter_pages(issues_url):
            for issue in issues_page:
                last_modified = datetime.strptime(issue["updated_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
                if last_modified < self._last_index_time:
                    logger.info(f"Issue {issue['id']} is too old, skipping")
                    continue

                doc = BasicDocument(
                    id=issue["id"],
                    data_source_id=self._data_source_id,
                    type=DocumentType.ISSUE,
                    title=issue['title'],
                    # The description may be null; index an empty string instead of None.
                    content=issue["description"] or "",
                    author=issue['author']['name'],
                    author_image_url=issue['author']['avatar_url'],
                    location=issue['references']['full'].replace("/", " / "),
                    url=issue['web_url'],
                    timestamp=last_modified,
                    status=gitlab_status_to_doc_status(issue["state"])
                )
                IndexQueue.get_instance().put_single(doc=doc)

    @staticmethod
    def get_config_fields() -> List[ConfigField]:
        """Return the configuration fields this data source asks the user for."""
        return [
            ConfigField(label="API Access Token", name="access_token", input_type=HTMLInputType.PASSWORD),
        ]

    @staticmethod
    def validate_config(config: Dict) -> None:
        """Check that ``config`` parses and that its token is accepted by GitLab.

        :param config: raw configuration dict; must contain ``access_token``.
        :raises InvalidDataSourceConfig: if the config cannot be parsed or
            GitLab answers the probe request with an error status.
        """
        try:
            parsed_config = GitlabConfig(**config)
            session = requests.Session()
            session.headers.update({"PRIVATE-TOKEN": parsed_config.access_token})
            projects_response = session.get(f"{GITLAB_BASE_URL}/projects?membership=true")
            projects_response.raise_for_status()
        except (KeyError, ValueError, requests.HTTPError) as e:
            # raise_for_status() raises HTTPError, which is not a ValueError,
            # so it has to be listed explicitly to report a rejected token.
            raise InvalidDataSourceConfig from e

    def __init__(self, *args, **kwargs):
        """Parse the raw config and prepare an authenticated HTTP session."""
        super().__init__(*args, **kwargs)
        self.gitlab_config = GitlabConfig(**self._raw_config)
        self._session = requests.Session()
        self._session.headers.update({"PRIVATE-TOKEN": self.gitlab_config.access_token})

    def _list_all_projects(self) -> List[Dict]:
        """Return all projects the token's user is a member of.

        Best effort: if a page cannot be fetched, the error is logged and the
        projects collected so far are returned instead of retrying forever.
        """
        projects: List[Dict] = []

        try:
            for projects_page in self._iter_pages(f"{GITLAB_BASE_URL}/projects", {"membership": "true"}):
                projects.extend(projects_page)
        except (requests.RequestException, ValueError):
            logger.exception("Error while fetching projects")

        return projects

    def _feed_new_documents(self) -> None:
        """Schedule one issue-feeding task per accessible project."""
        for project in self._list_all_projects():
            logger.info(f"Feeding project {project['name']}")
            self.add_task_to_queue(self._feed_project_issues, project=project)

app/data_source/sources/google_drive/google_drive.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,14 @@ def _feed_file(self, file):
180180
author = first_owner.get('displayName')
181181
author_image_url = first_owner.get('photoLink')
182182

183+
# title is file name without extension
184+
title = file['name'].split('.')[0]
185+
183186
doc = BasicDocument(
184187
id=file_id,
185188
data_source_id=self._data_source_id,
186189
type=DocumentType.DOCUMENT,
187-
title=file['name'],
190+
title=title,
188191
content=content,
189192
author=author,
190193
author_image_url=author_image_url,

app/data_source/sources/slack/slack.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def _feed_conversation(self, conv: SlackConversation):
126126
message_id = message['client_msg_id']
127127
readable_timestamp = datetime.datetime.fromtimestamp(float(timestamp))
128128
message_url = f"https://slack.com/app_redirect?channel={conv.id}&message_ts={timestamp}"
129-
last_msg = BasicDocument(title=conv.name, content=text, author=author.name,
129+
last_msg = BasicDocument(title=author.name, content=text, author=author.name,
130130
timestamp=readable_timestamp, id=message_id,
131131
data_source_id=self._data_source_id, location=conv.name,
132132
url=message_url, author_image_url=author.image_url,

app/data_sources/gitlab.py

Lines changed: 0 additions & 124 deletions
This file was deleted.

app/indexing/index_documents.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,26 @@
11
import logging
22
import re
3-
from typing import List
3+
from enum import Enum
4+
from typing import List, Optional
45

56
from data_source.api.basic_document import BasicDocument
7+
from db_engine import Session
8+
from indexing.bm25_index import Bm25Index
9+
from indexing.faiss_index import FaissIndex
10+
from models import bi_encoder
611
from paths import IS_IN_DOCKER
712
from schemas import Document, Paragraph
8-
from models import bi_encoder
9-
from indexing.faiss_index import FaissIndex
10-
from indexing.bm25_index import Bm25Index
11-
from db_engine import Session
12-
1313

1414
logger = logging.getLogger(__name__)
1515

1616

17+
def get_enum_value_or_none(enum: Optional[Enum]) -> Optional[str]:
    """Return ``enum.value``, or ``None`` when no enum member was given."""
    return None if enum is None else enum.value
22+
23+
1724
class Indexer:
1825

1926
@staticmethod
@@ -23,7 +30,8 @@ def index_documents(documents: List[BasicDocument]):
2330
ids_in_data_source = [document.id_in_data_source for document in documents]
2431

2532
with Session() as session:
26-
documents_to_delete = session.query(Document).filter(Document.id_in_data_source.in_(ids_in_data_source)).all()
33+
documents_to_delete = session.query(Document).filter(
34+
Document.id_in_data_source.in_(ids_in_data_source)).all()
2735
if documents_to_delete:
2836
logging.info(f'removing documents that were updated and need to be re-indexed.')
2937
Indexer.remove_documents(documents_to_delete, session)
@@ -43,7 +51,8 @@ def index_documents(documents: List[BasicDocument]):
4351
data_source_id=document.data_source_id,
4452
id_in_data_source=document.id_in_data_source,
4553
type=document.type.value,
46-
file_type=document.file_type.value if document.file_type is not None else None,
54+
file_type=get_enum_value_or_none(document.file_type),
55+
status=get_enum_value_or_none(document.status),
4756
title=document.title,
4857
author=document.author,
4958
author_image_url=document.author_image_url,
@@ -120,7 +129,7 @@ def _add_metadata_for_indexing(paragraph: Paragraph) -> str:
120129
return result
121130

122131
@staticmethod
123-
def remove_documents(documents: List[Document], session = None):
132+
def remove_documents(documents: List[Document], session=None):
124133
logger.info(f"Removing {len(documents)} documents")
125134

126135
# Get the paragraphs from the documents
@@ -135,4 +144,4 @@ def remove_documents(documents: List[Document], session = None):
135144
logger.info(f"Removing documents from BM25 index...")
136145
Bm25Index.get().update(session=session)
137146

138-
logger.info(f"Finished removing {len(documents)} documents => {len(db_paragraphs)} paragraphs")
147+
logger.info(f"Finished removing {len(documents)} documents => {len(db_paragraphs)} paragraphs")

app/schemas/document.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Optional
22

33
from schemas.base import Base
4-
from sqlalchemy import String, DateTime, ForeignKey, Column, Integer
4+
from sqlalchemy import String, DateTime, ForeignKey, Column, Integer, Boolean
55
from sqlalchemy.orm import Mapped, mapped_column, relationship
66

77

@@ -14,6 +14,7 @@ class Document(Base):
1414
data_source = relationship("DataSource", back_populates="documents")
1515
type: Mapped[Optional[str]] = mapped_column(String(32))
1616
file_type: Mapped[Optional[str]] = mapped_column(String(32))
17+
status: Mapped[Optional[bool]] = mapped_column(String(32))
1718
title: Mapped[Optional[str]] = mapped_column(String(128))
1819
author: Mapped[Optional[str]] = mapped_column(String(64))
1920
author_image_url: Mapped[Optional[str]] = mapped_column(String(512))
8.01 KB
Loading

ui/src/assets/images/calendar.svg

Lines changed: 7 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)