@@ -20,27 +20,78 @@ class GitlabConfig(BaseModel):
2020
2121
2222class GitlabDataSource (BaseDataSource ):
23+
24+ def _parse_issues (self , documents : [], project_id : str , project_url : str ):
25+ issues_url = f"{ GITLAB_BASE_URL } /projects/{ project_id } /issues"
26+
27+ issues_response = self ._session .get (issues_url )
28+ issues_response .raise_for_status ()
29+ issues_json = issues_response .json ()
30+
31+ for issue in issues_json :
32+ last_modified = datetime .strptime (issue ["updated_at" ], "%Y-%m-%dT%H:%M:%S.%fZ" )
33+ if last_modified < self ._last_index_time :
34+ continue
35+
36+ documents .append (BasicDocument (
37+ id = issue ["id" ],
38+ data_source_id = self ._data_source_id ,
39+ type = DocumentType .GIT_ISSUE ,
40+ title = issue ['title' ],
41+ content = issue ["description" ] if not None else "" ,
42+ author = issue ['author' ]['name' ],
43+ author_image_url = issue ['author' ]['avatar_url' ],
44+ location = project_url ,
45+ url = issue ['web_url' ],
46+ timestamp = last_modified
47+ ))
48+
49+ def _parse_pull_requests (self , documents : [], project_id : str , project_url : str ):
50+ pull_requests_url = f"{ GITLAB_BASE_URL } /projects/{ project_id } /merge_requests"
51+
52+ pull_requests_response = self ._session .get (pull_requests_url )
53+ pull_requests_response .raise_for_status ()
54+ pull_requests_json = pull_requests_response .json ()
55+
56+ for pull_request in pull_requests_json :
57+ last_modified = datetime .strptime (pull_request ["updated_at" ], "%Y-%m-%dT%H:%M:%S.%fZ" )
58+ if last_modified < self ._last_index_time :
59+ continue
60+
61+ documents .append (BasicDocument (
62+ id = pull_request ["id" ],
63+ data_source_id = self ._data_source_id ,
64+ type = DocumentType .GIT_PR ,
65+ title = pull_request ['title' ],
66+ content = pull_request ["description" ] if not None else "" ,
67+ author = pull_request ['author' ]['name' ],
68+ author_image_url = pull_request ['author' ]['avatar_url' ],
69+ location = project_url ,
70+ url = pull_request ['web_url' ],
71+ timestamp = last_modified
72+ ))
73+
2374 @staticmethod
2475 def validate_config (config : Dict ) -> None :
2576 try :
2677 parsed_config = GitlabConfig (** config )
2778 session = requests .Session ()
2879 session .headers .update ({"PRIVATE-TOKEN" : parsed_config .access_token })
2980 projects_response = session .get (PROJECTS_URL )
30- if projects_response .status_code != 200 :
31- raise ValueError ("Invalid api key" )
81+ projects_response .raise_for_status ()
3282 except (KeyError , ValueError ) as e :
3383 raise InvalidDataSourceConfig from e
3484
3585 def __init__ (self , * args , ** kwargs ):
3686 super ().__init__ (* args , ** kwargs )
3787 # Create a access token with sufficient permissions in https://gitlab.com/-/profile/personal_access_tokens
3888 self .gitlab_config = GitlabConfig (** self ._config )
39- self .session = requests .Session ()
40- self .session .headers .update ({"PRIVATE-TOKEN" : self .gitlab_config .access_token })
89+ self ._session = requests .Session ()
90+ self ._session .headers .update ({"PRIVATE-TOKEN" : self .gitlab_config .access_token })
4191
4292 def _feed_new_documents (self ) -> None :
43- projects_response = self .session .get (PROJECTS_URL )
93+ projects_response = self ._session .get (PROJECTS_URL )
94+ projects_response .raise_for_status ()
4495 projects = projects_response .json ()
4596
4697 self ._parse_projects_in_parallel (projects )
@@ -51,49 +102,9 @@ def _parse_projects_worker(self, projects):
51102
52103 for project in projects :
53104 project_id = project ["id" ]
54- issues_url = f"{ GITLAB_BASE_URL } /projects/{ project_id } /issues"
55- issues_response = self .session .get (issues_url )
56- issues_json = issues_response .json ()
57-
58- for issue in issues_json :
59- last_modified = datetime .strptime (issue ["updated_at" ], "%Y-%m-%dT%H:%M:%S.%fZ" )
60- if last_modified < self ._last_index_time :
61- continue
62-
63- documents .append (BasicDocument (
64- id = issue ["id" ],
65- data_source_id = self ._data_source_id ,
66- type = DocumentType .DOCUMENT ,
67- title = issue ['title' ],
68- content = issue ["description" ] if not None else "" ,
69- author = issue ['author' ]['name' ],
70- author_image_url = issue ['author' ]['avatar_url' ],
71- location = project ["web_url" ],
72- url = issue ['web_url' ],
73- timestamp = last_modified
74- ))
75-
76- pull_requests_url = f"{ GITLAB_BASE_URL } /projects/{ project_id } /merge_requests"
77- pull_requests_response = self .session .get (pull_requests_url )
78- pull_requests_json = pull_requests_response .json ()
79-
80- for pull_request in pull_requests_json :
81- last_modified = datetime .strptime (pull_request ["updated_at" ], "%Y-%m-%dT%H:%M:%S.%fZ" )
82- if last_modified < self ._last_index_time :
83- continue
84-
85- documents .append (BasicDocument (
86- id = pull_request ["id" ],
87- data_source_id = self ._data_source_id ,
88- type = DocumentType .DOCUMENT ,
89- title = pull_request ['title' ],
90- content = pull_request ["description" ] if not None else "" ,
91- author = pull_request ['author' ]['name' ],
92- author_image_url = pull_request ['author' ]['avatar_url' ],
93- location = project ["web_url" ],
94- url = pull_request ['web_url' ],
95- timestamp = last_modified
96- ))
105+ project_url = project ["web_url" ]
106+ self ._parse_issues (documents , project_id , project_url )
107+ self ._parse_pull_requests (documents , project_id , project_url )
97108
98109 IndexingQueue .get ().feed (documents )
99110
0 commit comments