[WiP] revive issue parser for #103
nelsonic committed May 29, 2019
1 parent 1d94380 commit e6c4df4
Showing 4 changed files with 43 additions and 36 deletions.
50 changes: 26 additions & 24 deletions lib/issue.js
@@ -1,51 +1,53 @@
 /**
  * profile method scrapes a given GitHub user profile
  * @param {Object} $ - cheerio object with DOM of page to be scraped
- * @param {string} url - a valid GitHub issue url
- * @param {function} callback - the callback we should call after scraping
+ * @param {String} url - a valid GitHub issue url
+ * @param {Function} callback - the callback we should call after scraping
  * a callback passed into this method should accept two parameters:
- * @param {objectj} error an error object (set to null if no error occurred)
- * @param {object} data - the complete issue contents + meta data
+ * @param {Object} error an error object (set to null if no error occurred)
+ * @param {Object} data - the complete issue contents + meta data
  */
 module.exports = function issue($, url, callback) {
 /* UNCOMMENT THIS IF YOU HAVE TIME/PATIENTCE TO FIX IT...!

   var data = { entries : [], labels : [], participants : [] };
   data.url = url;
-  data.title = $('.js-issue-title').first().text().trim();
-  data.state = $('.state').first().text().trim();
-  console.log(' - - - - - > ' +data.state)
+  // console.log($('.gh-header-title'));
+  data.title = $('.gh-header-title').first().text().trim().split('\n')[0];
+
+  data.state = $('.State').first().text().trim();
   data.author = $('.gh-header-meta .author').first().text().trim();
-  data.created = $('.gh-header-meta time')[0].attribs.datetime;
+  data.created = $('relative-time')[0].attribs.datetime;

   // labels
-  $('.label').each(function(){
+  $('.IssueLabel').each(function(){
     data.labels.push($(this).attr('title'));
   })
   // data.labels.filter((l) => l === true);
   var milestone = $('.milestone-name')
   if(milestone.length > 0){
     data.milestone = milestone[0].attribs.title;
   }
-  var assignee = $('.sidebar-assignee img');
+  var assignee = $('.assignee');
   if(assignee.length > 0){
-    data.assignee = assignee[0].attribs.alt.replace('@', '');
+    data.assignee = assignee.text().trim();
   }

   //participants
   $('.participant-avatar').each(function(){
-    data.participants.push($(this).attr('aria-label'));
+    data.participants.push($(this).attr('href').replace('/',''));
   })
+  console.log(' - - - - - > data', data)
   // NOTE: this is possibly the most messed up DOM structure ever!
-  // its almost as if someone @GitHub is deliberately trying ot prevent crawlers!
+  // its almost as if someone @GitHub is deliberately trying to prevent crawlers
   var entries = $('.comment:nth-child(2)'); // yes! its bananas!
-  for(var i=0; i < entries.length; i++) {
-    var id = entries[i].attribs.id; // see: http://git.io/vOC5d
-    var entry = {"id":id};
-    entry.author = $('#'+id+' .author').attr('href').replace('/','');
-    entry.created = $('#'+id+' time').attr('datetime');
-    entry.body = $('#'+id+' .comment-body').first().text().trim();
-    data.entries.push(entry);
-  }
+  // for(var i=0; i < entries.length; i++) {
+  //   var id = entries[i].attribs.id; // see: http://git.io/vOC5d
+  //   var entry = {"id":id};
+  //   entry.author = $('#'+id+' .author').attr('href').replace('/','');
+  //   entry.created = $('#'+id+' time').attr('datetime');
+  //   entry.body = $('#'+id+' .comment-body').first().text().trim();
+  //   data.entries.push(entry);
+  // }
   return callback(null, data);
 */

 }
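
For orientation only (not part of this commit): per the JSDoc above, the revived scraper expects a cheerio DOM, the issue URL and a Node-style callback. A minimal sketch of how it might be driven once the commented-out body is live, assuming the page HTML is fetched with Node's built-in https module and that the require path matches the repo layout:

    var https = require('https');
    var cheerio = require('cheerio');
    var issue = require('./lib/issue'); // path assumed from the repo layout

    var url = '/dwyl/tudo/issues/51';
    https.get('https://github.com' + url, function (res) {
      var html = '';
      res.on('data', function (chunk) { html += chunk; });
      res.on('end', function () {
        var $ = cheerio.load(html);          // cheerio DOM, as expected by issue($, url, callback)
        issue($, url, function (err, data) { // callback receives (error, data)
          if (err) { return console.error(err); }
          console.log(data.title, data.state, data.labels, data.participants);
        });
      });
    });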
2 changes: 1 addition & 1 deletion lib/scrapers.js
@@ -1,7 +1,7 @@
 module.exports = {
   // feed: require('./feed'), // activity feed (RSS)
   followers: require('./followers'), // also scrapes following or stargazers
-  // issue: require('./issue'),
+  issue: require('./issue'),
   // issues: require('./issues'),
   // issues_search: require('./issues_search'),
   // labels : require('./labels'),
13 changes: 7 additions & 6 deletions lib/switcher.js
@@ -76,18 +76,19 @@ module.exports = function switcher (url, callback) {
   else if(url.match(/people/)) {
     scraper = 'people';
   }
-  else {
-    scraper = 'repo';
-  }
   // else if(url.match(/milestones/)) {
   //   scraper = 'milestones';
   // }
   // else if(url.match(/labels/)) {
   //   scraper = 'labels';
   // }
-  // else if($('.issue').length > 0) {
-  //   scraper = 'issue';
-  // }
+  else if($('.issue').length > 0) {
+    scraper = 'issue';
+  }
+  else {
+    scraper = 'repo';
+  }
+
   // else { // else if(url.match(/issues/)) {
   //   scraper = 'issues';
   // }
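Context note (not from the diff itself): switcher falls through these checks in order, so moving the $('.issue') test ahead of the catch-all routes an issue page to the issue scraper instead of defaulting to 'repo'. A hedged usage sketch, mirroring how the test below drives it:

    var switcher = require('./lib/switcher'); // path assumed from the repo layout

    // A URL whose page DOM contains a '.issue' element should now hit the
    // 'issue' branch before falling back to the 'repo' scraper.
    switcher('/dwyl/tudo/issues/51', function (err, data) {
      if (err) { return console.error(err); }
      console.log(data.state, data.participants);
    });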
14 changes: 9 additions & 5 deletions test/issue.test.js
@@ -1,7 +1,7 @@
 var test = require('tape');
 var issue = require('../lib/switcher');

-test.skip('Scrape /dwyl/tudo/issues/51 for all comments and meta-data', function(t){
+test.only('Scrape /dwyl/tudo/issues/51 for comments & meta-data', function (t) {
   var url = '/dwyl/tudo/issues/51';
   issue(url, function(err, data) {
     t.ok(data.url.indexOf(url) > -1, url + ' is: ' +data.url)
@@ -11,12 +11,16 @@ test.skip('Scrape /dwyl/tudo/issues/51 for all comments and meta-data', function
     t.ok(data.created.length > 0, url + ' was created on: '+data.created);
     // labels
     t.ok(data.labels.length > 2, url + ' has '+data.labels.length + ' labels')
-    t.ok(data.milestone === 'Minimal Usable Product', 'Milestone is: '+data.milestone);
+    t.ok(data.milestone === 'Minimal Usable Product', 'Milestone is: '
+      + data.milestone);
     t.ok(data.assignee.length > 0, url + ' has assignee: '+ data.assignee);
-    t.ok(data.participants.length > 2, url + ' has participants: ' + data.participants);
-    t.ok(data.participants.indexOf('iteles') > -1, url + ' has participation from @iteles');
+    t.ok(data.participants.length > 2, url + ' has participants: '
+      + data.participants);
+    t.ok(data.participants.indexOf('iteles') > -1, url
+      + ' has participation from @iteles');

-    t.ok(data.entries.length > 2, url + ' has: '+data.entries.length);
+    t.ok(data.entries.length > 2,
+      url + ' has: '+data.entries.length + ' comments');

     t.end();
   });
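Side note on the test change (an observation, not part of the commit): tape's test.only runs just this one test and skips the rest of the suite, which suits a work-in-progress commit; switching back to plain test(...) would re-enable the other tests.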
