Skip to content

Commit

Permalink
Move repo stats to use mongo
Browse files Browse the repository at this point in the history
  • Loading branch information
MattIPv4 committed Nov 9, 2019
1 parent b19fb03 commit 5b06359
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 56 deletions.
16 changes: 15 additions & 1 deletion src/helpers/date.js
Expand Up @@ -13,4 +13,18 @@ function dateFromDay(year, day){
return new Date(date.setDate(day)); // add the number of days
}

module.exports = { getDateArray, dateFromDay };
/**
 * Render a date as "<English month name> <day of month>", e.g. "October 15".
 *
 * The month and day are read with the Date's local-time getters; the year is
 * not part of the output and the day is not zero-padded.
 *
 * @param {Date} date - Date to render.
 * @returns {string} Month name and day of month separated by a single space.
 */
function formatDate(date) {
    // Position in this list matches Date#getMonth(), which is zero-based.
    const MONTHS = [
        'January', 'February', 'March', 'April',
        'May', 'June', 'July', 'August',
        'September', 'October', 'November', 'December',
    ];

    const dayOfMonth = date.getDate();
    const monthName = MONTHS[date.getMonth()];

    return `${monthName} ${dayOfMonth}`;
}

module.exports = { getDateArray, dateFromDay, formatDate };
4 changes: 2 additions & 2 deletions src/index.js
@@ -1,10 +1,10 @@
const mongo = require('./helpers/mongo');
const stats = require('./stats');

/**
 * Entry point: open the Mongo connection, run the stats generators against
 * the 'hacktoberfest-prod-sample' database, then close the connection.
 *
 * PR stats are not invoked directly here: the stats index already lists
 * './PRs' among its generators, so a direct call would print them twice.
 *
 * @returns {Promise<void>} Resolves once the stats have run and the connection is closed.
 */
const main = async () => {
    const db = await mongo.connect();
    try {
        const dbo = db.db('hacktoberfest-prod-sample');
        await stats(dbo);
    } finally {
        // Release the connection even when a generator throws.
        await db.close();
    }
};

Expand Down
37 changes: 34 additions & 3 deletions src/stats/PRs.js
Expand Up @@ -5,7 +5,7 @@ const number = require('../helpers/number');
const chart = require('../helpers/chart');
const linguist = require('../helpers/linguist');
const color = require('../helpers/color');
const { getDateArray, dateFromDay } = require('../helpers/date');
const { getDateArray, dateFromDay, formatDate } = require('../helpers/date');

module.exports = async db => {
/***************
Expand Down Expand Up @@ -37,8 +37,8 @@ module.exports = async db => {
console.log(`Total PRs: ${number.commas(totalPRs)}`);
console.log(` Valid PRs: ${number.commas(totalValidPRs)} (${(totalValidPRs / totalPRs * 100).toFixed(2)}%)`);
console.log(` Invalid PRs: ${number.commas(totalInvalidPRs)} (${(totalInvalidPRs / totalPRs * 100).toFixed(2)}%)`);
console.log(` Invalid (excluded repo) PRs: ${number.commas(totalInvalidRepoPRs)} (${(totalInvalidRepoPRs / totalInvalidPRs * 100).toFixed(2)}%) (${(totalInvalidRepoPRs / totalPRs * 100).toFixed(2)}%)`);
console.log(` Invalid (labeled invalid) PRs: ${number.commas(totalInvalidLabelPRs)} (${(totalInvalidLabelPRs / totalInvalidPRs * 100).toFixed(2)}%) (${(totalInvalidLabelPRs / totalPRs * 100).toFixed(2)}%)`);
console.log(` of which were in an excluded repo: ${number.commas(totalInvalidRepoPRs)} (${(totalInvalidRepoPRs / totalInvalidPRs * 100).toFixed(2)}%)`);
console.log(` of which were labeled as invalid: ${number.commas(totalInvalidLabelPRs)} (${(totalInvalidLabelPRs / totalInvalidPRs * 100).toFixed(2)}%)`);

// Breaking down PRs by language, other tags
const totalPRsByLanguage = await db.collection('pull_requests').aggregate([
Expand Down Expand Up @@ -218,4 +218,35 @@ module.exports = async db => {
PRsByChanges.forEach(pr => {
console.log(` ${number.commas(pr.changes)} | ${pr.html_url}`);
});

// Breaking down PRs by day
const totalPRsByDay = await db.collection('pull_requests').aggregate([
{
'$set':
{
day: { '$dayOfYear': { '$dateFromString': { dateString: '$created_at' } } },
}
},
{
'$group':
{
_id: '$day',
count: { '$sum': 1 }
}
},
{
'$sort':
{
count: -1,
}
},
{
'$limit': 10,
},
]).toArray();
console.log('');
console.log('Top days by PRs:');
totalPRsByDay.forEach(day => {
console.log(` ${formatDate(dateFromDay(2019, day['_id']))} | ${number.commas(day.count)} (${(day.count / totalPRs * 100).toFixed(2)}%)`);
});
};
169 changes: 122 additions & 47 deletions src/stats/Repos.js
@@ -1,6 +1,8 @@
module.exports = async data => {
const { Repos } = data;
require('../prototypes');

const number = require('../helpers/number');

module.exports = async db => {
/***************
* Repo Stats
***************/
Expand All @@ -10,71 +12,144 @@ module.exports = async data => {
// We only have relevant PR data, this would need massive abuse of the GH API to determine

// Total: Repos and invalid repos
const totalRepos = Repos.length;
const ValidRepos = Repos.filter(repo => !repo.invalid);
const totalValidRepos = ValidRepos.length;
const InvalidRepos = Repos.filter(repo => repo.invalid);
const totalInvalidRepos = InvalidRepos.length;
const PermittedRepos = Repos.filter(repo => repo.permitted);
const totalPermittedRepos = PermittedRepos.length;
const totalRepos = await db.collection('repositories').find({}).count();
const totalInvalidRepos = (await db.collection('repositories').aggregate([
{
'$lookup':
{
from: 'spam_repositories',
localField: 'id',
foreignField: 'Repo ID',
as: 'spam'
}
},
{ '$match': { 'spam.Verified?': 'checked' } },
{ '$group': { _id: null, count: { '$sum': 1 } } },
]).limit(1).toArray())[0].count;
const totalValidRepos = totalRepos - totalInvalidRepos;
const totalPermittedRepos = (await db.collection('repositories').aggregate([
{
'$lookup':
{
from: 'spam_repositories',
localField: 'id',
foreignField: 'Repo ID',
as: 'spam'
}
},
{ '$match': { 'spam.Permitted?': 'checked' } },
{ '$group': { _id: null, count: { '$sum': 1 } } },
]).limit(1).toArray())[0].count;
console.log('');
console.log(`Total repos: ${totalRepos}`);
console.log(` Valid repos: ${totalValidRepos} (${(totalValidRepos / totalRepos * 100).toFixed(2)}%)`);
console.log(` Reported but approved repos: ${totalPermittedRepos} (${(totalPermittedRepos / totalRepos * 100).toFixed(2)}%)`);
console.log(` Invalid (excluded) repos: ${totalInvalidRepos} (${(totalInvalidRepos / totalRepos * 100).toFixed(2)}%)`);
console.log(`Total repos: ${number.commas(totalRepos)}`);
console.log(` Valid repos: ${number.commas(totalValidRepos)} (${(totalValidRepos / totalRepos * 100).toFixed(2)}%)`);
console.log(` of which were reported but approved: ${number.commas(totalPermittedRepos)} (${(totalPermittedRepos / totalValidRepos * 100).toFixed(2)}%)`);
console.log(` Excluded repos: ${number.commas(totalInvalidRepos)} (${(totalInvalidRepos / totalRepos * 100).toFixed(2)}%)`);

// Breaking down repos by language
const ReposByLanguage = Repos.groupBy(repo => repo.languageString());
const totalReposByLanguage = await db.collection('repositories').aggregate([
{
'$group':
{
_id: '$language',
count: { '$sum': 1 }
}
},
{
'$sort':
{
count: -1,
}
}
]).toArray();
console.log('');
console.log(`Repos by language: ${Object.keys(ReposByLanguage).length} languages`);
ReposByLanguage.forEach((key, val) => {
console.log(` ${key}: ${val.length} (${(val.length / totalRepos * 100).toFixed(2)}%)`);
console.log(`Repos by language: ${totalReposByLanguage.length} languages`);
totalReposByLanguage.limit(15).forEach(lang => {
const name = lang['_id'] || 'Undetermined';
console.log(` ${name}: ${number.commas(lang.count)} (${(lang.count / totalRepos * 100).toFixed(2)}%)`);
});

// Projects by popularity, contributors, stars (repo metadata)
const topReposByStars = Repos.sort((a, b) => {
return b.stargazers_count - a.stargazers_count;
}).limit(5);
const topReposByPRs = await db.collection('pull_requests').aggregate([
{
'$match': { 'labels.name': { '$nin': [ 'invalid' ] } },
},
{
'$group':
{
_id: '$base.repo.id',
count: { '$sum': 1 },
}
},
{
'$match': { '_id': { '$ne': null } },
},
{
'$lookup':
{
from: 'repositories',
localField: '_id',
foreignField: 'id',
as: 'repository',
}
},
{
'$project':
{
count: '$count',
repository: { '$arrayElemAt': [ '$repository', 0 ] },
}
},
{
'$lookup':
{
from: 'spam_repositories',
localField: 'repository.id',
foreignField: 'Repo ID',
as: 'spam'
}
},
{
'$match': { 'spam.Verified?': { '$nin': [ 'checked' ] } },
},
{
'$sort':
{
count: -1,
}
},
{
'$limit': 10,
},
]).toArray();
console.log('');
console.log('Top repos by PRs');
topReposByPRs.forEach(repo => {
console.log(` ${number.commas(repo.count)} | ${repo.repository.html_url}`);
});

const topReposByStars = await db.collection('repositories').find({}).sort({ stargazers_count: -1 })
.limit(5).toArray();
console.log('');
console.log('Top repos by stars');
topReposByStars.forEach(repo => {
console.log(` ${repo.stargazers_count} | ${repo.html_url}`);
console.log(` ${number.commas(repo.stargazers_count)} | ${repo.html_url}`);
});

const topReposByForks = Repos.sort((a, b) => {
return b.forks_count - a.forks_count;
}).limit(5);
const topReposByForks = await db.collection('repositories').find({}).sort({ forks_count: -1 })
.limit(5).toArray();
console.log('');
console.log('Top repos by forks');
topReposByForks.forEach(repo => {
console.log(` ${repo.forks_count} | ${repo.html_url}`);
console.log(` ${number.commas(repo.forks_count)} | ${repo.html_url}`);
});

const topReposByWatchers = Repos.sort((a, b) => {
return b.watchers_count - a.watchers_count;
}).limit(5);
const topReposByWatchers = await db.collection('repositories').find({}).sort({ watchers_count: -1 })
.limit(5).toArray();
console.log('');
console.log('Top repos by watchers');
topReposByWatchers.forEach(repo => {
console.log(` ${repo.watchers_count} | ${repo.html_url}`);
});

const topReposByPRs = Repos.sort((a, b) => {
return b.prs.length - a.prs.length;
}).limit(5);
console.log('');
console.log('Top repos by PRs');
topReposByPRs.forEach(repo => {
console.log(` ${repo.prs.length} | ${repo.html_url}`);
});

const topReposByContributors = Repos.sort((a, b) => {
return b.contributors().length - a.contributors().length;
}).limit(5);
console.log('');
console.log('Top repos by contributors');
topReposByContributors.forEach(repo => {
console.log(` ${repo.contributors().length} | ${repo.html_url}`);
console.log(` ${number.commas(repo.watchers_count)} | ${repo.html_url}`);
});

// Histogram breakdown by gitignores in repos
Expand Down
6 changes: 3 additions & 3 deletions src/stats/index.js
@@ -1,12 +1,12 @@
const statsGenerators = [
require('./PRs'),
require('./Repos'),
require('./Users'),
//require('./Users'),
];

module.exports = async data => {
module.exports = async db => {
for (const generator in statsGenerators) {
if (!statsGenerators.hasOwnProperty(generator)) return;
await statsGenerators[generator](data);
await statsGenerators[generator](db);
}
};

0 comments on commit 5b06359

Please sign in to comment.