In [69]:
from pymongo import MongoClient

# Open the MongoDB connection. MongoClient() is called with no arguments,
# so PyMongo's default connection settings apply.
mongo_client = MongoClient()

# Handle to the "graduation" database (attribute access is equivalent to
# subscript access in PyMongo).
database = mongo_client.graduation

# Collections read by the analysis cells below.
pull_requests_collection = database.pull_requests
projects_collection = database.projects




In [74]:
from urllib.parse import urlparse

# For every successfully scraped project, gather all commit statuses attached
# to its pull requests, report which hosts the status target URLs point at,
# and determine the oldest status whose target URL is hosted on travis-ci.org.
# Pull requests are then split into those created before / after that date.

# Only projects flagged as successfully scraped are analyzed.
succeeded_filter = {'succeeded': True}

scraped_projects = projects_collection.find(succeeded_filter)

# Cursor.count() was removed in PyMongo 4; count_documents() is its replacement.
print("Analyzing {}".format(projects_collection.count_documents(succeeded_filter)))

for project in scraped_projects:
    print("-----------------------------------------")
    print("Starting {}".format(project["full_name"]))

    # full_name is split on "/" into the owner (first part) and project name
    # (second part) used to look up the project's pull requests.
    name_parts = project["full_name"].split("/")
    pull_requests = list(pull_requests_collection.find({'project_name': name_parts[1],
                                                        'project_owner': name_parts[0]}))

    # Flatten pull requests -> commits -> statuses.
    commits = [commit for pr in pull_requests for commit in pr["commits"]]
    statuses = [status for commit in commits for status in commit["statuses"]]

    print("{} commits, and {} statuses".format(len(commits), len(statuses)))

    # min() raises ValueError on an empty sequence, which would abort the
    # whole loop; skip projects that have no statuses at all.
    if not statuses:
        print("No statuses found, skipping project")
        continue

    print("Oldest reported status is: {}".format(min(status["created_at"] for status in statuses)))

    # Parse every target URL exactly once; entry i belongs to statuses[i].
    status_hostnames = [urlparse(status["target_url"]).hostname for status in statuses]

    # Sorted so the printed list is deterministic between runs.
    hostnames = sorted({hostname for hostname in status_hostnames if hostname is not None})

    print("Total of {} unique hostnames".format(len(hostnames)))

    print("Hostnames are: {}".format(", ".join(hostnames)))

    travis_dates = [status["created_at"]
                    for status, hostname in zip(statuses, status_hostnames)
                    if hostname == "travis-ci.org"]

    # Same empty-sequence guard as above: a project may have statuses but
    # none of them pointing at travis-ci.org.
    if not travis_dates:
        print("No travis-ci.org statuses found, skipping project")
        continue

    oldest_travis_date = min(travis_dates)

    print("Oldest travis status is: {}, versus the travis reported date: {}".format(oldest_travis_date, project["first_build_date_travis"]))

    # Strict comparisons: a pull request created at exactly the oldest travis
    # timestamp is counted in neither group.
    prs_before = [pr for pr in pull_requests if pr["created_at"] < oldest_travis_date]
    prs_after = [pr for pr in pull_requests if pr["created_at"] > oldest_travis_date]

    print("{} prs before, and {} prs after".format(len(prs_before), len(prs_after)))

    # NOTE(review): this only updates the in-memory document; nothing in this
    # cell writes the value back to MongoDB. Confirm whether persisting it
    # (e.g. via projects_collection.update_one) was intended.
    project["status_travis_date"] = oldest_travis_date
    
    

  """


Analyzing 30
-----------------------------------------
Starting Leaflet/Leaflet
11500 commits, and 12228 statuses
Oldest reported status is: 2013-02-01 18:26:16
Total of 1 unique hostnames
Hostnames are: travis-ci.org
Oldest travis status is: 2013-02-01 18:26:16, versus the travis reported date: 2016-07-29 18:03:34
446 prs before, and 2010 prs after
-----------------------------------------
Starting MISP/MISP
3483 commits, and 11948 statuses
Oldest reported status is: 2013-12-12 15:40:55
Total of 5 unique hostnames
Hostnames are: app.continuousphp.com, travis-ci.org, continuousphp.com, www.codefactor.io, coveralls.io
Oldest travis status is: 2013-12-12 15:40:55, versus the travis reported date: 2016-07-20 09:27:16
5 prs before, and 770 prs after
-----------------------------------------
Starting MRtrix3/mrtrix3
5793 commits, and 19542 statuses
Oldest reported status is: 2015-08-20 14:20:59
Total of 2 unique hostnames
Hostnames are: ci.appveyor.com, travis-ci.org
Oldest travis status is

5443 commits, and 3600 statuses
Oldest reported status is: 2015-12-02 00:39:20
Total of 1 unique hostnames
Hostnames are: travis-ci.org
Oldest travis status is: 2015-12-02 00:39:20, versus the travis reported date: 2016-07-28 20:38:14
615 prs before, and 394 prs after
-----------------------------------------
Starting OpenLightingProject/ola
8704 commits, and 60770 statuses
Oldest reported status is: 2014-01-08 18:39:57
Total of 6 unique hostnames
Hostnames are: www.codacy.com, travis-ci.org, app.codacy.com, ci.appveyor.com, coveralls.io, snyk.io
Oldest travis status is: 2014-01-08 18:39:57, versus the travis reported date: 2016-07-31 02:07:42
8 prs before, and 920 prs after
-----------------------------------------
Starting AFNetworking/AFNetworking
4852 commits, and 15108 statuses
Oldest reported status is: 2013-05-16 18:06:42
Total of 3 unique hostnames
Hostnames are: coveralls.io, codecov.io, travis-ci.org
Oldest travis status is: 2013-05-16 18:06:42, versus the travis reported dat