# Introduction

Notebook that for select projects in certain languages will try and scrape build logs to find whether certain tools are used in the build flow of the projects. 

In [1]:
# Imports
%load_ext autoreload
%autoreload 2

from pymongo import MongoClient

import travis.scrape_build_logs as build_scraper

import analysis_utilities
import random
import re


In [2]:
mongo_client = MongoClient()

database = mongo_client["graduation"]
pull_requests_collection = database["pull_requests"]
projects_collection = database["projects"]
commits_collection = database["commits"]

## Project selection

We only select certain projects, as tool names differ per language / environment. For now we focus on Python

In [3]:
projects_list = list(projects_collection.find({'succeeded' : True, 'travis_is_oldest_ci': True, 'language': 'Python'}))

print("{} Python projects".format(len(projects_list)))

for project in projects_list:
    split_slug = project["full_name"].split("/")
    
    prs = list(pull_requests_collection.find(
                { 
                    'project_name' : split_slug[1],
                    'project_owner': split_slug[0]
                }
            ))
    
    pr_before, pr_after = analysis_utilities.split_prs_on_build_date(project, prs, True, "status_travis_date")
    
    project["travis_prs"] = pr_after

127 Python projects


In [4]:
def process_project(project):
    
    if len(project["travis_prs"]) < 20:
        return None
    
    print(project["full_name"])

    sample_prs = random.sample(project["travis_prs"],  20)

    sample_commits = [sha for commits in [pr["commits"] for pr in sample_prs] for sha in commits]

    sample_commits = random.sample(sample_commits, 15)
    
    target_urls = []
    
    for sha in sample_commits:
        commit = commits_collection.find_one({'sha':sha, 'statuses' : {'$exists': True}})
        if commit is None:
            continue
            
        if "statuses" in commit:
            for status in commit["statuses"]:
                if "target_url" in status and status["target_url"] is not None:
                    if "builds" in status["target_url"] and \
                        "travis-ci.org" in status["target_url"]:
                        target_urls.append(status["target_url"])
    
            
    #target_urls = [status["target_url"] for statuses in [commits_collection.find_one({'sha':sha, 'statuses' : {'$exists': True}})["statuses"] for sha in sample_commits] for status in statuses if "target_url" in status and  "builds" in status["target_url"] and "travis-ci.org" in status["target_url"]]

    build_ids = list(set([build_scraper.retrieve_build_identifier_from_travis_url(url) for url in target_urls]))
    
    if len(build_ids) > 7:
        build_ids = random.sample(build_ids, 7)

    logs = build_scraper.build_logs_for_identifiers(build_ids)
    
    process_logs(logs)

In [11]:
def process_logs(logs):
    has_linter = False
    has_coverage = False
    
    lint_lines = []
    coverage_lines = []
    
    for log in logs:
        lines = log.split("\n")
        
        for line in lines:
            if "lint" in line:
                has_linter = True
                lint_lines.append(line)
                
            if "codecov" in line or "coverage" in line or "Pytest-cov" in line or "coveralls" in line:
                has_coverage = True
                coverage_lines.append(line)
                
    print("\tLint status is {}, coverage status is {}".format(has_linter, has_coverage))
    for line in coverage_lines[:10]:
        print(line)

In [None]:
for project in projects_list:
    process_project(project)

OP2/PyOP2
	Lint status is False, coverage status is False
OSU-Net/cyder
	Lint status is False, coverage status is False
Theano/Theano
	Lint status is True, coverage status is False
ansible/ansible-modules-core
	Lint status is False, coverage status is False
ansible/ansible-modules-extras
	Lint status is False, coverage status is False
Pylons/pyramid
	Lint status is True, coverage status is True
Searching for coverage
Reading https://pypi.python.org/simple/coverage/
Best match: coverage 4.0a5
Downloading https://pypi.python.org/packages/source/c/coverage/coverage-4.0a5.zip#md5=8a59799b1c1740d211346d6e88990815
Processing coverage-4.0a5.zip
Writing /tmp/easy_install-szsqgf/coverage-4.0a5/setup.cfg
Running coverage-4.0a5/setup.py -q bdist_egg --dist-dir /tmp/easy_install-szsqgf/coverage-4.0a5/egg-dist-tmp-Fh3Ikz
creating /home/travis/virtualenv/python2.6.9/lib/python2.6/site-packages/coverage-4.0a5-py2.6-linux-x86_64.egg
Extracting coverage-4.0a5-py2.6-linux-x86_64.egg to /home/travis/virt