# Step 3.1: Apache Status Checker

In [1]:
import pandas as pd 
import os

from threading import Thread, Event
import time
import concurrent.futures
import pickle
import csv

import sys
sys.path.append('../') # To import Python script from other location
from ProjectAnalysis import ProjectAnalysis

In [2]:
def get_results(project_name):
    """Count build outcomes for every analysed commit of one project.

    Commits are taken, in order, from ``ProjectAnalysis(...).csvDict`` and are
    paired *by position* with the rows of the previous study's
    ``compilation.log.csv`` for the same project.

    Relies on the notebook-level global ``results_path`` being defined before
    this function is called.

    Args:
        project_name: Project directory name, used both under ``results_path``
            and under ``/home/previousResults/analyzedProjects/``.

    Returns:
        dict of integer counters:
            original_buildable_commits   -- previous-study row has POM_FILE == 'true'
            maven_buildable_commits      -- some checked build is Maven
            commit_with_build_sistem     -- build_system is not "NOT_FOUND"
            replicate_success            -- Maven build works and POM_FILE was 'true'
            replicate_fails              -- original_buildable_commits - replicate_success
            real_maven_replicate_success -- Maven build works and a Maven build was checked
            real_maven_replicate_fails   -- Maven build fails and a Maven build was checked
            ant_success / ant_fails      -- outcome when the selected build system is Ant
            real_fails                   -- failing builds, Maven or Ant

    Raises:
        FileNotFoundError: if the previous study's CSV does not exist.
        IndexError: if that CSV has fewer rows than there are commits.
    """
    pa = ProjectAnalysis(project_name,  1, root=results_path)

    result = {
        # "real_success" is intentionally not tracked (not needed downstream).
        "real_fails": 0,
        "replicate_fails": 0,
        "replicate_success": 0,
        "real_maven_replicate_success": 0,
        "real_maven_replicate_fails": 0,
        "ant_fails": 0,
        "ant_success": 0,
        "commit_with_build_sistem": 0,
        "original_buildable_commits": 0,
        "maven_buildable_commits": 0,
    }

    # The file is only read, so open it read-only (no write permission needed);
    # newline='' lets the csv module handle line endings itself.
    with open("/home/previousResults/analyzedProjects/"+project_name+"/compilation.log.csv", 'r', newline='') as f:
        oldResults = list(csv.DictReader(f, delimiter=","))

    for idx, commit_dict in enumerate(pa.csvDict.values()):
        build = pa.get_build_file(idx, commit_dict['commit'])
        hasPomOriginally = oldResults[idx]['POM_FILE'] == 'true'
        hasPomNow = any(b['build_system'] == 'Maven' for b in build['builds_checked'])

        # Some commits have a pom.xml (as reported by the previous study) but
        # actually build with Ant (two build files). Buildable commits are
        # therefore counted here, independently of the selected build system.
        if hasPomOriginally:
            result["original_buildable_commits"] += 1
        if hasPomNow:
            result["maven_buildable_commits"] += 1

        build_system = build['build_system']
        if build_system == "NOT_FOUND":
            continue
        result["commit_with_build_sistem"] += 1

        if build_system == "Maven":
            if build['works']:  # build success
                if hasPomNow:
                    result["real_maven_replicate_success"] += 1
                if hasPomOriginally:
                    result["replicate_success"] += 1
            else:  # build failure
                result["real_fails"] += 1
                if hasPomNow:
                    result["real_maven_replicate_fails"] += 1
                # replicate_fails is not incremented here: it is derived after
                # the loop as original_buildable_commits - replicate_success.
        elif build_system == "Ant":
            # Only reached when Ant is the selected build system; a commit whose
            # selected build system is Maven is never counted as Ant.
            if build['works']:
                result["ant_success"] += 1
            else:
                result["ant_fails"] += 1
                result["real_fails"] += 1

    result["replicate_fails"] = result["original_buildable_commits"] - result["replicate_success"]
    return result

In [3]:
# Column names of the summary CSV, grouped the same way as the sections of the
# tuple returned by checkApacheProject (the order must stay in sync with it).
headers = (
    # Project identification
    ['Project', 'TotalCommits']
    # Original study
    + ['Original Buildable commits', 'Original', 'Original (%)']
    # Replication study
    + ['Replicated', 'Replicated (%)']
    # Extended replication study (Maven)
    + ['Real Buildable commits', 'Real Replicated', 'Real Replicated (%)']
    # Ant outcomes
    + ['Ant Fails', 'Ant Success']
    # Extended replication study (Maven + Ant)
    + ['Real Buildable commits + Ant', 'Real Replicated + Ant', 'Real Replicated + Ant (%)']
    # Share of successful commits over the whole history
    + ['Complete']
)
def _percentage(numerator, denominator):
    """Return ``numerator / denominator * 100``, or NaN when the denominator is zero."""
    if denominator == 0:
        return float("nan")
    return (numerator / denominator) * 100


def checkApacheProject(repo):
    """Build the summary row for one project.

    Reads the project's ``report_experiment_1.csv`` under the notebook-level
    global ``results_path``, combines it with the counters from
    ``get_results`` and with the figures of the original study carried by
    ``repo``.

    Args:
        repo: One row of ``compilability.csv``; the fields read here are
            ``PROJECT``, ``TOTAL_HISTORY``, ``COMPILABLE_HISTORY`` and
            ``COMPILABILITY``.

    Returns:
        A 16-element tuple whose positions match the ``headers`` list.
        Percentages whose denominator is zero are reported as NaN instead of
        raising ``ZeroDivisionError`` (which would otherwise abort the whole
        run for a project with no buildable commits).
    """
    repo_report = pd.read_csv(results_path+repo['PROJECT']+"/experiment_1/report_experiment_1.csv")
    # Number of report rows marked SUCCESS that have a non-null id.
    success_commits = repo_report.loc[repo_report['build'] == 'SUCCESS', 'id'].count()
    result = get_results(repo['PROJECT'])
    print("Finish: %s"%repo['PROJECT'])

    maven_attempts = result["real_maven_replicate_success"] + result["real_maven_replicate_fails"]

    return (
        repo['PROJECT'],
        repo['TOTAL_HISTORY'], # Total commits

        # ORIGINAL STUDY

        result["original_buildable_commits"], # Original Buildable commits (has pom.xml)
        repo['COMPILABLE_HISTORY'], # Original
        repo['COMPILABILITY']*100, # Original (%)

        # REPLICATION STUDY

        result["replicate_success"], # Replicated
        _percentage(result["replicate_success"], result["original_buildable_commits"]), # Replicated (%)

        # EXTENDED REPLICATION STUDY

        result["maven_buildable_commits"], # Real Buildable commits (has pom.xml)
        result["real_maven_replicate_success"], # Real Replicated
        _percentage(result["real_maven_replicate_success"], maven_attempts), # Real Replicated (%)

        result["ant_fails"], # Ant Fails
        result["ant_success"], # Ant Success

        result["commit_with_build_sistem"], # Real Buildable commits + Ant
        success_commits, # Real Replicated + Ant
        _percentage(success_commits, success_commits + result["real_fails"]), # Real Replicated + Ant (%)
        _percentage(success_commits, repo['TOTAL_HISTORY']) # Complete (%)
    )

In [5]:
# Layout of one compilability.csv row (example values):
# -----------------------------------
# PROJECT               myfaces-html5
# TOTAL_HISTORY                    77
# BUILD_HISTORY                    76
# COMPILABLE_HISTORY               25
# COMPILABILITY              0.328947
# BREAKS                            2

df = pd.read_csv("/home/previousResults/compilability.csv")
results_path="/home/results/Apache_old/"

# Submit one job per project that has a directory under results_path;
# projects without one are skipped.
with concurrent.futures.ThreadPoolExecutor() as executor:
    future_results = [
        executor.submit(checkApacheProject, repo)
        for project_idx, repo in df.iterrows()
        if os.path.exists(results_path+repo['PROJECT'])
    ]

# Gather the rows in submission order; .result() re-raises any exception
# raised inside the corresponding job.
results = [pending.result() for pending in future_results]

Finish: james-jsieve
Finish: james-jspf
Finish: maven-app-engine
Finish: log4j-zeroconf
Finish: nuvem
Finish: log4j-component
Finish: myfaces-portlet-bridge
Finish: myfaces-html5
Finish: james-jdkim
Finish: olingo-odata2
Finish: james-postage
Finish: maven-plugin-testing
Finish: olingo-odata4
Finish: maven-resources
Finish: qpid-jms
Finish: servicemix
Finish: log4j-receivers
Finish: struts-examples
Finish: karaf-cave
Finish: myfaces-extval
Finish: servicemix-components
Finish: odftoolkit
Finish: oltu
Finish: rampart
Finish: tiles-autotag
Finish: uima-addons
Finish: myfaces-scripting
Finish: servicemix4-specs
Finish: log4j-extras
Finish: maven-jxr
Finish: sirona
Finish: sandesha
Finish: maven-doxia-tools
Finish: uima-uimafit
Finish: kalumet
Finish: servicemix4-kernel
Finish: qpid-proton
Finish: tuscany-sca-1.x
Finish: webservices-commons-xmlschema
Finish: pluto
Finish: whirr
Finish: oozie
Finish: servicemix4-features
Finish: maven-doxia
Finish: maven-enforcer
Finish: uima-ruta
Finish: t

In [6]:
import csv

# Persist the summary: one header row followed by one row per project.
# The file is only written (never read back), and newline='' is required by
# the csv module so that no extra blank lines appear on platforms that
# translate line endings.
with open('ApacheStatusCheckerResults.csv', 'w', newline='') as out:
    csv_out = csv.writer(out)
    csv_out.writerow(headers)
    csv_out.writerows(results)

In [7]:
# Column-wise totals of the original experiment (100 projects).
# Note: the PROJECT column holds strings, so its "sum" is the concatenation
# of all project names.
df = pd.read_csv("/home/previousResults/compilability.csv")
df.sum(axis=0)

PROJECT               isisjackrabbitjackrabbit-filevaultjackrabbit-o...
TOTAL_HISTORY                                                    174505
BUILD_HISTORY                                                    132484
COMPILABLE_HISTORY                                                31696
COMPILABILITY                                                   37.7462
BREAKS                                                              891
dtype: object