# Step 3.1: Apache Status Checker

In [1]:
import pandas as pd 
import os

from threading import Thread, Event
import time
import concurrent.futures
import pickle
import csv

import sys
sys.path.append('../') # To import Python script from other location
from ProjectAnalysis import ProjectAnalysis

In [2]:
def get_results(project_name):
    
    pa = ProjectAnalysis(project_name,  1, root=results_path)
    
    real_success = 0
    real_fails = 0
    
    replicate_fails = 0
    replicate_success = 0
    
    real_maven_replicate_success = 0
    real_maven_replicate_fails = 0
    
    ant_fails = 0
    ant_success = 0
    
    commit_with_build_sistem = 0
    
    total_commits = pa.df()['id'].count()
    
    with open("/home/previousResults/TufanoResults/analyzedProjects/"+project_name+"/compilation.log.csv", 'r+') as f:
        oldResults = list(csv.DictReader(f, delimiter=","))
    
    for idx, commit_dict in enumerate(pa.csvDict.values()):
        build=pa.get_build_file(idx, commit_dict['commit'])
        hasPomOriginally = oldResults[idx]['POM_FILE'] == 'true'
        hasPomNow = any(b['build_system'] == 'Maven' for b in build['builds_checked'])
        hasAntBuildFile = any(b['build_system'] == 'Ant' for b in build['builds_checked'])
        
        # Get Maven build status (or Ant build status if Ant is the only Build System)
        buildWorks = build['builds_checked'][0]['works']
        mainBuildSystem = build['builds_checked'][0]['build_system']
        
        
        if build['build_system'] != "NOT_FOUND":
            commit_with_build_sistem += 1
        
            if build['works']:
                # BUILD SUCCESS

                if build['build_system'] == "Maven":
                    # A commit that success and has Maven BS (Our experiment)
                    if hasPomNow:
                        real_maven_replicate_success+=1
                    # A commit that success and was checked in original experiment (has pom.xml)
                    if hasPomOriginally:
                        replicate_success+=1

                # IF BUILD WORKS WITH MAVEN, WE DON'T CHECK ANT BUILD

                if build['build_system'] == "Ant":
                    # Get only success where Ant is the only Build System
                    ant_success += 1

                real_success += 1

            else:
                # BUILD FAIL

                if build['build_system'] == "Maven":
                    # A commit that fails and has Maven BS (Our experiment)
                    if hasPomNow:
                        real_maven_replicate_fails+=1
                    # A commit that fails and was checked in original experiment (has pom.xml)
                    if hasPomOriginally:
                        replicate_fails+=1

                if build['build_system'] == "Ant":
                    # Get only fails where Ant is he only Build System
                    ant_fails += 1

                real_fails += 1
                
                
    return real_success,real_fails,replicate_fails,replicate_success,real_maven_replicate_success,real_maven_replicate_fails,ant_fails,ant_success,commit_with_build_sistem 

In [3]:
headers=[
    'Project',
    'TotalCommits',
    
    'Original Buildable commits',
    'Original',
    'Original (%)', 
    
    'Replicated',
    'Replicated (%)',
    
    'Real Buildable commits',
    'Real Replicated',
    'Real Replicated (%)',
    
    'Ant Fails',
    'Ant Success',
    
    'Real Buildable commits + Ant',
    'Real Replicated + Ant', 
    'Real Replicated + Ant (%)', 
    
    'Complete' 
    ]
def checkApacheProject(repo):
    repo_report = pd.read_csv(results_path+repo['PROJECT']+"/experiment_1/report_experiment_1.csv")
    total_commits = repo_report['id'].count()
    success_commits=repo_report[repo_report['build'] == 'SUCCESS']['id'].count()
    real_success,real_fails,replicate_fails,replicate_success,real_maven_replicate_success,real_maven_replicate_fails,ant_fails,ant_success,commit_with_build_sistem = get_results(repo['PROJECT'])
    print("Finish: %s"%repo['PROJECT'])
    
    return (
        repo['PROJECT'], 
        repo['TOTAL_HISTORY'], # Total commits
        
        # ORIGINAL STUDY
        
        replicate_fails+replicate_success, # Original Buildable commits (has pom.xml)
        repo['COMPILABLE_HISTORY'], # Original
        repo['COMPILABILITY']*100, # Original (%)
        
        # REPLICATION STUDY
        
        replicate_success,# Replicated 
        (replicate_success/(replicate_fails+replicate_success))*100, # Replicated (%)
        
        # EXTENDED REPLICATION STUDY
        
        real_maven_replicate_success+real_maven_replicate_fails, # Real Buildable commits (has pom.xml)
        real_maven_replicate_success, # Real Replicated 
        (real_maven_replicate_success/(real_maven_replicate_success+real_maven_replicate_fails))*100, # Real Replicated (%)
        
        ant_fails, # Ant Fails
        ant_success, # Ant Success
        
        commit_with_build_sistem, # Real Buildable commits + Ant
        success_commits,# Real Replicated + Ant
        (success_commits/(success_commits+real_fails))*100, # Real Replicated + Ant (%)
        
        (success_commits/repo['TOTAL_HISTORY'])*100 # Complete (%)
    )

In [4]:
# Project structre
# -----------------------------------
# PROJECT               myfaces-html5
# TOTAL_HISTORY                    77
# BUILD_HISTORY                    76
# COMPILABLE_HISTORY               25
# COMPILABILITY              0.328947
# BREAKS                            2

df = pd.read_csv("/home/previousResults/TufanoResults/compilability.csv")
results_path="/home/results/Apache/"
future_results = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    for row in df.iterrows():
        repo = row[1]
#         if repo['PROJECT'] == 'james-jspf': #DELETE
        if os.path.exists(results_path+repo['PROJECT']):
            future = executor.submit(checkApacheProject, repo)
            future_results.append(future)
        
results = [future.result() for future in future_results]

Finish: log4j-component
Finish: james-jdkim
Finish: log4j-zeroconf
Finish: myfaces-portlet-bridge
Finish: maven-app-engine
Finish: nuvem
Finish: maven-plugin-testing
Finish: james-postage
Finish: log4j-receivers
Finish: karaf-cave
Finish: kalumet
Finish: myfaces-html5
Finish: maven-resources
Finish: log4j-extras
Finish: maven-enforcer
Finish: maven-doxia-tools
Finish: james-hupa
Finish: maven-jxr
Finish: servicemix
Finish: struts-examples
Finish: servicemix-components
Finish: karaf-cellar
Finish: servicemix4-specs
Finish: myfaces-extval
Finish: olingo-odata2
Finish: james-jsieve
Finish: olingo-odata4
Finish: tiles-autotag
Finish: qpid-jms
Finish: james-jspf
Finish: uima-addons
Finish: james-mime4j
Finish: odftoolkit
Finish: servicemix4-kernel
Finish: oltu
Finish: myfaces-scripting
Finish: uima-uimafit
Finish: sirona
Finish: rampart
Finish: sandesha
Finish: tuscany-sca-1.x
Finish: webservices-neethi
Finish: webservices-commons-xmlschema
Finish: servicemix4-features
Finish: maven-doxia
F

In [7]:
import csv

with open('ApacheStatusCheckerResults.csv','w+') as out:
    csv_out=csv.writer(out)
    csv_out.writerow(headers)
    for result in results:
        csv_out.writerow(result)

In [8]:
# See original experiment (100 projects)
df = pd.read_csv("/home/previousResults/TufanoResults/compilability.csv")
df.sum()

PROJECT               isisjackrabbitjackrabbit-filevaultjackrabbit-o...
TOTAL_HISTORY                                                    174505
BUILD_HISTORY                                                    132484
COMPILABLE_HISTORY                                                31696
COMPILABILITY                                                   37.7462
BREAKS                                                              891
dtype: object