# Supporting code and data for "..."

In [1]:
%matplotlib inline

import os
import sys
print(f'Python {sys.version}')

import IPython
from IPython.core.display import display, HTML
print(f'IPython {IPython.__version__}')

print('\nLibraries:\n')

import csv
print(f'csv {csv.__version__}')

import matplotlib
import matplotlib.pyplot as plt
print(f'matplotlib {matplotlib.__version__}')

import numpy as np
print(f'numpy {np.__version__}')

import pandas as pd
from pandas.plotting import register_matplotlib_converters
print(f'pandas {pd.__version__}')

import re
print(f're {re.__version__}')

import requests
print(f'requests {requests.__version__}')

#import scipy
#import scipy.stats
#print(f'scipy {scipy.__version__}')


#import statsmodels
#import statsmodels.formula.api as smf
#from statsmodels.stats.outliers_influence import summary_table
#print(f'statsmodels {statsmodels.__version__}')

Python 3.9.6 (default, Jun 28 2021, 08:57:49) 
[GCC 10.3.0]
IPython 7.24.1

Libraries:

csv 1.0
matplotlib 3.4.2
numpy 1.20.3
pandas 1.2.4
re 2.2.1
requests 2.25.1


## Data collection

We use the GitHub GraphQL API because it allows fetching only the information we need, and at a much faster rate (we can get up to 100 nodes in a single request). Getting all the objects of a certain type then requires repeating the request until every page of results has been fetched.

You need to provide a personal `api_token` if you want to get fresh data from GitHub. Otherwise, this notebook will skip the data collection step and load the CSV files from the local filesystem.

In [2]:
# Personal GitHub API token. It is read from the GITHUB_TOKEN environment
# variable so that the secret never has to be written into (and committed
# with) the notebook. When the variable is unset the token is the empty
# string, which makes the data collection step a no-op.
api_token = os.environ.get('GITHUB_TOKEN', '')

In [3]:
def requestAllPages(query,rows_and_next_variables,filename,columns):
  """Run a paginated GraphQL query against GitHub and save the rows as CSV.

  Parameters:
    query: GraphQL query text sent with every request.
    rows_and_next_variables: callback driving the pagination. Called with
      None it returns the initial `(rows, variables)` pair; called with the
      `data` payload of a response it returns the rows extracted from that
      payload and the list of variable sets for the follow-up requests.
    filename: path of the CSV file to write.
    columns: header row written before the data rows.

  Does nothing when the global `api_token` is empty. The file is only
  written when at least one row was collected.

  Raises:
    requests.HTTPError: when a request returns an unsuccessful status.
    RuntimeError: when a response carries no `data` payload.
  """
  if api_token == '':
    return
  headers = {'Authorization': f'token {api_token}'}
  url = 'https://api.github.com/graphql'
  rows, variables = rows_and_next_variables(None)
  # `variables` is a work list: each response may push further requests.
  while len(variables)>0:
    payload = {'query':query,'variables':variables.pop()}
    r = requests.post(url=url, json=payload, headers=headers)
    if r.status_code == 403:
      print('Unauthorized request:')
      print(payload)
    r.raise_for_status() # Abort if unsuccessful request
    response = r.json()
    data = response.get('data')
    if data is None:
      # The callback is also invoked with None to obtain the initial state,
      # so forwarding a missing payload would be ambiguous and could restart
      # the pagination indefinitely. Fail loudly instead.
      raise RuntimeError(f"GraphQL response without data: {response.get('errors')}")
    new_rows, next_variables = rows_and_next_variables(data)
    rows += new_rows
    variables += next_variables
  if len(rows) > 0:
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='') as f:
      writer = csv.writer(f)
      writer.writerow(columns)
      writer.writerows(rows)

We search all PRs where CI minimization was proposed (excluding those authored by Jason Gross, which were mostly to debug the minimizer) and we retrieve all the comments from coqbot-app to know what happened. We only keep the first 15 lines of each comment, to reduce the size of the CSV file, because these lines will contain all the information we need.

Make sure to uncomment the last line and to provide an `api_token` to re-run this.

In [4]:
def fetch_pr_comments():
  """Fetch coqbot-app comments on CI-minimization PRs into 'pr_comments.csv'.

  Searches coq/coq for "coqbot ci minimize" (10 results per page) and walks
  the comments of every pull request found (50 per page). For pull requests
  not authored by JasonGross, each comment written by coqbot-app yields one
  row `[id, number, date, body]`, where `body` is limited to the first 15
  lines of the comment. Fetching and writing are delegated to
  `requestAllPages`.
  """

  query = """
    query commentQuery($number: Int!, $single: Boolean!, $prCursor: String, $commentCursor: String) {
      search(query: "repo:coq/coq coqbot ci minimize", type:ISSUE, first: 10, after: $prCursor) @skip (if: $single) {
        pageInfo {
          endCursor
          hasNextPage
        }
        nodes {
          ... pullRequest
        }
      }
      repository(owner: "coq", name: "coq") @include (if: $single) {
        pullRequest(number: $number) {
          ... pullRequest
        }
      }
    }

    fragment pullRequest on PullRequest {
      number
      author { login }
      comments(first: 50, after: $commentCursor) {
        pageInfo {
          endCursor
          hasNextPage
        }
        nodes {
          createdAt
          author { login }
          bodyText
          databaseId
        }
      }
    }
  """

  def login_of(node):
    # Return the author's login, or None when the API reports no author.
    # `author` is a nullable field in the GitHub GraphQL schema, so it must
    # not be subscripted blindly.
    author = node['author']
    return author['login'] if author is not None else None

  def treat_pr(pr):
    # Turn one page of a pull request's comments into CSV rows, and schedule
    # a single-PR request for the next page of comments when there is one.
    rows, variables = [], []
    number = pr['number']
    if login_of(pr) != 'JasonGross':
      for comment in pr['comments']['nodes']:
        if login_of(comment) == "coqbot-app":
          # Drop the timezone so that the CSV holds naive timestamps.
          date = pd.to_datetime(comment['createdAt']).tz_localize(None)
          # Keep the first 15 lines, joined by a literal backslash-n so that
          # every comment stays on one CSV line.
          body = '\\n'.join(comment['bodyText'].split('\n')[:15])
          rows.append([comment['databaseId'],number,date,body])
    if pr['comments']['pageInfo']['hasNextPage']:
      variables += [{
          'single':True,
          'number':number,
          'commentCursor':pr['comments']['pageInfo']['endCursor']
      }]
    return rows, variables

  def rows_and_next_variables(data):
    # Pagination callback for requestAllPages: None means "give me the first
    # request"; otherwise `data` is either a search page or a single PR.
    if data is None:
      return [], [{'single':False,'number':0}]
    else:
      if 'search' in data:
        prs = data['search']
        rows, variables = [], []
        for pr in prs['nodes']:
          # Nodes without a 'number' are skipped; presumably these are search
          # hits that are not pull requests, for which the fragment selects
          # nothing.
          if 'number' in pr:
            new_rows, new_variables = treat_pr(pr)
            rows += new_rows
            variables += new_variables
        if prs['pageInfo']['hasNextPage']:
          variables += [{
              'single':False,
              'number':0,
              'prCursor':prs['pageInfo']['endCursor']
          }]
        return rows, variables
      else:
        return treat_pr(data['repository']['pullRequest'])

  requestAllPages(
      query,
      rows_and_next_variables,
      'pr_comments.csv',
      ['id','number','date','body']
  )

# fetch_pr_comments()

## Data processing

We retrieve the saved dataset from disk.

In [5]:
# Load the saved comments; the first CSV column becomes the index and the
# 'date' column is parsed as timestamps.
coqbot_comments = pd.read_csv(
    'pr_comments.csv',
    index_col=0,
    parse_dates=['date'],
)

In [6]:
# Comments whose body mentions a minimized file, ordered by pull request number.
ci_minimize_results = (
    coqbot_comments
    .loc[lambda comments: comments['body'].str.contains('Minimized File')]
    .sort_values('number')
)
ci_minimize_results

Unnamed: 0_level_0,number,date,body
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
986083609,11966,2021-12-04 19:51:01,Minimized File /github/workspace/builds/coq/co...
985996564,11966,2021-12-04 09:16:38,Minimized File /github/workspace/builds/coq/co...
985961859,11966,2021-12-04 03:59:43,Minimized File /github/workspace/builds/coq/co...
985891061,11966,2021-12-03 22:42:41,Minimized File /github/workspace/builds/coq/co...
886395867,11966,2021-07-26 05:49:12,Minimized File /github/workspace/builds/coq/co...
...,...,...,...
1017955960,15518,2022-01-20 21:47:05,Minimized File /github/workspace/builds/coq/co...
1017957113,15518,2022-01-20 21:48:56,Minimized File /github/workspace/builds/coq/co...
1017959688,15518,2022-01-20 21:53:07,Minimized File /github/workspace/builds/coq/co...
1018009908,15518,2022-01-20 23:14:59,Minimized File /github/workspace/builds/coq/co...


We can see that our dataset includes 50 pull requests for which a reduced test case was produced by the bug minimizer.

In [7]:
# Distinct pull request numbers that received at least one minimized file.
minimized_prs = ci_minimize_results.loc[:, 'number'].drop_duplicates(keep='first')
minimized_prs.size

50

### Matching CI minimize comments

We look for comments marking the beginning and the end of the minimization. We only keep the first run for each pull request and minimized project to avoid double counting minimization examples.

In [8]:
# Comments announcing that a minimization run has started.
minimization_started_comments = coqbot_comments.loc[
    coqbot_comments['body'].str.match(r'I (?:have initiated|am now running) minimization at commit [a-z0-9]* (?:for the suggested targets? | on)?')
]
# Keep a single row per comment id.
minimization_started_comments = minimization_started_comments.loc[
    ~minimization_started_comments.index.duplicated(keep='first')
]
# One row per `ci-...` target named in each comment.
targets = minimization_started_comments['body'].str.extractall(r'(?P<target>ci-[^,.\s]*)')
# Earliest start date for every (target, pull request) pair.
minimization_started_comments = (
    targets
    .join(minimization_started_comments)
    .sort_values('date')
    .drop_duplicates(subset=['target','number'])
    .set_index(['target','number'])
    [['date']]
)
minimization_started_comments

Unnamed: 0_level_0,Unnamed: 1_level_0,date
target,number,Unnamed: 2_level_1
ci-mathcomp,13969,2021-05-21 22:34:08
ci-fourcolor,13969,2021-05-26 14:10:29
ci-equations,13969,2021-05-26 14:10:29
ci-iris,13969,2021-05-26 14:10:29
ci-perennial,13969,2021-05-26 14:10:29
...,...,...
ci-quickchick,15518,2022-01-20 21:17:11
ci-perennial,15518,2022-01-20 21:17:11
ci-metacoq,15518,2022-01-20 21:17:11
ci-math_classes,15518,2022-01-20 21:17:11


For successful minimization runs (i.e., runs that produced a minimized file, and were not automatically restarted after being interrupted by a timeout), we extract information from the headers of the minimized files, such as the expected `coqc` runtime on this file, the initial number of lines, and the final number of lines, or if any module couldn't be inlined.

In [9]:
# Comments reporting a minimized file, excluding the intermediate reports of
# runs that were interrupted by a timeout and automatically continued.
minimization_success_comments = coqbot_comments[coqbot_comments['body'].str.startswith('Minimized File') & ~coqbot_comments['body'].str.contains('interrupted by timeout, being automatically continued')]
# Parse the header of each minimized file. `expand=False` makes `extract`
# return a Series (one capture group), and the numeric columns are floats so
# that comments without the corresponding header line get NaN.
minimization_success_comments = minimization_success_comments.assign(
    runtime = lambda comments: comments['body'].str.extract(r'Expected coqc runtime on this file: ([0-9\.]+) sec', expand=False).astype(float),
    initial_size = lambda comments: comments['body'].str.extract(r'from original input, then from ([0-9]+) lines', expand=False).astype(float),
    final_size = lambda comments: comments['body'].str.extract(r'to ([0-9]+) lines \*\)', expand=False).astype(float),
    inline_failure = lambda comments: comments['body'].str.contains('could not be inlined'),
    # Raw string: `\(` is a regex escape, not a valid Python string escape.
    truncated = lambda comments: comments['body'].str.contains(r'Minimized Coq File \(truncated')
)
minimization_success_comments

Unnamed: 0_level_0,number,date,body,runtime,initial_size,final_size,inline_failure,truncated
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
996116369,15274,2021-12-16 19:31:28,Minimized File /github/workspace/builds/coq/co...,0.213,321.0,124.0,False,False
1001010494,15400,2021-12-25 12:16:25,Minimized File /github/workspace/builds/coq/co...,1.018,522.0,81.0,True,False
1017939088,15518,2022-01-20 21:22:16,Minimized File /github/workspace/builds/coq/co...,,,,False,True
1017939694,15518,2022-01-20 21:23:10,Minimized File /github/workspace/builds/coq/co...,0.117,442.0,31.0,False,False
1017939985,15518,2022-01-20 21:23:38,Minimized File /github/workspace/builds/coq/co...,0.134,76.0,25.0,False,False
...,...,...,...,...,...,...,...,...
872424689,13107,2021-07-01 17:27:36,Minimized File /github/workspace/builds/coq/co...,,3477.0,121.0,False,False
872633626,13107,2021-07-02 00:39:37,Minimized File /github/workspace/builds/coq/co...,,1465.0,73.0,False,False
872648442,13107,2021-07-02 01:28:51,Minimized File /github/workspace/builds/coq/co...,,2300.0,128.0,False,False
872665027,13107,2021-07-02 02:22:08,Minimized File /github/workspace/builds/coq/co...,,1465.0,133.0,False,False


Only 6 reduced cases were explicitly recorded with an inline failure:

In [10]:
# Rows whose comment explicitly reports a module that could not be inlined.
minimization_success_comments.loc[
    minimization_success_comments['inline_failure'].eq(True)
]

Unnamed: 0_level_0,number,date,body,runtime,initial_size,final_size,inline_failure,truncated
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1001010494,15400,2021-12-25 12:16:25,Minimized File /github/workspace/builds/coq/co...,1.018,522.0,81.0,True,False
1018088738,15518,2022-01-21 01:50:46,Minimized File /github/workspace/builds/coq/co...,1.084,2242.0,201.0,True,False
1015412511,15501,2022-01-18 13:28:18,Minimized File /github/workspace/builds/coq/co...,0.484,727.0,105.0,True,False
1015698198,15501,2022-01-18 18:33:20,Minimized File (from ci-color) (full log on G...,1.399,508.0,610.0,True,False
986135923,11966,2021-12-04 23:37:37,Minimized File /github/workspace/builds/coq/co...,1.011,2430.0,1320.0,True,True
967250099,15171,2021-11-12 16:33:26,Minimized File (from ci-vst) (full log on Git...,10.944,330.0,613.0,True,False


But there are a few more where the minimizer failed to inline dependencies and failed to insert header comments:

In [11]:
# Rows for which no initial size could be parsed from the comment.
minimization_success_comments.loc[
    minimization_success_comments['initial_size'].isnull()
]

Unnamed: 0_level_0,number,date,body,runtime,initial_size,final_size,inline_failure,truncated
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1017939088,15518,2022-01-20 21:22:16,Minimized File /github/workspace/builds/coq/co...,,,,False,True
1013700274,15487,2022-01-15 15:26:47,Minimized File /github/workspace/builds/coq/co...,,,,False,True
870663613,13107,2021-06-29 14:43:55,Minimized File /github/workspace/builds/coq/co...,,,,False,True
870663887,13107,2021-06-29 14:44:15,Minimized File /github/workspace/builds/coq/co...,,,,False,False
870664233,13107,2021-06-29 14:44:40,Minimized File /github/workspace/builds/coq/co...,,,,False,True
870664255,13107,2021-06-29 14:44:41,Minimized File /github/workspace/builds/coq/co...,,,,False,False
870664569,13107,2021-06-29 14:45:02,Minimized File /github/workspace/builds/coq/co...,,,,False,False
870665002,13107,2021-06-29 14:45:36,Minimized File /github/workspace/builds/coq/co...,,,,False,False
870867794,13107,2021-06-29 19:46:50,Minimized File /github/workspace/builds/coq/co...,,,,False,False
901281069,14746,2021-08-18 17:06:24,Minimized File /github/workspace/builds/coq/co...,,,,False,False


For these reduced test cases, we also compute the total number of removed lines by aggregating the information found in the headers about the number of lines removed at each step.

In [12]:
# Every "then from N lines to M lines" step found in a comment becomes a row.
line_reduction = minimization_success_comments['body'].str.extractall(
    r'then from (?P<reduced_from>[0-9]+) lines to (?P<reduced_to>[0-9]+) lines'
)
# Lines removed by each step; steps that grew the file count as zero.
line_reduction = line_reduction.assign(
    total_line_reduction=lambda steps: (
        steps['reduced_from'].astype(int) - steps['reduced_to'].astype(int)
    ).clip(lower=0)
)
# Sum the steps of each comment and attach the total to the comment table.
line_reduction = line_reduction.groupby(level=[0]).sum()
minimization_success_comments = minimization_success_comments.join(
    line_reduction['total_line_reduction']
)
minimization_success_comments

Unnamed: 0_level_0,number,date,body,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
847034130,13969,2021-05-24 13:13:51,Minimized File /github/workspace/builds/coq/co...,,4170.0,109.0,False,False,5749.0
848805751,13969,2021-05-26 14:14:19,Minimized File /github/workspace/builds/coq/co...,,,,False,True,
848806668,13969,2021-05-26 14:15:27,Minimized File /github/workspace/builds/coq/co...,,,,False,False,
848807189,13969,2021-05-26 14:16:08,Minimized File /github/workspace/builds/coq/co...,,1726.0,3.0,False,False,3455.0
848809847,13969,2021-05-26 14:19:23,Minimized File /github/workspace/builds/coq/co...,,1119.0,66.0,False,False,1206.0
...,...,...,...,...,...,...,...,...,...
1017957113,15518,2022-01-20 21:48:56,Minimized File /github/workspace/builds/coq/co...,0.265,457.0,270.0,False,False,2904.0
1017959688,15518,2022-01-20 21:53:07,Minimized File /github/workspace/builds/coq/co...,0.339,4967.0,51.0,False,False,7489.0
1017981163,15518,2022-01-20 22:25:52,Minimized File /github/workspace/builds/coq/co...,0.488,464.0,474.0,False,False,3981.0
1018009908,15518,2022-01-20 23:14:59,Minimized File /github/workspace/builds/coq/co...,0.714,789.0,729.0,False,False,5213.0


In [13]:
# Comments reporting that the minimizer could not produce a file.
minimization_failure_comments = coqbot_comments.loc[
    lambda comments: comments['body'].str.startswith('Error: Could not minimize file')
]
minimization_failure_comments

Unnamed: 0_level_0,number,date,body
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1017937526,15518,2022-01-20 21:20:13,Error: Could not minimize file (from ci-sf) (...
1017937646,15518,2022-01-20 21:20:22,Error: Could not minimize file (from ci-itaut...
1017937663,15518,2022-01-20 21:20:24,Error: Could not minimize file (from ci-relat...
1017937985,15518,2022-01-20 21:20:54,Error: Could not minimize file (from ci-metac...
1017944365,15518,2022-01-20 21:29:39,Error: Could not minimize file (from ci-categ...
1018010794,15518,2022-01-20 23:16:34,Error: Could not minimize file (from ci-categ...
1018023671,15518,2022-01-20 23:41:13,Error: Could not minimize file (from ci-sf) (...
1018023737,15518,2022-01-20 23:41:23,Error: Could not minimize file (from ci-metac...
1018023811,15518,2022-01-20 23:41:33,Error: Could not minimize file (from ci-relat...
1018023841,15518,2022-01-20 23:41:38,Error: Could not minimize file (from ci-categ...


We combine the comments marking a successful or a failed end of minimization, and only keep the first run for each project and pull request:

In [14]:
# Stack successful and failed end-of-run comments, flagged by `success`.
# `pd.concat` replaces the deprecated `DataFrame.append`.
minimization_finished_comments = pd.concat([
    minimization_success_comments.assign(success=True),
    minimization_failure_comments.assign(success=False),
])
# First `ci-...` token of each comment, read up to a closing parenthesis.
targets = minimization_finished_comments['body'].str.extract(r'(?P<target>ci-[^)]*)')
# Keep the earliest finished comment for every (target, pull request) pair.
minimization_finished_comments = (
    targets
    .join(minimization_finished_comments)
    .sort_values('date')
    .drop_duplicates(subset=['target','number'])
    .set_index(['target','number'])
)
minimization_finished_comments

Unnamed: 0_level_0,Unnamed: 1_level_0,date,body,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction,success
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ci-mathcomp,13969,2021-05-21 22:37:15,Error: Could not minimize file (from ci-mathc...,,,,,,,False
ci-iris,13969,2021-05-26 14:13:55,Error: Could not minimize file (from ci-iris)...,,,,,,,False
ci-equations,13969,2021-05-26 14:14:19,Minimized File /github/workspace/builds/coq/co...,,,,False,True,,True
ci-fourcolor,13969,2021-05-26 14:15:27,Minimized File /github/workspace/builds/coq/co...,,,,False,False,,True
ci-perennial,13969,2021-05-26 14:16:08,Minimized File /github/workspace/builds/coq/co...,,1726.0,3.0,False,False,3455.0,True
...,...,...,...,...,...,...,...,...,...,...
ci-iris,15518,2022-01-20 21:48:56,Minimized File /github/workspace/builds/coq/co...,0.265,457.0,270.0,False,False,2904.0,True
ci-compcert,15518,2022-01-20 21:53:07,Minimized File /github/workspace/builds/coq/co...,0.339,4967.0,51.0,False,False,7489.0,True
ci-coqprime,15518,2022-01-20 22:25:52,Minimized File /github/workspace/builds/coq/co...,0.488,464.0,474.0,False,False,3981.0,True
ci-argosy,15518,2022-01-20 23:14:59,Minimized File /github/workspace/builds/coq/co...,0.714,789.0,729.0,False,False,5213.0,True


We match these with the comments corresponding to the beginning of the minimization to compute the minimization duration:

In [15]:
# Pair each start comment with the matching end comment on (target, number);
# the overlapping `date` column becomes `date_start` / `date_end`.
minimization_pairs = minimization_started_comments.join(minimization_finished_comments,lsuffix='_start',rsuffix='_end')
# Duration in seconds. `.dt.total_seconds()` covers the whole timedelta,
# whereas `.dt.seconds` is only its seconds component and silently drops
# whole days (and wraps around for negative differences).
minimization_pairs = minimization_pairs.assign(duration=(minimization_pairs['date_end'] - minimization_pairs['date_start']).dt.total_seconds())
minimization_pairs

Unnamed: 0_level_0,Unnamed: 1_level_0,date_start,date_end,body,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction,success,duration
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
ci-mathcomp,13969,2021-05-21 22:34:08,2021-05-21 22:37:15,Error: Could not minimize file (from ci-mathc...,,,,,,,False,187.0
ci-fourcolor,13969,2021-05-26 14:10:29,2021-05-26 14:15:27,Minimized File /github/workspace/builds/coq/co...,,,,False,False,,True,298.0
ci-equations,13969,2021-05-26 14:10:29,2021-05-26 14:14:19,Minimized File /github/workspace/builds/coq/co...,,,,False,True,,True,230.0
ci-iris,13969,2021-05-26 14:10:29,2021-05-26 14:13:55,Error: Could not minimize file (from ci-iris)...,,,,,,,False,206.0
ci-perennial,13969,2021-05-26 14:10:29,2021-05-26 14:16:08,Minimized File /github/workspace/builds/coq/co...,,1726.0,3.0,False,False,3455.0,True,339.0
...,...,...,...,...,...,...,...,...,...,...,...,...
ci-quickchick,15518,2022-01-20 21:17:11,2022-01-20 21:25:12,Minimized File /github/workspace/builds/coq/co...,0.130,164.0,34.0,False,False,135.0,True,481.0
ci-perennial,15518,2022-01-20 21:17:11,2022-01-20 21:47:05,Minimized File /github/workspace/builds/coq/co...,0.256,457.0,270.0,False,False,2904.0,True,1794.0
ci-metacoq,15518,2022-01-20 21:17:11,2022-01-20 21:20:54,Error: Could not minimize file (from ci-metac...,,,,,,,False,223.0
ci-math_classes,15518,2022-01-20 21:17:11,2022-01-20 21:32:19,Minimized File /github/workspace/builds/coq/co...,0.169,408.0,42.0,False,False,1359.0,True,908.0


For the successful cases, there are other data that we can analyze:

In [16]:
# Runs that ended with a minimized file, restricted to the measured columns.
successful_minimization_pairs = minimization_pairs.loc[
    minimization_pairs['success'].eq(True),
    ['duration','runtime','initial_size','final_size','inline_failure','truncated','total_line_reduction'],
]
successful_minimization_pairs

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ci-fourcolor,13969,298.0,,,,False,False,
ci-equations,13969,230.0,,,,False,True,
ci-perennial,13969,339.0,,1726.0,3.0,False,False,3455.0
ci-quickchick,13969,534.0,,1119.0,66.0,False,False,1206.0
ci-interval,13895,353.0,,,,False,True,
...,...,...,...,...,...,...,...,...
ci-unimath,15518,562.0,0.150,3947.0,79.0,False,False,4327.0
ci-rewriter,15518,359.0,0.117,442.0,31.0,False,False,416.0
ci-quickchick,15518,481.0,0.130,164.0,34.0,False,False,135.0
ci-perennial,15518,1794.0,0.256,457.0,270.0,False,False,2904.0


Proportion of the time the minimizer was able to produce a minimized file:

In [17]:
# Share of started runs that produced a minimized file.
successful_minimization_pairs.shape[0] / minimization_pairs.shape[0]

0.8391959798994975

In [18]:
# Distribution of the minimization duration (seconds) for successful runs.
successful_minimization_pairs.loc[:, 'duration'].describe(
    percentiles=[0.5, 0.6, 0.7, 0.8, 0.9]
)

count      167.000000
mean      5419.125749
std      11047.054423
min        165.000000
50%        919.000000
60%       1506.400000
70%       3191.200000
80%       6133.000000
90%      19204.000000
max      73072.000000
Name: duration, dtype: float64

When are the reduced test cases fully standalone? No inlining failure:

In [19]:
# Successful runs with no reported inlining failure and a known final size.
no_inlining_failure = successful_minimization_pairs.loc[
    successful_minimization_pairs['inline_failure'].eq(False)
    & successful_minimization_pairs['final_size'].notna()
]
no_inlining_failure

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ci-perennial,13969,339.0,,1726.0,3.0,False,False,3455.0
ci-quickchick,13969,534.0,,1119.0,66.0,False,False,1206.0
ci-hott,13126,8548.0,,294.0,483.0,False,False,15136.0
ci-fiat_crypto_legacy,13072,380.0,,116.0,63.0,False,False,250.0
ci-quickchick,13072,1399.0,,121.0,150.0,False,False,1596.0
...,...,...,...,...,...,...,...,...
ci-unimath,15518,562.0,0.150,3947.0,79.0,False,False,4327.0
ci-rewriter,15518,359.0,0.117,442.0,31.0,False,False,416.0
ci-quickchick,15518,481.0,0.130,164.0,34.0,False,False,135.0
ci-perennial,15518,1794.0,0.256,457.0,270.0,False,False,2904.0


In [20]:
# Share of successful runs without an inlining failure.
no_inlining_failure.shape[0] / successful_minimization_pairs.shape[0]

0.8383233532934131

No inlining failure *and* not truncated:

In [21]:
# Additionally require that the minimized file was not truncated.
fully_standalone = no_inlining_failure.loc[
    no_inlining_failure['truncated'].eq(False)
]
fully_standalone

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ci-perennial,13969,339.0,,1726.0,3.0,False,False,3455.0
ci-quickchick,13969,534.0,,1119.0,66.0,False,False,1206.0
ci-hott,13126,8548.0,,294.0,483.0,False,False,15136.0
ci-fiat_crypto_legacy,13072,380.0,,116.0,63.0,False,False,250.0
ci-quickchick,13072,1399.0,,121.0,150.0,False,False,1596.0
...,...,...,...,...,...,...,...,...
ci-unimath,15518,562.0,0.150,3947.0,79.0,False,False,4327.0
ci-rewriter,15518,359.0,0.117,442.0,31.0,False,False,416.0
ci-quickchick,15518,481.0,0.130,164.0,34.0,False,False,135.0
ci-perennial,15518,1794.0,0.256,457.0,270.0,False,False,2904.0


In [22]:
# Share of successful runs that are fully standalone.
fully_standalone.shape[0] / successful_minimization_pairs.shape[0]

0.7544910179640718

How much smaller the final size is compared to the initial size:

In [23]:
# Exclude runs whose initial size is zero so the ratio is well defined.
successful_minimization_pairs_non_zero = successful_minimization_pairs.loc[
    successful_minimization_pairs['initial_size'].ne(0)
]
# Mean of final/initial size over the runs where both sizes are known.
(
    successful_minimization_pairs_non_zero['final_size']
    .div(successful_minimization_pairs_non_zero['initial_size'])
    .dropna()
    .mean()
)

0.6031942878889917

In [24]:
# The runs left out of the size ratio because their initial size is zero.
successful_minimization_pairs.loc[
    lambda pairs: pairs['initial_size'].eq(0)
]

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ci-relation_algebra,13107,202.0,,0.0,638.0,False,False,0.0


Number of reduced cases for which we have computed the expected `coqc` runtime:

In [25]:
# Number of success comments for which a runtime was parsed.
int(minimization_success_comments['runtime'].notna().sum())

54

In [26]:
# Median and count per target for the numeric measurements. The columns are
# selected explicitly instead of relying on pandas silently dropping the
# non-numeric ones (`inline_failure`, `truncated`), which newer pandas
# versions no longer do.
(
    successful_minimization_pairs[['duration','runtime','initial_size','final_size','total_line_reduction']]
    .groupby(['target'])
    .agg(['median','count'])
)

Unnamed: 0_level_0,duration,duration,runtime,runtime,initial_size,initial_size,final_size,final_size,total_line_reduction,total_line_reduction
Unnamed: 0_level_1,median,count,median,count,median,count,median,count,median,count
target,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
ci-aac_tactics,232.0,1,,0,279.0,1,27.0,1,256.0,1
ci-argosy,3163.5,4,0.714,1,540.0,4,369.0,4,3140.0,4
ci-bbv,619.0,3,0.095,1,7591.0,3,33.0,3,7547.0,3
ci-bedrock2,671.0,11,0.1325,4,533.0,9,464.0,9,3579.0,9
ci-bignums,8529.0,1,,0,1747.0,1,1719.0,1,8774.0,1
ci-category_theory,457.0,5,0.2365,2,262.0,3,105.0,3,838.0,3
ci-color,2564.0,6,1.2085,2,714.0,5,366.0,5,6596.0,5
ci-compcert,1197.0,5,0.339,1,728.0,5,362.0,5,1323.0,5
ci-coq_performance_tests,232.0,1,,0,,0,,0,,0
ci-coqprime,4121.0,1,0.488,1,464.0,1,474.0,1,3981.0,1


In [27]:
# Flatten the (target, number) index into columns to filter on the target.
minimization_pairs_reset = minimization_pairs.reset_index()
minimization_pairs_reset.loc[
    minimization_pairs_reset['target'].isin(['ci-bignums'])
]

Unnamed: 0,target,number,date_start,date_end,body,runtime,initial_size,final_size,inline_failure,truncated,total_line_reduction,success,duration
51,ci-bignums,13107,2021-06-29 14:41:10,2021-06-29 17:03:19,Minimized File /github/workspace/builds/coq/co...,,1747.0,1719.0,False,True,8774.0,True,8529.0
