# Supporting code and data for "..."

In [1]:
%matplotlib inline

import os
import sys
print(f'Python {sys.version}')

import IPython
from IPython.core.display import display, HTML
print(f'IPython {IPython.__version__}')

print('\nLibraries:\n')

import csv
print(f'csv {csv.__version__}')

import matplotlib
import matplotlib.pyplot as plt
print(f'matplotlib {matplotlib.__version__}')

import numpy as np
print(f'numpy {np.__version__}')

import pandas as pd
from pandas.plotting import register_matplotlib_converters
print(f'pandas {pd.__version__}')

import re
print(f're {re.__version__}')

import requests
print(f'requests {requests.__version__}')

#import scipy
#import scipy.stats
#print(f'scipy {scipy.__version__}')


#import statsmodels
#import statsmodels.formula.api as smf
#from statsmodels.stats.outliers_influence import summary_table
#print(f'statsmodels {statsmodels.__version__}')

Python 3.9.6 (default, Jun 28 2021, 08:57:49) 
[GCC 10.3.0]
IPython 7.24.1

Libraries:

csv 1.0
matplotlib 3.4.2
numpy 1.20.3
pandas 1.2.4
re 2.2.1
requests 2.25.1


## Data collection

We use the GitHub GraphQL API because it lets us fetch only the information we need, and at a much faster rate (we can get up to 100 nodes in a single request). Getting all the objects of a certain type then requires repeating the request to go through all the pages of results.

You need to provide a personal `api_token` if you want to get fresh data from GitHub. Otherwise, this notebook will skip the data collection step and load the CSV files from the local filesystem.

In [2]:
# Personal GitHub API token used by the data-collection cells. It is read from the
# GITHUB_API_TOKEN environment variable so that the secret never has to be typed into
# (and accidentally committed with) the notebook. When the variable is unset the token
# is the empty string and data collection is skipped.
api_token = os.environ.get('GITHUB_API_TOKEN', '')

In [3]:
def requestAllPages(query,rows_and_next_variables,filename,columns):
  """Run a paginated GitHub GraphQL query and save the collected rows as CSV.

  Does nothing when the global `api_token` is empty.

  Parameters:
    query: GraphQL query text sent with every request.
    rows_and_next_variables: callback. Called once with None to obtain the
      initial `(rows, variables)` pair, then with the `data` member of each
      response. It returns the rows extracted from that response and a list of
      variable dicts, one per follow-up request that is still needed.
    filename: path of the CSV file to write.
    columns: header row of the CSV file.

  Raises:
    requests.HTTPError: if a request returns an unsuccessful HTTP status.
    RuntimeError: if a response carries no `data` (GraphQL-level error).
  """
  if api_token == '':
    return
  headers = {'Authorization': f'token {api_token}'}
  url = 'https://api.github.com/graphql'
  rows, variables = rows_and_next_variables(None)
  while len(variables)>0:
    # Pending requests are processed last-in, first-out.
    payload = {'query':query,'variables':variables.pop()}
    r = requests.post(url=url, json=payload, headers=headers)
    if r.status_code == 403:
      print('Unauthorized request:')
      print(payload)
    r.raise_for_status() # Abort if unsuccessful request
    response = r.json()
    data = response.get('data')
    if data is None:
      # A null `data` must not reach the callback: it treats None as "no request
      # made yet" and would restart the pagination from scratch, looping forever.
      raise RuntimeError(f"GraphQL request failed: {response.get('errors')}")
    new_rows, next_variables = rows_and_next_variables(data)
    rows += new_rows
    variables += next_variables
  if len(rows) > 0:
    # newline='' is what the csv module requires for files handed to csv.writer;
    # the explicit encoding keeps non-ASCII comment text portable across platforms.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
      writer = csv.writer(f)
      writer.writerow(columns)
      writer.writerows(rows)

We look for all the pull requests where the bot has proposed to minimize the CI failures. We get them by searching for the words "coqbot ci minimize".
This query is redundant with the next one, but useful if one only wants the list of PRs.
Uncomment the last line and make sure to provide an `api_token` to run it.

In [4]:
def fetch_prs():
  """Fetch the numbers of all coq/coq pull requests matching the search
  "coqbot ci minimize" and store them in 'pullrequests.csv'."""

  query = """
    query getPullRequestList($cursor: String) {
      search(query: "repo:coq/coq coqbot ci minimize", type:ISSUE, first: 100, after: $cursor) {
        nodes {
          ... on PullRequest {
            number
          }
        }
        pageInfo {
          hasNextPage
          endCursor
        }
      }
    }
  """

  def rows_and_next_variables(data):
    # Initial call: no rows yet, one request to make, without any cursor.
    if data is None:
      return [], [{}]
    search = data['search']
    # Nodes without a 'number' key are skipped.
    rows = [[node['number']] for node in search['nodes'] if 'number' in node]
    page_info = search['pageInfo']
    if not page_info['hasNextPage']:
      return rows, []
    return rows, [{'cursor':page_info['endCursor']}]

  requestAllPages(query, rows_and_next_variables, 'pullrequests.csv', ['number'])

# fetch_prs()

Here, we search again for all PRs where CI minimization was proposed but we retrieve all the comments to know what happened. Uncomment the last line and make sure to provide an `api_token` to re-run this.

In [5]:
def fetch_pr_comments():
  """Fetch every comment of the coq/coq pull requests matching the search
  "coqbot ci minimize" and store them in 'pr_comments.csv'.

  The same query is used in two modes, selected by the `single` variable:
  a search over pull requests (10 per page, with the first 50 comments of
  each), and a lookup of one pull request by number to page through its
  remaining comments.
  """

  query = """
    query commentQuery($number: Int!, $single: Boolean!, $prCursor: String, $commentCursor: String) {
      search(query: "repo:coq/coq coqbot ci minimize", type:ISSUE, first: 10, after: $prCursor) @skip (if: $single) {
        pageInfo {
          endCursor
          hasNextPage
        }
        nodes {
          ... pullRequest
        }
      }
      repository(owner: "coq", name: "coq") @include (if: $single) {
        pullRequest(number: $number) {
          ... pullRequest
        }
      }
    }

    fragment pullRequest on PullRequest {
      number
      author { login }
      comments(first: 50, after: $commentCursor) {
        pageInfo {
          endCursor
          hasNextPage
        }
        nodes {
          createdAt
          author { login }
          bodyText
          databaseId
        }
      }
    }
  """

  def login_of(actor):
    # `author` comes back as null when the account no longer exists; record a
    # missing login (empty CSV field) instead of crashing on None['login'].
    return actor['login'] if actor is not None else None

  def treat_pr(pr):
    """Return the comment rows of one pull request node, plus the variables of
    the follow-up request if that pull request has more comment pages."""
    rows, variables = [], []
    number = pr['number']
    pr_author = login_of(pr['author'])
    for comment in pr['comments']['nodes']:
      # Timestamps are stored without timezone information.
      date = pd.to_datetime(comment['createdAt']).tz_localize(None)
      # Keep only the first 500 characters and escape newlines so that each
      # comment stays on one CSV line.
      body = comment['bodyText'][:500].replace('\n','\\n')
      rows.append([comment['databaseId'],number,pr_author,date,login_of(comment['author']),body])
    if pr['comments']['pageInfo']['hasNextPage']:
      variables += [{
          'single':True,
          'number':number,
          'commentCursor':pr['comments']['pageInfo']['endCursor']
      }]
    return rows, variables

  def rows_and_next_variables(data):
    if data is None:
      # Initial request: search mode. `number` is declared Int! in the query,
      # so a dummy value is supplied even though it is unused in this mode.
      return [], [{'single':False,'number':0}]
    else:
      if 'search' in data:
        prs = data['search']
        rows, variables = [], []
        for pr in prs['nodes']:
          # Nodes without a 'number' key are skipped.
          if 'number' in pr:
            new_rows, new_variables = treat_pr(pr)
            rows += new_rows
            variables += new_variables
        if prs['pageInfo']['hasNextPage']:
          variables += [{
              'single':False,
              'number':0,
              'prCursor':prs['pageInfo']['endCursor']
          }]
        return rows, variables
      else:
        return treat_pr(data['repository']['pullRequest'])

  requestAllPages(
      query,
      rows_and_next_variables,
      'pr_comments.csv',
      ['id','number','pr_author','date','author','body']
  )

# fetch_pr_comments()

## Data processing

In [6]:
# Load the collected comments; the first CSV column becomes the index.
pr_comments = pd.read_csv(
    'pr_comments.csv',
    index_col=0,
    parse_dates=['date'],
)

Pre-processing: we exclude PRs opened by Jason Gross (mostly for testing the CI minimizer):

In [7]:
# Keep only the comments on pull requests that were not opened by JasonGross.
pr_comments = pr_comments.loc[pr_comments['pr_author'] != 'JasonGross']

Retrieve all comments that triggered the bug minimizer:

In [8]:
# Comments that mention the trigger phrase and were not posted by the bot itself.
ci_minimize_comments = (
    pr_comments
    .loc[lambda df: df['body'].str.contains('@coqbot:? [Cc][Ii][- ][Mm]inimize')
                    & ~ df['author'].isin(['coqbot-app'])]
    .sort_values('number')
)
ci_minimize_comments

Unnamed: 0_level_0,number,pr_author,date,author,body
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
886229972,11966,olaure01,2021-07-25 16:58:40,olaure01,@coqbot ci minimize
864456480,12493,ppedrot,2021-06-19 19:57:50,JasonGross,@coqbot ci minimize
883774401,12493,ppedrot,2021-07-20 23:39:01,JasonGross,Hopefully things work better this time @coqbot...
864662708,12512,ppedrot,2021-06-21 01:37:25,JasonGross,@coqbot ci minimize
909638687,12512,ppedrot,2021-08-31 21:07:07,ppedrot,@coqbot ci minimize
...,...,...,...,...,...
965623788,15128,herbelin,2021-11-10 18:29:38,JasonGross,ci-bedrock2 was looping because the file took ...
965646057,15128,herbelin,2021-11-10 18:58:46,JasonGross,Reported ci-metacoq issue as MetaCoq/metacoq#6...
962297740,15128,herbelin,2021-11-06 00:58:25,JasonGross,@coqbot CI minimize
967020323,15171,ppedrot,2021-11-12 11:12:59,ppedrot,@coqbot ci minimize


In [9]:
# One row per (pull request, comment author) pair, ordered by pull request author.
ci_minimize_triggerers = (
    ci_minimize_comments
    .drop_duplicates(subset=['number', 'author'])
    .sort_values('pr_author')
)
ci_minimize_triggerers

Unnamed: 0_level_0,number,pr_author,date,author,body
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
898420893,14777,Alizter,2021-08-13 12:20:58,Alizter,@coqbot ci minimize
891995911,14740,Alizter,2021-08-03 16:39:13,Zimmi48,This wasn't expected that this would break any...
858809980,14480,SkySkimmer,2021-06-10 17:23:25,SkySkimmer,@coqbot ci minimize ci-iris
898883181,14785,SkySkimmer,2021-08-14 11:39:06,SkySkimmer,@coqbot ci minimize
946769730,15048,SkySkimmer,2021-10-19 14:15:59,SkySkimmer,@coqbot ci minimize hott
958160170,15088,SkySkimmer,2021-11-02 20:56:47,SkySkimmer,@coqbot ci minimize
898862039,14783,SkySkimmer,2021-08-14 07:59:42,SkySkimmer,@coqbot ci minimize\n(error at https://github....
870865222,13107,SkySkimmer,2021-06-29 19:42:28,JasonGross,"Minimization of ci-perennial should work now, ..."
897179754,14758,SkySkimmer,2021-08-11 21:54:13,Alizter,@coqbot ci minimize
939201151,14986,SkySkimmer,2021-10-09 01:37:39,JasonGross,"I've hopefully fixed the import problem, let's..."


Let's focus only on cases where the CI minimization did produce a minimized file:

In [10]:
# Bot comments whose text contains 'Minimized File'.
ci_minimize_results = (
    pr_comments
    .loc[lambda df: df['body'].str.contains('Minimized File')
                    & df['author'].isin(['coqbot-app'])]
    .sort_values('number')
)
ci_minimize_results

Unnamed: 0_level_0,number,pr_author,date,author,body
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
886395867,11966,olaure01,2021-07-26 05:49:12,coqbot-app,Minimized File /github/workspace/builds/coq/co...
886345796,11966,olaure01,2021-07-26 03:32:24,coqbot-app,Minimized File /github/workspace/builds/coq/co...
886265093,11966,olaure01,2021-07-25 22:15:29,coqbot-app,Minimized File /github/workspace/builds/coq/co...
886240171,11966,olaure01,2021-07-25 18:26:22,coqbot-app,Minimized File /github/workspace/builds/coq/co...
886230665,11966,olaure01,2021-07-25 17:04:47,coqbot-app,Minimized File /github/workspace/builds/coq/co...
...,...,...,...,...,...
967120685,15171,ppedrot,2021-11-12 13:31:35,coqbot-app,Minimized File /github/workspace/builds/coq/co...
967035675,15171,ppedrot,2021-11-12 11:38:27,coqbot-app,Minimized File /github/workspace/builds/coq/co...
967034807,15171,ppedrot,2021-11-12 11:36:46,coqbot-app,Minimized File /github/workspace/builds/coq/co...
967025888,15171,ppedrot,2021-11-12 11:22:06,coqbot-app,Minimized File /github/workspace/builds/coq/co...


In [11]:
# Distinct pull request numbers that received at least one 'Minimized File' comment.
minimized_prs = ci_minimize_results.loc[:, 'number'].drop_duplicates()
len(minimized_prs)

39

We call the following "successful triggerers", but this is an over-approximation: it suffices that the CI minimizer was triggered successfully once in the PR, and that could have been by someone else if several people triggered it on the same PR:

In [12]:
# Restrict the triggerers to pull requests where a minimized file was produced.
successful_triggerers = ci_minimize_triggerers.loc[
    ci_minimize_triggerers['number'].isin(minimized_prs)
]
successful_triggerers

Unnamed: 0_level_0,number,pr_author,date,author,body
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
898420893,14777,Alizter,2021-08-13 12:20:58,Alizter,@coqbot ci minimize
891995911,14740,Alizter,2021-08-03 16:39:13,Zimmi48,This wasn't expected that this would break any...
858809980,14480,SkySkimmer,2021-06-10 17:23:25,SkySkimmer,@coqbot ci minimize ci-iris
898883181,14785,SkySkimmer,2021-08-14 11:39:06,SkySkimmer,@coqbot ci minimize
946769730,15048,SkySkimmer,2021-10-19 14:15:59,SkySkimmer,@coqbot ci minimize hott
958160170,15088,SkySkimmer,2021-11-02 20:56:47,SkySkimmer,@coqbot ci minimize
898862039,14783,SkySkimmer,2021-08-14 07:59:42,SkySkimmer,@coqbot ci minimize\n(error at https://github....
870865222,13107,SkySkimmer,2021-06-29 19:42:28,JasonGross,"Minimization of ci-perennial should work now, ..."
897179754,14758,SkySkimmer,2021-08-11 21:54:13,Alizter,@coqbot ci minimize
939201151,14986,SkySkimmer,2021-10-09 01:37:39,JasonGross,"I've hopefully fixed the import problem, let's..."


In [13]:
# Sorted, de-duplicated logins of everyone who authored one of the pull requests or
# posted a triggering comment there, minus JasonGross and Zimmi48. Filtering (rather
# than list.remove) keeps the cell from raising ValueError when one of those two
# logins does not occur in the data, e.g. after refreshing it.
users_to_survey = [
    user
    for user in np.unique(np.append(successful_triggerers['pr_author'],successful_triggerers['author']))
    if user not in ('JasonGross', 'Zimmi48')
]
users_to_survey

['Alizter',
 'SkySkimmer',
 'ana-borges',
 'gares',
 'herbelin',
 'mattam82',
 'maximedenes',
 'mrhaandi',
 'olaure01',
 'ppedrot',
 'proux01']

In [14]:
# Number of users in the survey list built above.
len(users_to_survey)

11

In [15]:
def get_relevant_prs(user):
    """Return the sorted, de-duplicated pull request numbers in
    `successful_triggerers` where `user` is the pull request author or the
    author of a triggering comment."""
    is_pr_author = successful_triggerers['pr_author'].isin([user])
    is_triggerer = successful_triggerers['author'].isin([user])
    relevant = successful_triggerers[is_pr_author | is_triggerer]
    return list(np.unique(relevant['number']))

In [16]:
# Introductory paragraph of the survey request (adjacent literals are concatenated
# into a single string).
print(
    "Hello, we (@JasonGross, @Zimmi48 and @achlipala) are trying to evaluate how useful the new CI minimization feature of coqbot is in practice. "
    "For that, we have created a very short survey (and most questions are optional) and we would appreciate if you could answer it *for each* pull request where the CI minimizer was used "
    "(that you authored or where you triggered the minimizer). Thanks a lot for your help!\n\nHere is a list of pull requests for which we would appreciate "
    "your answer to the survey:"
)

# One task-list section per user, each entry linking to the survey form with the
# pull request number passed in the URL query string.
for user in users_to_survey:
    print(f'\n@{user}:\n')
    for pr in get_relevant_prs(user):
        print(f'- [ ] coq/coq#{pr} ([direct survey link](https://docs.google.com/forms/d/e/1FAIpQLSeWNKcF_XM0PPkydbvx4gaiKJFUG5xpMyewYK1dbtjAQt7FnQ/viewform?entry.155981120={pr}))')

Hello, we (@JasonGross, @Zimmi48 and @achlipala) are trying to evaluate how useful the new CI minimization feature of coqbot is in practice. For that, we have created a very short survey (and most questions are optional) and we would appreciate if you could answer it *for each* pull request where the CI minimizer was used (that you authored or where you triggered the minimizer). Thanks a lot for your help!

Here is a list of pull requests for which we would appreciate your answer to the survey:

@Alizter:

- [ ] coq/coq#14736 ([direct survey link](https://docs.google.com/forms/d/e/1FAIpQLSeWNKcF_XM0PPkydbvx4gaiKJFUG5xpMyewYK1dbtjAQt7FnQ/viewform?entry.155981120=14736))
- [ ] coq/coq#14740 ([direct survey link](https://docs.google.com/forms/d/e/1FAIpQLSeWNKcF_XM0PPkydbvx4gaiKJFUG5xpMyewYK1dbtjAQt7FnQ/viewform?entry.155981120=14740))
- [ ] coq/coq#14758 ([direct survey link](https://docs.google.com/forms/d/e/1FAIpQLSeWNKcF_XM0PPkydbvx4gaiKJFUG5xpMyewYK1dbtjAQt7FnQ/viewform?entry.15598112

## Survey analysis

In [17]:
# Load the survey answers and index them by pull request number.
survey = (
    pd.read_csv('survey.csv', index_col=0)
    .reset_index()
    .set_index('Pull request number where the CI minimizer was used')
    .sort_index()
)

In [18]:
# Display the survey answers, indexed and sorted by pull request number.
survey

Unnamed: 0_level_0,Horodateur,Are you the author of the pull request?,Did you trigger the CI minimizer on this pull request yourself?,Is this the first time that you've used the CI minimizer?,Was the CI minimizer run on this pull request useful to you?,Did the output of the CI minimizer help you to understand the impact of the pull request on external projects?,(Before reading this question) did you use the output of the CI minimizer on this pull request to extend the test suite of Coq?,"If not, can you tell us why?",Do you have more feedback that you'd like to share with us?
Pull request number where the CI minimizer was used,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
11966,2021/11/07 6:17:04 PM UTC+1,Yes,Yes,Yes,Not useful,No,No (and I didn't plan to do so),,
13107,2021/11/07 12:10:11 PM UTC+1,Yes,No,,Not useful,No,No (and I didn't plan to do so),,
13895,2021/10/28 5:24:21 PM UTC+1,Yes,Yes,Yes,Not useful,No,No (and I didn't plan to do so),The output of the minimizer was too long and u...,I thought coqbot was too eager to remind us of...
13969,2021/11/03 4:48:02 PM UTC+1,Yes,Yes,Yes,Very useful,Yes,Yes,,
14480,2021/11/07 12:11:34 PM UTC+1,Yes,Yes,,Somewhat useful,Yes,Yes,,
14612,2021/10/29 8:55:36 AM UTC+1,Yes,Yes,Yes,Very useful,Yes,Yes,,The most impactful benefit of the minimizer fo...
14733,2021/11/07 12:12:29 PM UTC+1,No,Yes,No,Not useful,No,No (and I didn't plan to do so),,
14740,2021/10/29 12:44:13 AM UTC+1,Yes,No,No,Not useful,No,No (and I didn't plan to do so),"In this case, the CI errors were my fault and ...",
14758,2021/11/07 12:13:44 PM UTC+1,Yes,No,,Not useful,No,No (and I didn't plan to do so),,
14758,2021/10/29 12:46:25 AM UTC+1,No,Yes,Yes,Very useful,Yes,Yes,,Worked exactly as expected here.


In [19]:
# Split the answers by whether the respondent triggered the minimizer themselves.
self_triggerers = survey.loc[survey['Did you trigger the CI minimizer on this pull request yourself?'].eq('Yes')]
non_triggerers  = survey.loc[survey['Did you trigger the CI minimizer on this pull request yourself?'].eq('No')]

In [20]:
# Number of survey answers from respondents who triggered the minimizer themselves.
len(self_triggerers)

14

In [21]:
# Answer counts for the usefulness question, among self-triggerers.
self_triggerers.groupby('Was the CI minimizer run on this pull request useful to you?').size()

Was the CI minimizer run on this pull request useful to you?
Not useful         5
Somewhat useful    3
Very useful        6
dtype: int64

In [22]:
# Answer counts for the external-impact question, among self-triggerers.
self_triggerers.groupby('Did the output of the CI minimizer help you to understand the impact of the pull request on external projects?').size()

Did the output of the CI minimizer help you to understand the impact of the pull request on external projects?
No     4
Yes    5
dtype: int64

In [23]:
# Answer counts for the usefulness question, among non-triggerers.
non_triggerers.groupby('Was the CI minimizer run on this pull request useful to you?').size()

Was the CI minimizer run on this pull request useful to you?
Not useful    5
dtype: int64

In [24]:
# Answer counts for the external-impact question, among non-triggerers.
non_triggerers.groupby('Did the output of the CI minimizer help you to understand the impact of the pull request on external projects?').size()

Did the output of the CI minimizer help you to understand the impact of the pull request on external projects?
No    5
dtype: int64

In [25]:
# Self-triggered runs that the respondent rated 'Not useful'.
self_triggerers.loc[self_triggerers['Was the CI minimizer run on this pull request useful to you?'].eq('Not useful')]

Unnamed: 0_level_0,Horodateur,Are you the author of the pull request?,Did you trigger the CI minimizer on this pull request yourself?,Is this the first time that you've used the CI minimizer?,Was the CI minimizer run on this pull request useful to you?,Did the output of the CI minimizer help you to understand the impact of the pull request on external projects?,(Before reading this question) did you use the output of the CI minimizer on this pull request to extend the test suite of Coq?,"If not, can you tell us why?",Do you have more feedback that you'd like to share with us?
Pull request number where the CI minimizer was used,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
11966,2021/11/07 6:17:04 PM UTC+1,Yes,Yes,Yes,Not useful,No,No (and I didn't plan to do so),,
13895,2021/10/28 5:24:21 PM UTC+1,Yes,Yes,Yes,Not useful,No,No (and I didn't plan to do so),The output of the minimizer was too long and u...,I thought coqbot was too eager to remind us of...
14733,2021/11/07 12:12:29 PM UTC+1,No,Yes,No,Not useful,No,No (and I didn't plan to do so),,
14777,2021/10/29 12:51:19 AM UTC+1,Yes,Yes,No,Not useful,No,No (and I didn't plan to do so),In this case the minimizer wasn't able to mini...,
14783,2021/11/07 12:15:05 PM UTC+1,Yes,Yes,No,Not useful,,,,this PR changed the ci commit used so the mini...


In [26]:
# Self-triggered runs whose usefulness rating is anything other than 'Not useful'.
self_triggerers_useful = self_triggerers.loc[
    self_triggerers['Was the CI minimizer run on this pull request useful to you?'].ne('Not useful')
]
self_triggerers_useful

Unnamed: 0_level_0,Horodateur,Are you the author of the pull request?,Did you trigger the CI minimizer on this pull request yourself?,Is this the first time that you've used the CI minimizer?,Was the CI minimizer run on this pull request useful to you?,Did the output of the CI minimizer help you to understand the impact of the pull request on external projects?,(Before reading this question) did you use the output of the CI minimizer on this pull request to extend the test suite of Coq?,"If not, can you tell us why?",Do you have more feedback that you'd like to share with us?
Pull request number where the CI minimizer was used,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
13969,2021/11/03 4:48:02 PM UTC+1,Yes,Yes,Yes,Very useful,Yes,Yes,,
14480,2021/11/07 12:11:34 PM UTC+1,Yes,Yes,,Somewhat useful,Yes,Yes,,
14612,2021/10/29 8:55:36 AM UTC+1,Yes,Yes,Yes,Very useful,Yes,Yes,,The most impactful benefit of the minimizer fo...
14758,2021/10/29 12:46:25 AM UTC+1,No,Yes,Yes,Very useful,Yes,Yes,,Worked exactly as expected here.
14785,2021/11/07 12:16:16 PM UTC+1,Yes,Yes,,Very useful,,No (and I didn't plan to do so),,
14819,2021/10/29 12:48:53 AM UTC+1,No,Yes,No,Very useful,Yes,No (and I didn't plan to do so),"In this case, an external project was found to...","It quickly identified what the problem was, th..."
14929,2021/11/07 12:18:36 PM UTC+1,No,Yes,,Somewhat useful,,,,
14986,2021/11/07 12:20:08 PM UTC+1,Yes,Yes,,Somewhat useful,,No (and I didn't plan to do so),,
15048,2021/11/07 12:21:11 PM UTC+1,Yes,Yes,,Very useful,,Yes,,


In [27]:
# Answer counts for the test-suite question, among the runs kept above.
self_triggerers_useful.groupby('(Before reading this question) did you use the output of the CI minimizer on this pull request to extend the test suite of Coq?').size()

(Before reading this question) did you use the output of the CI minimizer on this pull request to extend the test suite of Coq?
No (and I didn't plan to do so)    3
Yes                                5
dtype: int64

## Matching CI minimize comments

In [28]:
# All comments posted by the bot, reduced to the columns used below.
coqbot_comments = pr_comments.loc[
    pr_comments['author'] == 'coqbot-app',
    ['number','date','body'],
]
coqbot_comments

Unnamed: 0_level_0,number,date,body
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
963479027,15146,2021-11-08 19:01:57,"Hey, I have detected that there were CI failur..."
963972100,15146,2021-11-09 09:33:27,"Hey, I have detected that there were CI failur..."
963983256,15146,2021-11-09 09:48:10,"Hey, I have detected that there were CI failur..."
964012143,15146,2021-11-09 10:23:19,"Hey, I have detected that there were CI failur..."
964316860,15146,2021-11-09 16:27:45,"Hey, I have detected that there were CI failur..."
...,...,...,...
873329671,13107,2021-07-03 02:20:06,I am now running minimization at commit 7c1593...
873330611,13107,2021-07-03 02:28:22,Minimized File /github/workspace/builds/coq/co...
848807189,13969,2021-05-26 14:16:08,Minimized File /github/workspace/builds/coq/co...
848809847,13969,2021-05-26 14:19:23,Minimized File /github/workspace/builds/coq/co...


In [29]:
# Bot comments announcing that a minimization run has started.
minimization_started_comments = coqbot_comments[
    coqbot_comments['body'].str.match(r'I (?:have initiated|am now running) minimization at commit [a-z0-9]* (?:for the suggested targets? | on)?')
]
# A single comment can name several targets: extractall yields one row per 'ci-...' match.
targets = minimization_started_comments['body'].str.extractall(r'(?P<target>ci-[^,.\s]*)')
# Keep the earliest start date for each (target, pull request) pair.
minimization_started_comments = (
    targets
    .join(minimization_started_comments)
    .sort_values('date')
    .drop_duplicates(subset=['target','number'])
    .set_index(['target','number'])
    [['date']]
)
minimization_started_comments

Unnamed: 0_level_0,Unnamed: 1_level_0,date
target,number,Unnamed: 2_level_1
ci-mathcomp,13969,2021-05-21 22:34:08
ci-quickchick,13969,2021-05-26 14:10:29
ci-perennial,13969,2021-05-26 14:10:29
ci-equations,13969,2021-05-26 14:10:29
ci-iris,13969,2021-05-26 14:10:29
...,...,...
ci-hott,15171,2021-11-12 11:13:07
ci-fiat_crypto,15171,2021-11-12 11:13:07
ci-bedrock2,15171,2021-11-12 11:13:07
ci-perennial,15171,2021-11-12 11:13:07


In [30]:
# Bot comments reporting a final minimized file (intermediate results that are
# "interrupted by timeout, being automatically continued" are excluded).
minimization_success_comments = coqbot_comments[
    coqbot_comments['body'].str.startswith('Minimized File')
    & ~coqbot_comments['body'].str.contains('interrupted by timeout, being automatically continued')
]
# Bot comments reporting that minimization failed.
minimization_failure_comments = coqbot_comments[coqbot_comments['body'].str.startswith('Error: Could not minimize file')]
# pd.concat replaces DataFrame.append, which is deprecated and no longer exists in pandas 2.
minimization_finished_comments = pd.concat([
    minimization_success_comments.assign(success=True),
    minimization_failure_comments.assign(success=False),
])
# Only the first 'ci-...' occurrence (up to a closing parenthesis) is taken as the target.
targets = minimization_finished_comments['body'].str.extract(r'(?P<target>ci-[^)]*)')
# Keep the earliest finished comment for each (target, pull request) pair.
minimization_finished_comments = (
    targets
    .join(minimization_finished_comments)
    .sort_values('date')
    .drop_duplicates(subset=['target','number'])
    .set_index(['target','number'])
    [['date','success']]
)
minimization_finished_comments

Unnamed: 0_level_0,Unnamed: 1_level_0,date,success
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1
ci-mathcomp,13969,2021-05-21 22:37:15,False
ci-iris,13969,2021-05-26 14:13:55,False
ci-equations,13969,2021-05-26 14:14:19,True
ci-fourcolor,13969,2021-05-26 14:15:27,True
ci-perennial,13969,2021-05-26 14:16:08,True
...,...,...,...
ci-fiat_crypto,15171,2021-11-12 11:22:06,True
ci-iris,15171,2021-11-12 11:36:46,True
ci-perennial,15171,2021-11-12 11:38:27,True
ci-hott,15171,2021-11-12 13:31:35,True


In [31]:
# Pair each start with the matching finish on (target, pull request). This is a left
# join, so runs without a finished comment are kept with missing end date and outcome.
minimization_pairs = minimization_started_comments.join(minimization_finished_comments,lsuffix='_start',rsuffix='_end')
# Duration in seconds. total_seconds() is required here: `.dt.seconds` is only the
# seconds component of the timedelta and silently drops whole days, so any run
# lasting 24 hours or more would be reported as much shorter than it was.
minimization_pairs = minimization_pairs.assign(
    duration=(minimization_pairs['date_end'] - minimization_pairs['date_start']).dt.total_seconds()
)
minimization_pairs

Unnamed: 0_level_0,Unnamed: 1_level_0,date_start,date_end,success,duration
target,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ci-mathcomp,13969,2021-05-21 22:34:08,2021-05-21 22:37:15,False,187.0
ci-quickchick,13969,2021-05-26 14:10:29,2021-05-26 14:19:23,True,534.0
ci-perennial,13969,2021-05-26 14:10:29,2021-05-26 14:16:08,True,339.0
ci-equations,13969,2021-05-26 14:10:29,2021-05-26 14:14:19,True,230.0
ci-iris,13969,2021-05-26 14:10:29,2021-05-26 14:13:55,False,206.0
...,...,...,...,...,...
ci-hott,15171,2021-11-12 11:13:07,2021-11-12 13:31:35,True,8308.0
ci-fiat_crypto,15171,2021-11-12 11:13:07,2021-11-12 11:22:06,True,539.0
ci-bedrock2,15171,2021-11-12 11:13:07,2021-11-12 11:19:34,True,387.0
ci-perennial,15171,2021-11-12 11:13:07,2021-11-12 11:38:27,True,1520.0


In [32]:
# Durations of the runs that ended with a minimized file.
successful_minimization_pairs = minimization_pairs.loc[
    minimization_pairs['success'] == True,
    ['duration'],
]
successful_minimization_pairs

Unnamed: 0_level_0,Unnamed: 1_level_0,duration
target,number,Unnamed: 2_level_1
ci-quickchick,13969,534.0
ci-perennial,13969,339.0
ci-equations,13969,230.0
ci-fourcolor,13969,298.0
ci-interval,13895,353.0
...,...,...
ci-iris,15171,1419.0
ci-hott,15171,8308.0
ci-fiat_crypto,15171,539.0
ci-bedrock2,15171,387.0


Proportion of the time the minimizer was able to produce a minimized file:

In [33]:
# Share of started (target, pull request) runs that produced a minimized file.
# The denominator counts every row of minimization_pairs, including started runs
# for which no finished comment was matched.
len(successful_minimization_pairs)/len(minimization_pairs)

0.8451612903225807

In [34]:
# Median duration and number of successful runs for each CI target.
successful_minimization_pairs.groupby('target').agg(['median','count'])

Unnamed: 0_level_0,duration,duration
Unnamed: 0_level_1,median,count
target,Unnamed: 1_level_2,Unnamed: 2_level_2
ci-aac_tactics,232.0,1
ci-argosy,568.0,3
ci-bbv,427.5,2
ci-bedrock2,3322.0,7
ci-bignums,8529.0,1
ci-category_theory,322.0,3
ci-color,1579.0,4
ci-compcert,1097.5,4
ci-coq_performance_tests,232.0,1
ci-coquelicot,352.0,1


In [35]:
# Flatten the (target, number) index into columns and show the ci-bignums rows.
minimization_pairs_reset = minimization_pairs.reset_index()
minimization_pairs_reset.loc[minimization_pairs_reset['target'] == 'ci-bignums']

Unnamed: 0,target,number,date_start,date_end,success,duration
40,ci-bignums,13107,2021-06-29 14:41:10,2021-06-29 17:03:19,True,8529.0


In [36]:
# Summary statistics of the successful-run durations, with the median and the
# 60th to 90th percentiles in steps of ten.
successful_minimization_pairs['duration'].describe(percentiles=[0.5,0.6,0.7,0.8,0.9])

count      131.000000
mean      5399.770992
std      10551.570777
min        165.000000
50%        910.000000
60%       1399.000000
70%       3208.000000
80%       6467.000000
90%      19219.000000
max      71665.000000
Name: duration, dtype: float64