# Issues2dataframe

Read the bodies of the closed GitHub issues that request the creation of a new repository (label `new_repo`).  
Extract the relevant fields, collect them in a pandas DataFrame and save the result to a CSV file.

In [2]:
import os
from dotenv import load_dotenv

import requests
import pandas as pd 

In [3]:
# Load the variables of a local .env file into the process environment;
# the GitHub token is later read from there with os.getenv().
load_dotenv()

True

In [7]:
def get_issue_content(owner, repo, issue_number, token, timeout=30):
    """Fetch a single issue from the GitHub REST API.

    Args:
        owner: Name of the user or organisation owning the repository.
        repo: Repository name.
        issue_number: Number of the issue to download.
        token: GitHub access token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON payload of the issue when the server answers with
        HTTP 200, otherwise ``None`` (the failing status code is printed).

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            ``timeout`` expires.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}"
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json"
    }

    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    # Any other status is reported and signalled to the caller with None.
    print(f"Failed to fetch issue {issue_number}: {response.status_code}")
    return None



def fetch_issues_numbers_id(owner, repo, token, label, state="closed", timeout=30):
    """List number, title and node id of the labelled issues of a repository.

    The issues endpoint is read page by page (100 entries per page) until an
    empty page is returned. Entries containing a ``'pull_request'`` key are
    skipped, so only genuine issues are reported.

    Args:
        owner: Name of the user or organisation owning the repository.
        repo: Repository name.
        token: GitHub access token sent in the ``Authorization`` header.
        label: Label passed to the API as the ``labels`` filter.
        state: Value of the ``state`` filter; defaults to ``"closed"``, the
            value that used to be hard-coded.
        timeout: Seconds to wait for each request before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A list of dictionaries with the keys ``'Issue Number'``, ``'Title'``
        and ``'Node ID'``. If a request does not answer with HTTP 200 the
        status code is printed and the issues collected so far are returned.

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            ``timeout`` expires.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json"
    }

    issues = []
    page = 1

    while True:
        params = {'state': state, 'labels': label, 'page': page, 'per_page': 100}
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve issues: {response.status_code}")
            break

        page_issues = response.json()
        if not page_issues:
            # An empty page means every issue has been read.
            break

        issues.extend(
            {
                'Issue Number': issue['number'],
                'Title': issue['title'],
                'Node ID': issue['node_id']
            }
            for issue in page_issues
            if 'pull_request' not in issue
        )

        page += 1

    return issues



def extract_fields(description, fields):
    """Collect the values of ``<field><separator> <value>`` lines of a text.

    Every line of ``description`` that starts with one of ``fields`` yields an
    entry: the field name and one following character are dropped, leading
    whitespace is removed from the remainder, and the result is stored under
    the field name. A later matching line overwrites an earlier one.

    Args:
        description: Multi-line text to scan.
        fields: Iterable of field names searched at the start of each line.

    Returns:
        Dictionary mapping every field that was found to its value.
    """
    found = {}

    for row in description.splitlines():
        matching = (name for name in fields if row.startswith(name))
        for name in matching:
            # Skip the field name plus the single character following it.
            value_start = len(name) + 1
            found[name] = row[value_start:].lstrip()

    return found



def issue_description2dict(description, fields):
    """Parse the body of a repository-request issue into a dictionary.

    Each line is stripped and compared with ``fields``. A line starting with
    a field contributes ``field -> value``, where the value is the rest of the
    line after the field name and one separator character, without leading
    whitespace.

    The field ``"Visibility"`` is handled differently: the text following its
    line, up to the first occurrence of ``'Purpose'``, is searched for a ticked
    ``Public`` checkbox (``- [x] Public`` or ``- [X] Public``) and the result
    is stored under the key ``'Private'`` as a boolean.

    Args:
        description: Issue body; may be ``None`` or empty.
        fields: Iterable of field names searched at the start of each line.

    Returns:
        Dictionary with the fields that were found. ``'Private'`` replaces
        ``'Visibility'``; fields missing from the body are absent.
    """
    if not description:
        # A missing (None) or empty body has nothing to parse.
        return {}

    # splitlines() copes with '\r\n' as well as '\n' line endings and, unlike
    # a textual sentinel such as 'SEP', cannot collide with the issue's text.
    text_clean = description.splitlines()

    RES = {}

    for line_num, line in enumerate(text_clean):
        line = line.strip()

        for field in fields:
            if not line.startswith(field):
                continue

            if field == "Visibility":
                # The checkboxes live on the lines after the heading; the
                # section ends where the 'Purpose' part of the body begins.
                visibility_section = ''.join(text_clean[line_num + 1:]).split('Purpose')[0]
                is_public = any(
                    mark in visibility_section
                    for mark in ('- [x] Public', '- [X] Public')
                )
                RES['Private'] = not is_public
                continue

            # Skip the field name plus the single character following it.
            RES[field] = line[len(field) + 1:].lstrip()

    return RES

## Configuration

In [8]:
# Organisation and repository whose issues are read.
owner = "ICSC-CN-HPC-Spoke-4-Earth-Climate"
repo = "administration"

# The access token comes from the environment so that it never appears in
# the notebook itself.
token = os.getenv('ICSC_GITHUB_ORG_PAT')
if not token:
    # With token=None the Authorization header would literally read
    # "token None"; warn here instead of leaving only an HTTP error later.
    print("Warning: environment variable ICSC_GITHUB_ORG_PAT is not set; "
          "requests to the GitHub API will not be authenticated.")

## Fetch issue numbers

In [9]:
# Number, title and node id of every issue carrying the "new_repo" label.
a = fetch_issues_numbers_id(
    owner=owner,
    repo=repo,
    token=token,
    label="new_repo",
)
a

[{'Issue Number': 30,
  'Title': '[Repository Creation] - SEAS-NEMO',
  'Node ID': 'I_kwDOMO4l_86OlylG'},
 {'Issue Number': 29,
  'Title': '[Repository Creation] - WaveCurrent_regional',
  'Node ID': 'I_kwDOMO4l_86Olx8v'},
 {'Issue Number': 28,
  'Title': '[Repository Creation] - ICON-TERRA',
  'Node ID': 'I_kwDOMO4l_86OlxOE'},
 {'Issue Number': 27,
  'Title': '[Repository Creation] - ShyConv',
  'Node ID': 'I_kwDOMO4l_86OlvT2'},
 {'Issue Number': 26,
  'Title': '[Repository Creation] - EffCover',
  'Node ID': 'I_kwDOMO4l_86Olt7k'},
 {'Issue Number': 25,
  'Title': '[Repository Creation] - aero_drag_wrf',
  'Node ID': 'I_kwDOMO4l_86OltGQ'},
 {'Issue Number': 24,
  'Title': '[Repository Creation] - BEP_BEM_Offline',
  'Node ID': 'I_kwDOMO4l_86Olsdr'},
 {'Issue Number': 23,
  'Title': '[Repository Creation] - LandModelModules',
  'Node ID': 'I_kwDOMO4l_86OlrlM'},
 {'Issue Number': 22,
  'Title': '[Repository Creation] - ML_entrainment',
  'Node ID': 'I_kwDOMO4l_86Olp-L'},
 {'Issue Number

In [10]:
# One row per issue, ordered by ascending issue number
# (ascending order is the default of sort_values).
df = pd.DataFrame(a).sort_values('Issue Number')
df

Unnamed: 0,Issue Number,Title,Node ID
12,15,[Repository Creation] - ML_clouds,I_kwDOMO4l_86Olge_
11,19,[Repository Creation] - OrogDrag,I_kwDOMO4l_86OloN6
10,20,[Repository Creation] - SurfAlb,I_kwDOMO4l_86Olo34
9,21,[Repository Creation] - SoilHydraulics,I_kwDOMO4l_86OlpcD
8,22,[Repository Creation] - ML_entrainment,I_kwDOMO4l_86Olp-L
7,23,[Repository Creation] - LandModelModules,I_kwDOMO4l_86OlrlM
6,24,[Repository Creation] - BEP_BEM_Offline,I_kwDOMO4l_86Olsdr
5,25,[Repository Creation] - aero_drag_wrf,I_kwDOMO4l_86OltGQ
4,26,[Repository Creation] - EffCover,I_kwDOMO4l_86Olt7k
3,27,[Repository Creation] - ShyConv,I_kwDOMO4l_86OlvT2


## Get issue content

In [11]:
# Labels searched at the beginning of the lines of each issue body.
fields = [
    "Repository name",
    "WP",
    "Task",
    "Repository manager",
    "Collaborators",
    "Visibility",
]

In [12]:
# Columns added to the table: the parsed fields, with "Visibility" dropped
# and the boolean "Private" appended in its place at the end.
# index() raises ValueError when "Visibility" is not among the fields.
visibility_pos = fields.index("Visibility")
new_columns = fields[:visibility_pos] + fields[visibility_pos + 1:] + ["Private"]

# Create the new columns empty; they are filled issue by issue afterwards.
df[new_columns] = None
df

Unnamed: 0,Issue Number,Title,Node ID,Repository name,WP,Task,Repository manager,Collaborators,Private
12,15,[Repository Creation] - ML_clouds,I_kwDOMO4l_86Olge_,,,,,,
11,19,[Repository Creation] - OrogDrag,I_kwDOMO4l_86OloN6,,,,,,
10,20,[Repository Creation] - SurfAlb,I_kwDOMO4l_86Olo34,,,,,,
9,21,[Repository Creation] - SoilHydraulics,I_kwDOMO4l_86OlpcD,,,,,,
8,22,[Repository Creation] - ML_entrainment,I_kwDOMO4l_86Olp-L,,,,,,
7,23,[Repository Creation] - LandModelModules,I_kwDOMO4l_86OlrlM,,,,,,
6,24,[Repository Creation] - BEP_BEM_Offline,I_kwDOMO4l_86Olsdr,,,,,,
5,25,[Repository Creation] - aero_drag_wrf,I_kwDOMO4l_86OltGQ,,,,,,
4,26,[Repository Creation] - EffCover,I_kwDOMO4l_86Olt7k,,,,,,
3,27,[Repository Creation] - ShyConv,I_kwDOMO4l_86OlvT2,,,,,,


In [13]:
for n in df['Issue Number']:
    print(f'Retrieving the description of issue #{n}')

    issue_content = get_issue_content(owner, repo, n, token)
    if issue_content is None:
        # The download failed (already reported by get_issue_content);
        # leave the row empty and carry on with the next issue.
        continue

    # The body can be missing or None; parse an empty text in that case.
    descr = issue_description2dict(issue_content.get("body") or "", fields)

    # Update dataframe: only the fields actually present in the issue are
    # written, the others keep the None they were initialised with.
    row_mask = df['Issue Number'] == n
    for c in new_columns:
        if c in descr:
            df.loc[row_mask, c] = descr[c]

df

Retrieving the description of issue #15
Retrieving the description of issue #19
Retrieving the description of issue #20
Retrieving the description of issue #21
Retrieving the description of issue #22
Retrieving the description of issue #23
Retrieving the description of issue #24
Retrieving the description of issue #25
Retrieving the description of issue #26
Retrieving the description of issue #27
Retrieving the description of issue #28
Retrieving the description of issue #29
Retrieving the description of issue #30


Unnamed: 0,Issue Number,Title,Node ID,Repository name,WP,Task,Repository manager,Collaborators,Private
12,15,[Repository Creation] - ML_clouds,I_kwDOMO4l_86Olge_,ML_clouds,3,3.3,fedef17,,True
11,19,[Repository Creation] - OrogDrag,I_kwDOMO4l_86OloN6,OrogDrag,3,3.2,GuidoDavoli,,True
10,20,[Repository Creation] - SurfAlb,I_kwDOMO4l_86Olo34,SurfAlb,3,3.1,francocatalano,,True
9,21,[Repository Creation] - SoilHydraulics,I_kwDOMO4l_86OlpcD,SoilHydraulics,3,3.1,daniele-peano,,True
8,22,[Repository Creation] - ML_entrainment,I_kwDOMO4l_86Olp-L,ML_entrainment,3,3.3,ValerioLembo,"ValerioLembo, turbulent-lab",True
7,23,[Repository Creation] - LandModelModules,I_kwDOMO4l_86OlrlM,LandModelModules,3,3.1,oxana-meteo,"GuidoDavoli, aalessan",True
6,24,[Repository Creation] - BEP_BEM_Offline,I_kwDOMO4l_86Olsdr,BEP_BEM_Offline,3,3.1,gpappac,,True
5,25,[Repository Creation] - aero_drag_wrf,I_kwDOMO4l_86OltGQ,aero_drag_wrf,3,3.2,antoniolita,,True
4,26,[Repository Creation] - EffCover,I_kwDOMO4l_86Olt7k,EffCover,3,3.1,emandica,,True
3,27,[Repository Creation] - ShyConv,I_kwDOMO4l_86OlvT2,ShyConv,3,3.2,scausio,,True


## Save to file

In [14]:
# Write the table next to the notebook (current working directory);
# the DataFrame index is not written to the file.
save_file = os.path.join(
    os.getcwd(),
    "Repository_list.csv",
)
df.to_csv(path_or_buf=save_file, index=False)