In [None]:
import os
import requests
import argparse
from dotenv import load_dotenv
import base64
import re
import pandas as pd

# Function to check if a file starts with a comment
def check_comment(file_content):
    """Return True when the first non-blank line of *file_content* opens a comment.

    The text is split on ``'\\n'`` and each line is stripped of surrounding
    whitespace. The first line that is not empty after stripping decides the
    result: it counts as a comment when it begins with ``#``, ``%``, or a
    triple-quote (``\"\"\"`` or ``'''``). Text with no non-blank line yields
    False.
    """
    comment_prefixes = ('#', '%', '"""', "'''")
    # Lazily find the first line that still has content after stripping;
    # fall back to '' (which matches no prefix) when every line is blank.
    first_content_line = next(
        (text for text in map(str.strip, file_content.split('\n')) if text),
        '',
    )
    return first_content_line.startswith(comment_prefixes)

# Function to count the number of files with comments
def count_files_with_comments(repo_url, github_token):
    """Count the .py/.r/.R files in a GitHub repository and how many start with a comment.

    The repository tree is walked recursively through the GitHub
    ``/repos/{owner}/{repo}/contents/{path}`` endpoint, starting at the root.
    Every file whose name ends in ``.py``, ``.r`` or ``.R`` is downloaded and
    passed to ``check_comment``.

    Args:
        repo_url: URL whose last two path segments are the repository owner
            and name (a trailing ``/`` is tolerated).
        github_token: Token sent as ``Authorization: token <value>``. When it
            is empty or None the header is omitted instead of sending the
            literal string ``token None``.

    Returns:
        A ``(total_files, files_with_comments)`` tuple of ints.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` when any
            API request returns an error status.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import quote

    # Strip a trailing slash so the last segment is never the empty string.
    split_url = repo_url.rstrip('/').split('/')
    repo_owner = split_url[-2]
    repo_name = split_url[-1]
    contents_url = f'https://api.github.com/repos/{repo_owner}/{repo_name}/contents'

    session = requests.Session()
    if github_token:
        session.headers.update({'Authorization': f'token {github_token}'})

    total_files = 0
    files_with_comments = 0

    def fetch_contents(path):
        # Percent-encode the path ('/' is kept) so characters such as '#' or
        # '?' in a file name are not parsed as a URL fragment or query.
        response = session.get(f'{contents_url}/{quote(path)}')
        response.raise_for_status()
        return response.json()

    def process_file(file_path):
        nonlocal total_files, files_with_comments
        total_files += 1
        data = fetch_contents(file_path)
        # 'utf-8-sig' drops a leading BOM that would otherwise hide a '#';
        # errors='replace' keeps one badly encoded file from aborting the
        # whole run (only the first non-blank line is inspected anyway).
        file_content = base64.b64decode(data['content']).decode('utf-8-sig', errors='replace')
        if check_comment(file_content):
            files_with_comments += 1

    def traverse_directory(directory_path):
        for item in fetch_contents(directory_path):
            if item['type'] == 'file':
                if item['name'].endswith(('.py', '.r', '.R')):
                    process_file(item['path'])
            elif item['type'] == 'dir':
                # Use the API-provided full path; joining onto '' for the
                # root would yield '/name' and a 'contents//name' URL.
                traverse_directory(item['path'])

    # Close the session (and its pooled connections) even if a request fails.
    with session:
        traverse_directory('')
    return total_files, files_with_comments

# Main code
if __name__ == "__main__":
    # Load GitHub token from .env file
    load_dotenv()
    github_token = os.getenv('GITHUB_ACCESS_TOKEN')

    # Read CSV file with semicolon delimiter
    df = pd.read_csv('akshay.csv', delimiter=';')

    # Keep only research projects written in Python or R. The explicit
    # .copy() makes this an independent frame, so adding a column below does
    # not trigger pandas' SettingWithCopyWarning or write to a temporary.
    filtered_df = df[(df['project'] == 'research') & (df['language'].isin(['Python', 'R']))].copy()
    urls = filtered_df['url'].tolist()

    # Create empty list to store percentage of files with comments
    percentages = []

    # Iterate over URLs and call the function
    for url in urls:
        print(f"Processing URL: {url}")
        total_files, files_with_comments = count_files_with_comments(url, github_token)
        # Repositories with no matching files get 0 rather than dividing by zero.
        percentage_with_comments = (files_with_comments / total_files) * 100 if total_files > 0 else 0
        percentages.append(percentage_with_comments)

    # Add new column 'comment_at_start' to the DataFrame
    # (percentages is in the same order as the rows of filtered_df)
    filtered_df['comment_at_start'] = percentages

    # Save the filtered, annotated DataFrame to a new CSV file
    filtered_df.to_csv('updated_akshay.csv', index=False)


  df = pd.read_csv('akshay.csv', delimiter=';')


Processing URL: https://api.github.com/repos/aipescience/daiquiri-admin
Processing URL: https://api.github.com/repos/aipescience/django-daiquiri-tap
Processing URL: https://api.github.com/repos/aipescience/lightmeter
Processing URL: https://api.github.com/repos/aipescience/provenance-applause
Processing URL: https://api.github.com/repos/aipescience/queryparser
Processing URL: https://api.github.com/repos/aipescience/raid-utils
Processing URL: https://api.github.com/repos/aipescience/spider-test-data
Processing URL: https://api.github.com/repos/aipescience/uws-client
Processing URL: https://api.github.com/repos/aipescience/verlustdernacht
Processing URL: https://api.github.com/repos/aipescience/votable2sql
Processing URL: https://api.github.com/repos/aodenweller/green-h2-upscaling
Processing URL: https://api.github.com/repos/ATB-Potsdam/jupyterhub_dockerized
Processing URL: https://api.github.com/repos/dshoman/MgII
Processing URL: https://api.github.com/repos/ekaterinailin/flare-locatio