In [1]:
from dotenv import load_dotenv
import os

load_dotenv(dotenv_path='../.env')     
token = os.getenv('token')

In [65]:
import requests

class GitHubUsersFetcher:
    def __init__(self, token):
        self.token = token
        self.headers = {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.cloak-preview'  # for accessing commit search
        }


    def getPublicActivity(self, user,start_date,end_date):
        activity_url = f"https://api.github.com/search/commits?q=committer-name:{user}+committer-date:{start_date}..{end_date}&page=1"
        response = requests.get(activity_url, headers=self.headers)
        if response.status_code == 200:
            activities = response.json()
            return len(activities)
        else:
            print(f"Failed to fetch public activities for {user}: {response.status_code}")
            return 0
            
    def get_recent_commit_users(self,page):
        commits_url = 'https://api.github.com/search/commits?q=-no_user+committer-date:2022-11-30..2022-12-30&sort=created&order=asc&page=1'
        print(commits_url)
        response = requests.get(commits_url, headers=self.headers)
        if response.status_code == 200:
            commits = response.json()["items"]
            users = {commit['commit']['committer']['name'] for commit in commits if commit['commit']['committer']['name']!="GitHub"}
            valid_users = []
            for user in users:
                public_activity = self.getPublicActivity(user,'2022-11-30','2023-5-30')
                if public_activity > 30:
                    valid_users.append(user)
                    print(user)
            return users
        else:
            print(f"Failed to fetch commits: {response.status_code}")
            return []
    def write(self, users, filename="../github_data/users.csv"):
        with open(filename, mode='a', newline='') as file:
            writer = csv.writer(file)
            for user in users:
                writer.writerow([user])
        
    def fetch_users(self, target_user_count=100):
        all_users = set()
    

        page = 1
        while len(all_users) < target_user_count:
            users = self.get_recent_commit_users(page)
            self.write(users)
            print(users)
            all_users.update(users)
            if len(users) < 100:  # Break if the last page had less than 100 users, likely the last page
                break
            page += 1
            if len(all_users) >= target_user_count:
                break
        
        return list(all_users)[:target_user_count]



fetcher = GitHubUsersFetcher(token)
users = fetcher.fetch_users()
print(users)


https://api.github.com/search/commits?q=-no_user+committer-date:2022-11-30..2022-12-30&sort=created&order=asc&page=1
{'Mateus', 'drewlab', 'Michele Dalle Rive', 'mfaishal882', 'Olivia Guyot', 'Jarkko Heinonen', 'minvoo', 'Tom Flídr', 'Mark Johnson', 'srivm', 'PedroMonteiro1511', 'GonzoEnes', 'Abdul Moiz Lakhani', 'Ashley Hoath', 'Joshua Britain'}
['Mateus', 'drewlab', 'Michele Dalle Rive', 'mfaishal882', 'Olivia Guyot', 'Jarkko Heinonen', 'minvoo', 'Tom Flídr', 'Mark Johnson', 'srivm', 'PedroMonteiro1511', 'GonzoEnes', 'Abdul Moiz Lakhani', 'Ashley Hoath', 'Joshua Britain']


In [69]:
import requests
import csv

#https://api.github.com/search/commits?q=committer-name:jhordyess+committer-date:2020-05-30..2023-05-30&page=1
class GitHubCommits:
    def __init__(self, token):
        self.token = token
        self.headers = {
            'Authorization': f'token {self.token}',
            'Accept': 'application/vnd.github.cloak-preview'  # for accessing commit search
        }

    def get_commits(self, user, start_date, end_date):
        page = 1
        commits = []
        max_retries = 3 
        retries = 0
        while True:
            commit_search_url = f"https://api.github.com/search/commits?q=committer-name:{user}+committer-date:{start_date}..{end_date}&page={page}"
            print(commit_search_url)
            response = requests.get(commit_search_url, headers=self.headers)
            if response.status_code == 200:
                data = response.json()
                commits.extend(data['items'])
                if 'next' not in response.links:
                    break
                page += 1
            else:
                print(f"Failed to fetch page {page}: {response.status_code}")
                break
        return commits

    def filter_commits_by_date(self, commits, start_date, end_date):
        filtered_commits = [
            commit for commit in commits
            if start_date < commit["commit"]["committer"]["date"] < end_date
        ]
        return filtered_commits

    def get_lines_and_commit_count(self, commits):
        total_lines_added = 0
        number_of_commits = len(commits)

        for commit in commits:
            commit_url = commit["url"]
            response = requests.get(commit_url, headers=self.headers)
            if response.status_code == 200:
                commit_data = response.json()
                lines_added = commit_data['stats']['total']
                if lines_added > 30000:
                    number_of_commits -= 1
                    continue
                total_lines_added += lines_added
            else:
                print(f"Failed to fetch commit details: {response.status_code}")
                if retries < max_retries:
                    retries += 1
                    time.sleep(5)  
                    continue
                else:
                    break 

        return total_lines_added, number_of_commits

    def write(self, count1, lines1, count2, lines2):
         with open("../github_data/commits.csv", mode='a', newline='') as file:
            writer = csv.writer(file)
            try:
                file.seek(0)  
                if file.read(1):  
                    file.seek(0, 2)  # Go back to the end of the file if it's not empty
                else:
                    writer.writerow(['Period', 'Number of Commits', 'Lines Added'])  # Write headers if file is empty
            except IOError:
                writer.writerow(['Period', 'Number of Commits', 'Lines Added'])  # Write headers if checking raises IOError
            writer.writerow(['Period 1', count1, lines1])
            writer.writerow(['Period 2', count2, lines2])

    def run_analysis(self, users, start_date1, end_date1, start_date2, end_date2):
        for user in users:
            print(user)
            user = "mfaishal882"
            commits1 = self.get_commits(user,start_date1,end_date1)
            commits2 = self.get_commits(user,start_date2,end_date2)
            if len(commits1) < 10 or len(commits2) < 10:
                print(f"Period 1: {len(commits1)} commits")
                print(f"Period 2: {len(commits2)} commits")
                continue
            
            lines1, count1 = self.get_lines_and_commit_count(commits1)
            lines2, count2 = self.get_lines_and_commit_count(commits2)
    
            print(f"Period 1: {count1} commits, {lines1} lines added")
            print(f"Period 2: {count2} commits, {lines2} lines added")
            
            self.write(count1,lines1,count2, lines2)
       


github_fetcher = GitHubCommits(token)
github_fetcher.run_analysis(users, '2022-05-30', '2022-11-30', '2022-11-30', '2023-05-30')


Mateus
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-05-30..2022-11-30&page=1
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-05-30..2022-11-30&page=2
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-05-30..2022-11-30&page=3
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-05-30..2022-11-30&page=4
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-11-30..2023-05-30&page=1
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-11-30..2023-05-30&page=2
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-11-30..2023-05-30&page=3
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-11-30..2023-05-30&page=4
https://api.github.com/search/commits?q=committer-name:mfaishal882+committer-date:2022-11-30..202

KeyboardInterrupt: 