In [2]:
import os
import requests
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from collections import Counter

In [None]:
# Load environment variables from the .env file; get_repositories_data reads
# GITHUB_TOKEN through os.getenv, so this must run before the function is called.
load_dotenv()

In [3]:
# Read the processed users file and keep only the 'id_usuario' column as a plain Python list.
users_ids = (
    pd.read_csv("../../data/processed/users_ids.csv", encoding='latin1')['id_usuario']
    .to_list()
)

In [1]:
def get_repositories_data(user_ids, timeout=30):
    """
    Retrieves starred-repository data for a given list of user IDs.

    For every user, the GitHub "starred" endpoint is paged through (following
    the 'next' link of each response) and the repositories collected are
    summarised into a single row.

    Parameters:
    - user_ids (list): List of user IDs.
    - timeout (float): Seconds to wait for each HTTP request before giving up
      (default 30).

    Returns:
    - repositories (dict): Dictionary containing repositories data; the four
      lists are index-aligned (one entry per summarised user).
        - 'user_id' (list): List of user IDs.
        - 'has_moz_owner' (list): True when at least one starred repository is
          owned by an ID that is itself present in `user_ids`.
        - 'fav_lang' (list): Most frequent non-null language among the user's
          starred repositories, or None when no repository reports a language.
        - 'fav_topic' (list): Most frequent topic among the user's starred
          repositories, or None when no repository has topics.

    Notes:
    - Users with no starred repositories get no row.
    - If any request for a user returns a non-200 status, 'Request Error:' and
      the status code are printed and that user gets no row; partial data is
      discarded rather than carried over to the next user.
    """

    token = os.getenv('GITHUB_TOKEN')
    headers = {'Accept': 'application/vnd.github.v3+json'}
    if token:
        # Only authenticate when a token is actually configured; sending the
        # literal string "Token None" would be rejected as bad credentials.
        headers['Authorization'] = f'Token {token}'

    repositories = {
        'user_id': [],
        'has_moz_owner': [],
        'fav_lang': [],
        'fav_topic': [],
    }

    # Loop-invariant: build the comparison array once instead of once per user.
    known_ids = np.array(user_ids)

    for user_id in user_ids:

        url = f"https://api.github.com/user/{user_id}/starred"

        # Fresh paging parameters and accumulators for EVERY user, so nothing
        # (page size, partial results of a failed fetch) leaks between users.
        params = {
            'per_page': 100,  # Number of items per page
            'page': 1,  # Initial page
        }
        owner_ids = []
        languages = []
        topics = []
        fetched_all = True

        while url:

            response = requests.get(url, headers=headers, params=params, timeout=timeout)

            if response.status_code != 200:
                print('Request Error:', response.status_code)
                fetched_all = False
                break

            data = response.json()
            if not data:
                # An empty page means there is nothing (more) to read.
                break

            for repo in data:
                owner_ids.append(repo['owner']['id'])
                # Repositories without a detected language report None; those
                # must not compete for "favorite language".
                if repo['language'] is not None:
                    languages.append(repo['language'])
                topics.extend(repo.get('topics') or [])

            # The 'next' URL already embeds the paging query string, so the
            # explicit params are dropped for follow-up requests.
            url = response.links.get('next', {}).get('url')
            params = {}

        if not fetched_all or not owner_ids:
            continue

        repositories['user_id'].append(user_id)
        repositories['has_moz_owner'].append(
            bool(np.intersect1d(np.array(owner_ids), known_ids).size > 0)
        )
        repositories['fav_lang'].append(
            Counter(languages).most_common(1)[0][0] if languages else None
        )
        repositories['fav_topic'].append(
            Counter(topics).most_common(1)[0][0] if topics else None
        )

    return repositories


# Example usage
# The ID list loaded from users_ids.csv is bound to `users_ids`; `user_ids` only
# exists as the function's parameter name, so passing it here raised NameError
# on a fresh kernel.
final_data = get_repositories_data(users_ids)

# Persist the per-user summary next to the other processed data files.
users_data = pd.DataFrame(final_data)
users_data.to_csv("../../data/processed/starred.csv", index=False, encoding='latin1')

print(f'final_data: {final_data}')
