In [1]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
from datetime import datetime

In [None]:
# Load environment variables from .env file so that the os.getenv('GITHUB_TOKEN')
# lookup inside get_users_data can find the API token
load_dotenv()

In [None]:
# Read the processed CSV and keep only its 'id_usuario' column as a plain Python list
users_ids = pd.read_csv(
    "../../data/processed/users_ids.csv",
    encoding='latin1',
)['id_usuario'].to_list()

In [1]:
def get_users_data(id_list):
    """
    Retrieves data for GitHub users based on their IDs.

    For every ID a GET request is sent to ``https://api.github.com/user/{id}``.
    Processing stops at the first response whose status code is not 200 (the
    code is printed) and whatever was collected up to that point is returned.

    Args:
        id_list (list): List of GitHub user IDs.

    Returns:
        dict: Dictionary of equal-length lists keyed by "id", "city_id",
            "followers", "following", "created_at" and "updated_at".
            "city_id" is the index of the first name of the internal city list
            found in the user's lower-cased location, or '' when no name
            matches (including users without a location). Both dates are
            formatted as 'YYYY-MM-DD'.
    """

    token = os.getenv('GITHUB_TOKEN')
    headers = {'Accept': 'application/vnd.github.v3+json'}
    if token:
        # Authenticate only when a token is configured; otherwise the header
        # would carry the literal string "Token None", an invalid credential.
        headers['Authorization'] = f'Token {token}'

    # The position of a name in this list is the value stored as "city_id".
    city = ['maputo', 'matola', 'gaza', 'inhambane', 'manica', 'sofala', 'tete', 'zambézia', 'nampula', 'niassa', 'cabo delgado']

    user = {
        "id": [],
        "city_id": [],
        "followers": [],
        "following": [],
        "created_at": [],
        "updated_at": [],
    }

    for i in id_list:
        url = f"https://api.github.com/user/{i}"
        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            print('Request Error:', response.status_code)
            break

        data = response.json()

        # The API reports a null location for profiles that leave it empty, so
        # fall back to an empty string before lower-casing.
        location = (data.get('location') or '').lower()
        city_id = next(
            (index for index, name in enumerate(city) if name in location),
            '',
        )

        # Build the complete row first so that a failure on one field cannot
        # leave the column lists with different lengths.
        row = {}
        for key in user:
            if key == "city_id":
                row[key] = city_id
            elif key in ("created_at", "updated_at"):
                # Parse the ISO-8601 UTC timestamp and keep only the date part.
                parsed = datetime.strptime(data[key], "%Y-%m-%dT%H:%M:%SZ")
                row[key] = parsed.strftime('%Y-%m-%d')
            else:
                row[key] = data[key]

        for key, value in row.items():
            user[key].append(value)

    return user


# Destination of the collected user fields
filename = "../../data/processed/users_data.csv"

# Query the API for every loaded user ID
final_data = get_users_data(users_ids)

# Turn the dictionary of columns into a DataFrame and write it out without the index
users_data = pd.DataFrame(data=final_data)
users_data.to_csv(path_or_buf=filename, index=False, encoding='latin1')

print('Done')
