# In [None]:
import requests
from time import sleep
from datetime import datetime
import os
import csv
import pandas as pd
import glob
import shutil
import os
from dotenv import load_dotenv

def setup_folder(repo_owner, repo_name):
    """
    Create the output folder for a repository and locate its newest CSV.

    The folder is named "<repo_owner>_<repo_name>" (relative to the current
    working directory) and is created if it does not exist yet.

    Args:
        repo_owner: GitHub account that owns the repository.
        repo_name: Name of the repository.

    Returns:
        A tuple (folder_name, latest_file). latest_file is the path of the
        most recent 'stargazer_info_*.csv' in the folder, or None when the
        folder holds no such file.
    """
    folder_name = f"{repo_owner}_{repo_name}"
    os.makedirs(folder_name, exist_ok=True)

    list_of_files = glob.glob(os.path.join(folder_name, 'stargazer_info_*.csv'))
    if not list_of_files:
        return folder_name, None

    # File names embed a zero-padded YYYYMMDD_HHMMSS stamp, so the greatest
    # name is the newest snapshot. Filesystem ctime is not used because on
    # Unix it is the last metadata change, which copying, restoring or
    # chmod-ing the folder silently rewrites.
    latest_file = max(list_of_files, key=os.path.basename)

    return folder_name, latest_file

def get_latest_stargazer_data(latest_file):
    """
    Extract latest starred date and number of stargazers from existing CSV.

    Args:
        latest_file: Path to a previously written stargazer CSV, or None
            when there is no earlier snapshot.

    Returns:
        A tuple (latest_starred_at, num_stargazers). latest_starred_at is
        the greatest value in the 'Starred at' column, or None when there is
        no file, the file is empty, or the column holds no values.
        num_stargazers is the number of data rows (0 when there is no file).
    """
    if not latest_file:
        return None, 0

    try:
        df = pd.read_csv(latest_file)
    except pd.errors.EmptyDataError:
        # Zero-byte file: nothing has been saved yet.
        return None, 0

    # A header-only CSV would otherwise yield NaN here. NaN is truthy, so
    # callers that test "if latest_starred_at" would try to parse it as a
    # timestamp and crash.
    starred_dates = df['Starred at'].dropna()
    if starred_dates.empty:
        return None, len(df)

    return starred_dates.max(), len(df)

def save_stargazer_data(folder_name, latest_file, stargazer_info):
    """
    Write a new timestamped CSV snapshot of stargazer profiles.

    The new file starts as a copy of latest_file (when one is given) and the
    entries in stargazer_info are appended to it. A header row is written
    only when the new file is still empty at that point.

    Returns the path of the CSV that was written.
    """
    # (CSV column, key in the user record), in output column order.
    column_sources = (
        ('Username', 'login'),
        ('Name', 'name'),
        ('Location', 'location'),
        ('Company', 'company'),
        ('Email', 'email'),
        ('Twitter', 'twitter_username'),
        ('Followers', 'followers'),
        ('Starred at', 'starred_at'),
        ('Bio', 'bio'),
    )

    current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = os.path.join(folder_name, f'stargazer_info_{current_datetime}.csv')

    # Seed the new snapshot with everything collected so far.
    if latest_file:
        shutil.copy(latest_file, csv_filename)

    with open(csv_filename, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=[column for column, _ in column_sources],
        )

        # Nothing was copied in (or the copy was empty): start with a header.
        if os.path.getsize(csv_filename) == 0:
            writer.writeheader()

        writer.writerows(
            {column: user[key] for column, key in column_sources}
            for user in stargazer_info
        )

    return csv_filename

def _rate_limit_wait_seconds(response_headers, buffer_seconds=10):
    """
    Return how many seconds to sleep before GitHub accepts requests again.

    Prefers an integer 'Retry-After' header when present, otherwise waits
    until the 'X-RateLimit-Reset' epoch. buffer_seconds is always added.
    """
    retry_after = response_headers.get('Retry-After')
    if retry_after is not None and str(retry_after).isdigit():
        return int(retry_after) + buffer_seconds

    reset_time = int(response_headers.get('X-RateLimit-Reset', 0))
    return max(reset_time - datetime.now().timestamp(), 0) + buffer_seconds


def get_stargazers(owner, repo, token, latest_num_stargazers, limit=None):
    """
    Retrieve stargazers for a given repository, resuming after earlier runs.

    Fetching starts at the page that contains the first stargazer not yet
    saved, so the first page returned may repeat entries that were already
    processed; the caller is expected to filter those out.

    Args:
        owner: GitHub account that owns the repository.
        repo: Name of the repository.
        token: GitHub API token sent in the Authorization header.
        latest_num_stargazers: Number of stargazers already saved by earlier
            runs (0 or None to start from the first page).
        limit: Stop once at least this many not-yet-saved stargazers have
            been fetched. None or 0 fetches every remaining page.

    Returns:
        A list of the raw entries returned by the API (each carrying 'user'
        and 'starred_at'). The list may be partial if the API returned an
        unexpected status or requests kept failing.
    """
    per_page = 30  # requested explicitly so the resume arithmetic below holds
    request_timeout = 30  # seconds; without it a stalled connection hangs forever
    max_consecutive_failures = 5

    url = f"https://api.github.com/repos/{owner}/{repo}/stargazers"
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3.star+json"
    }
    stargazers = []

    already_saved = latest_num_stargazers or 0
    page = already_saved // per_page + 1
    # Entries at the start of the first fetched page that were saved before.
    # They must not count towards `limit`, or a resumed run could stop after
    # only a handful of new stargazers.
    overlap = already_saved % per_page
    consecutive_failures = 0

    while True:
        print(f"Fetching page {page} of stargazers...")
        try:
            response = requests.get(
                url,
                headers=headers,
                params={"page": page, "per_page": per_page},
                timeout=request_timeout,
            )
            page_stargazers = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError) as e:
            # Network trouble or an unparsable body: retry the same page, but
            # only a bounded number of times.
            consecutive_failures += 1
            print(f"Error occurred: {str(e)}")
            if consecutive_failures >= max_consecutive_failures:
                print(f"Giving up after {consecutive_failures} consecutive failures")
                break
            sleep(5)  # Wait before retrying
            continue
        consecutive_failures = 0

        remaining = response.headers.get('X-RateLimit-Remaining')

        if response.status_code == 200:
            if not page_stargazers:
                break
            stargazers.extend(page_stargazers)
            print(f"Retrieved {len(stargazers)} stargazers so far")

            if limit and len(stargazers) - overlap >= limit:
                break

            # Move to the next page
            page += 1

            # Throttle before the next request only when GitHub actually
            # reported how many calls are left.
            if remaining is not None and int(remaining) < 10:
                sleep_time = _rate_limit_wait_seconds(response.headers)
                print(f"Rate limit nearly exceeded. Waiting {sleep_time:.0f} seconds...")
                sleep(sleep_time)

        elif response.status_code in (403, 429) and (
            remaining == '0' or response.headers.get('Retry-After') is not None
        ):
            # Genuine rate limiting: wait for the window to reset, then retry
            # the same page.
            sleep_time = _rate_limit_wait_seconds(response.headers)
            print(f"Rate limit exceeded. Waiting {sleep_time:.0f} seconds...")
            sleep(sleep_time)

        else:
            # Any other status (including a 403 that is not a rate limit)
            # will not fix itself by retrying.
            print(f"Error: Status code {response.status_code}")
            print(f"Response: {response.text}")
            break

    return stargazers

def extract_stargazer_info(owner, repo, token, latest_starred_at, latest_num_stargazers, limit=None):
    """
    Extract profile information for stargazers that have not been saved yet.

    Stargazers are obtained from get_stargazers(); those whose 'starred_at'
    is not later than latest_starred_at are skipped as already processed.
    For each remaining one the GitHub user profile is fetched and annotated
    with a 'starred_at' key.

    Args:
        owner: GitHub account that owns the repository.
        repo: Name of the repository.
        token: GitHub API token sent in the Authorization header.
        latest_starred_at: 'YYYY-MM-DDTHH:MM:SSZ' timestamp of the newest
            stargazer already saved. Any non-string or empty value (None,
            NaN read back from an empty CSV) means "nothing saved yet".
        latest_num_stargazers: Number of stargazers already saved.
        limit: Maximum number of new profiles to return (None or 0 for no
            limit).

    Returns:
        A list of user-profile dicts. The list ends early if a profile
        cannot be fetched for a reason other than the account no longer
        existing, so that no stargazer is skipped in the middle.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    request_timeout = 30  # seconds; without it a stalled connection hangs forever

    # Gets all stargazers usernames
    stargazers = get_stargazers(owner, repo, token, latest_num_stargazers, limit)

    # The cutoff does not change per stargazer, so parse it once.
    if isinstance(latest_starred_at, str) and latest_starred_at:
        latest_starred_at_datetime = datetime.strptime(latest_starred_at, timestamp_format)
    else:
        latest_starred_at_datetime = None

    auth_headers = {"Authorization": f"token {token}"}
    detailed_stargazers = []

    for stargazer in stargazers:
        username = stargazer['user']['login']
        starred_at = stargazer['starred_at']

        # Only proceed if starred_at is after latest_starred_at
        if (latest_starred_at_datetime is not None
                and datetime.strptime(starred_at, timestamp_format) <= latest_starred_at_datetime):
            print(f"Already processed stargazer {username}")
            continue

        # Check if we've reached the limit of new users
        if limit and len(detailed_stargazers) >= limit:
            break

        position = latest_num_stargazers + len(detailed_stargazers) + 1
        print(f"Processing stargazer #{position}: {username}")

        try:
            user_response = requests.get(
                f"https://api.github.com/users/{username}",
                headers=auth_headers,
                timeout=request_timeout,
            )
        except requests.RequestException as e:
            # Return what has been collected so the caller can still save it.
            print(f"Error occurred while fetching {username}: {str(e)}. Stopping early.")
            break

        if user_response.status_code == 200:
            user_info = user_response.json()
            user_info['starred_at'] = starred_at
            detailed_stargazers.append(user_info)
        elif user_response.status_code == 404:
            # The account no longer exists; there is nothing to retry later.
            print(f"Skipping {username}: profile not found")
        else:
            # A later run resumes from the newest saved 'starred_at'. Saving
            # stargazers that come after this one would therefore lose it
            # permanently, so stop here instead of skipping it.
            print(f"Error: Status code {user_response.status_code} for {username}. Stopping early.")
            break

        # Respect GitHub's rate limit: wait for the window to reset rather
        # than a fixed interval, and only when the header is present.
        remaining = user_response.headers.get('X-RateLimit-Remaining')
        if remaining is not None and int(remaining) < 10:
            reset_time = int(user_response.headers.get('X-RateLimit-Reset', 0))
            sleep_time = max(reset_time - datetime.now().timestamp(), 0) + 10  # Add buffer
            print(f"Rate limit nearly exceeded. Waiting {sleep_time:.0f} seconds...")
            sleep(sleep_time)

    return detailed_stargazers

# Main execution
def main():
    """
    Fetch the next batch of stargazers for the configured repository.

    Reads GITHUB_TOKEN from the environment (after loading .env), resumes
    from the newest CSV in the repository's folder, writes a new CSV that
    contains the old and the newly fetched rows, and prints it.

    Raises:
        ValueError: if GITHUB_TOKEN is not set.
    """
    LIMIT = 35
    repo_owner, repo_name = 'OpenBB-finance', 'OpenBB'

    load_dotenv()
    api_token = os.getenv('GITHUB_TOKEN')
    if not api_token:
        raise ValueError("GITHUB_TOKEN not found in .env file")

    # Where to write, and what (if anything) has been collected before
    output_folder, previous_csv = setup_folder(repo_owner, repo_name)
    latest_starred_at, latest_num_stargazers = get_latest_stargazer_data(previous_csv)

    if previous_csv:
        print(f"We have already processed {latest_num_stargazers} stargazers (until {latest_starred_at})")

    if not LIMIT:
        print(f"We will fetch the remaining stargazers")
    else:
        print(f"We will now fetch the next {LIMIT} stargazers")

    # Collect the new profiles and persist them next to the old ones
    new_profiles = extract_stargazer_info(
        repo_owner,
        repo_name,
        api_token,
        latest_starred_at,
        latest_num_stargazers,
        limit=LIMIT,
    )
    csv_filename = save_stargazer_data(output_folder, previous_csv, new_profiles)
    print(f"Data saved to {csv_filename}")

    # Show the full snapshot that was just written
    print(pd.read_csv(csv_filename))


if __name__ == "__main__":
    main()