In [6]:
import requests
from dotenv import load_dotenv
import os
import base64
# Load the GitHub token from secrets.env; authenticated requests get a higher
# API rate limit than anonymous ones.
load_dotenv('secrets.env')
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
# Only send the Authorization header when a token is actually configured.
# Without this guard a missing token produces the literal header "token None",
# which GitHub rejects as bad credentials instead of serving the request
# anonymously.
headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}

In [7]:
def fetch_user_details(username, timeout=10):
    """Fetch the profile of a GitHub user.

    Args:
        username: GitHub login of the user to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict: The decoded JSON body of ``GET /users/{username}``.

    Raises:
        requests.HTTPError: If GitHub answers with a 4xx/5xx status
            (unknown user, bad credentials, rate limit exceeded, ...).
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = f"https://api.github.com/users/{username}"
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here rather than returning an error payload that would later be
    # parsed as if it were a user profile.
    response.raise_for_status()
    return response.json()

In [8]:
def parse_user_details(user_details):
    """Reduce a GitHub user payload to the fields this notebook uses.

    Args:
        user_details: Mapping holding the JSON body of a GitHub user.

    Returns:
        dict: Login, avatar/profile/repos URLs, public and private repository
        counts, their sum, and follower/following counts. Missing keys fall
        back to "N/A", an empty string or 0 depending on the field.
    """
    public_count = user_details.get("public_repos", 0)
    private_count = user_details.get("total_private_repos", 0)

    summary = {
        "name": user_details.get("login", "N/A"),
        "avatar_url": user_details.get("avatar_url", ""),
        "profile_url": user_details.get("html_url", ""),
        "repos_url": user_details.get("repos_url", ""),
    }
    summary["public_repos_count"] = public_count
    summary["private_repos_count"] = private_count
    summary["total_repos"] = public_count + private_count
    summary["followers_count"] = user_details.get("followers", 0)
    summary["following_count"] = user_details.get("following", 0)
    return summary


In [9]:
def fetch_user_repos(username, per_page=100, timeout=10):
    """Fetch every repository listed for a GitHub user.

    The endpoint is paginated, so a single request only returns the first
    page. This function keeps requesting pages until a short (or empty) page
    signals the end of the listing.

    Args:
        username: GitHub login whose repositories are listed.
        per_page: Page size to request from the API.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        list: The repository objects from all pages, in API order.

    Raises:
        requests.HTTPError: If any page request returns a 4xx/5xx status.
        requests.Timeout: If a request does not answer within ``timeout``.
    """
    url = f"https://api.github.com/users/{username}/repos"
    repos = []
    page = 1
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={"per_page": per_page, "page": page},
            timeout=timeout,
        )
        # An error body is a dict, not a list of repositories; raise instead
        # of handing it to callers that iterate over the result.
        response.raise_for_status()
        batch = response.json()
        repos.extend(batch)
        # A page shorter than the requested size is the last one.
        if len(batch) < per_page:
            return repos
        page += 1

In [10]:
def parse_user_repos(repo):
    """Extract only the repository fields this notebook needs.

    Args:
        repo: Mapping holding one repository object from the GitHub API.

    Returns:
        dict: Name, full name, default branch, HTML/clone URLs, description,
        fork and open-issue counts, last push timestamp and topics.
    """
    return {
        "name": repo.get("name", "N/A"),
        "full_name": repo.get("full_name", "N/A"),
        "default_branch": repo.get("default_branch", "N/A"),
        "html_url": repo.get("html_url", ""),
        "clone_url": repo.get("clone_url", ""),
        # These keys can be present with a null value, in which case
        # dict.get(key, default) returns None rather than the default.
        # `or` applies the placeholder for both missing and null values.
        "description": repo.get("description") or "No description available",
        "forks_count": repo.get("forks_count", 0),
        "open_issues_count": repo.get("open_issues_count", 0),
        "pushed_at": repo.get("pushed_at") or "Never pushed",
        "topics": repo.get("topics") or [],
    }

In [11]:
def fetch_repo_languages(username, repo_name, timeout=10):
    """Fetch the language breakdown of a repository.

    Args:
        username: Owner of the repository.
        repo_name: Name of the repository.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict: The decoded JSON body of the languages endpoint on a 200
        response, or an empty dict when the request does not succeed.
    """
    url = f"https://api.github.com/repos/{username}/{repo_name}/languages"
    response = requests.get(url, headers=headers, timeout=timeout)
    # An error body is also a JSON object; returning it would store the error
    # message as if it were language data.
    if response.status_code != 200:
        return {}
    return response.json()

In [12]:
def fetch_repo_readme(username, repo_name, timeout=10):
    """Fetch the raw text of a repository's README.

    Args:
        username: Owner of the repository.
        repo_name: Name of the repository.
        timeout: Seconds to wait for each of the two requests.

    Returns:
        str | None: The README text, or None when the README metadata or the
        file download cannot be retrieved.
    """
    url = f"https://api.github.com/repos/{username}/{repo_name}/readme"
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None

    download_url = response.json().get("download_url")
    if not download_url:
        return None

    # The download is requested without the API headers, as before.
    readme_response = requests.get(download_url, timeout=timeout)
    # Do not return an error page as if it were the README.
    if readme_response.status_code != 200:
        return None
    return readme_response.text

In [13]:
def fetch_repo_file_structure(owner, repo, branch, timeout=10):
    """Fetch the recursive Git tree of a branch.

    Args:
        owner: Owner of the repository.
        repo: Name of the repository.
        branch: Branch whose tree is requested.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict | None: The decoded JSON body of the tree endpoint on a 200
        response, otherwise None.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None
    return response.json()

In [14]:
def get_github_file_content(owner, repo, file_path, timeout=10):
    """Fetch a file through the contents endpoint and decode it as UTF-8 text.

    Args:
        owner: Owner of the repository.
        repo: Name of the repository.
        file_path: Path of the file inside the repository.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        str | dict: The decoded file text on success. On any failure
        (non-200 status, a response that is not a single file, or content
        that is not valid base64/UTF-8) a dict with a single "error" key
        describing the problem.
    """
    from urllib.parse import quote

    # Percent-encode the path so characters such as '#' or '?' are not read
    # as URL syntax; '/' stays unescaped as the path separator.
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{quote(file_path)}"
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        # The error body is not guaranteed to be JSON; fall back to raw text.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        return {"error": f"Failed to get content: {response.status_code} - {detail}"}

    file_info = response.json()
    # A directory path yields a JSON list rather than a single file object.
    if not isinstance(file_info, dict) or "content" not in file_info:
        return {"error": f"Failed to get content: {file_path} is not a single file"}

    try:
        return base64.b64decode(file_info["content"]).decode("utf-8")
    except ValueError as exc:
        # Covers both invalid base64 (binascii.Error) and non-UTF-8 bytes
        # (UnicodeDecodeError); report it instead of aborting the caller.
        return {"error": f"Failed to decode content of {file_path}: {exc}"}

In [15]:
# Fetch the GitHub profile and keep only the fields used in this notebook.
user_details = parse_user_details(fetch_user_details("farismuhovic"))

In [16]:
# Collect the trimmed metadata of every repository, enriched with its
# language breakdown, README text and recursive file tree.
username = user_details["name"]
user_repos = fetch_user_repos(username)

filtered_repos = []
for repo in user_repos:
    repo_name = repo["name"]
    filtered_repo = {
        **parse_user_repos(repo),
        "languages": fetch_repo_languages(username, repo_name),
        "readme": fetch_repo_readme(username, repo_name),
        "structure": fetch_repo_file_structure(username, repo_name, repo["default_branch"]),
    }
    filtered_repos.append(filtered_repo)

In [17]:
# File suffixes whose contents are fetched as text. The value is passed to
# str.endswith, so it has to stay a tuple.
text_extensions = (
    # plain text and web front-end
    ".txt", ".html", ".css",
    ".js", ".ts", ".jsx", ".tsx",
    # structured data
    ".json", ".yml", ".yaml", ".xml",
    # source code and scripts
    ".py", ".java", ".c", ".cpp", ".h", ".hpp", ".cs",
    ".sh", ".bat", ".sql", ".rb", ".go", ".rs",
    # configuration
    ".ini", ".cfg", ".properties",
    # further source, stylesheet and build files
    ".pl", ".lua", ".swift", ".r", ".m", ".php", ".asp", ".jsp",
    ".scss", ".less", ".kt", ".gradle", ".svelte", ".dart",
    ".vb", ".vbs", ".ipynb", ".tf", ".clj", ".erl", ".lhs",
    # documentation, markup and subtitles
    ".tex", ".asciidoc", ".adoc", ".rst", ".xhtml", ".vtt", ".srt",
    # other
    ".rdf", ".ps1",
)

# Names that cause a path to be skipped even when its suffix matches above.
excluded_filenames = (
    "package-lock.json",
    "yarn.lock",
    "npm-debug.log",
    "composer.lock",
    "Gemfile.lock",
    "Pipfile.lock",
    "requirements.txt",
    "Dockerfile",
    ".env",
    ".env.example",
    ".DS_Store",
    ".gitignore",
)

In [18]:

# Download the text files of every repository and store their contents under
# the "repo_data" key of that repository's entry.
for repo in filtered_repos:
    struct = repo["structure"]
    repo_data = []
    if struct:
        for item in struct["tree"]:
            # Only "blob" entries are files. Directory or submodule entries
            # whose name happens to end in a text extension (for example a
            # folder called "chart.js") must not be requested as file content.
            if item.get("type") != "blob":
                continue
            path = item["path"]
            is_excluded = any(excluded_filename in path for excluded_filename in excluded_filenames)
            if path.endswith(text_extensions) and not is_excluded:
                content = get_github_file_content(username, repo["name"], path)
                repo_data.append(content)
    repo["repo_data"] = repo_data

In [19]:
# Display the parsed profile summary.
user_details

{'name': 'FarisMuhovic',
 'avatar_url': 'https://avatars.githubusercontent.com/u/110911021?v=4',
 'profile_url': 'https://github.com/FarisMuhovic',
 'repos_url': 'https://api.github.com/users/FarisMuhovic/repos',
 'public_repos_count': 6,
 'private_repos_count': 30,
 'total_repos': 36,
 'followers_count': 3,
 'following_count': 3}

In [20]:
# Display the collected per-repository data. The output is large: each entry
# carries the README text, the file tree and the fetched file contents.
filtered_repos

[{'name': 'Better-Wordle',
  'full_name': 'FarisMuhovic/Better-Wordle',
  'default_branch': 'master',
  'html_url': 'https://github.com/FarisMuhovic/Better-Wordle',
  'clone_url': 'https://github.com/FarisMuhovic/Better-Wordle.git',
  'description': 'Wordle game replica version from nytimes. With this version you can practice and improve your skills by playing endless rounds a day! The dictionary for this version contains over 13 000 words. Dictionary API that gives u the meaning of word at the end of the game.',
  'forks_count': 0,
  'open_issues_count': 0,
  'pushed_at': '2024-07-15T17:53:32Z',
  'topics': ['react'],
  'languages': {'JavaScript': 156671, 'CSS': 14628, 'HTML': 427},
  'readme': "# Wordle Game Replica\n\n## Description\nWordle Game Replica is a web application that replicates the popular word-guessing game found in the New York Times. It offers an endless number of rounds, allowing you to practice and improve your skills in word guessing. The game is designed to be eng