# Finding Notebook Location

When working in Jupyter notebooks, `__file__` is not available. Here are several methods to find the notebook's location:

In [13]:
# Method 2: Using os.getcwd() and manual path construction
from pathlib import Path

# Get current working directory
current_dir = Path.cwd()
# Since this notebook is in the scripts folder, we can go up one level
ROOT = current_dir.parent if current_dir.name == "scripts" else current_dir

In [14]:
# list of all -merged folders in stubs
merged_folders = [x for x in (ROOT / "stubs").iterdir() if x.is_dir() and "-merged" in x.name]
print(f"Found {len(merged_folders)} merged folders")

Found 103 merged folders


In [15]:
# NO_DOCS = ["(self, *args, **kwargs)", "(cls, *args, **kwargs)", "(*args, **kwargs)"]


# def scan_py_file(pyi: Path):
#     with pyi.open(encoding="utf-8") as f:
#         line_no = 0
#         for line in f:
#             line_no += 1
#             if any(x in line for x in NO_DOCS):
#                 yield pyi, line, line_no


# def scan_folder(folder: Path):
#     for pyi in folder.glob("*.pyi"):
#         # if py.stem in ("umachine", "utime", "uos","uhashlib","ucollections","uio","ujson","ure","ussl","usocket","usys","utime","uzlib"):
#         #     continue
#         yield scan_py_file(pyi)

In [16]:
# not_found = {}

# for folder in merged_folders:
#     if "1_26_0" in folder.name:
#         print(folder.name)
#         for result in scan_folder(folder):
#             for file, line, line_no in result:
#                 # append line to dict of lines
#                 line = line.strip()
#                 not_found.setdefault(line, []).append((file, line_no))

In [17]:
# # write the output to a file

# with open("not_found.txt", "w") as fp:
#     for k in not_found.keys():
#         fp.write("----------------------------------------------------------\n")
#         fp.write(f"{k}\n")
#         for f in not_found[k]:
#             fp.write(f"  -  {f[0]}:{f[1]}\n")
#         fp.write("\n")

In [18]:
# from collections import Counter
# import re

# # Extract file stems and count occurrences
# file_stems = []
# for files_list in not_found.values():
#     for file_info, _ in files_list:
#         # Extract the stem (filename without extension)
#         stem = file_info.stem
#         file_stems.append(stem)

# # Count the occurrences
# stem_counts = Counter(file_stems)

# # Create a sorted list of (stem, count) tuples, sorted by count in descending order
# sorted_stems = sorted(stem_counts.items(), key=lambda x: x[1], reverse=True)

# # Display the results in a formatted table
# print(f"{'Module':<30} | {'Count':<10}")
# print(f"{'-' * 30} | {'-' * 10}")
# for stem, count in sorted_stems:
#     print(f"{stem:<30} | {count:<10}")

# # Print summary
# print(f"\nTotal unique modules: {len(sorted_stems)}")
# print(f"Total occurrences: {sum(stem_counts.values())}")

In [19]:
import libcst as cst
from pathlib import Path
from dataclasses import dataclass
from typing import List, Generator, Optional, Union


@dataclass
class FunctionInfo:
    """Information about a function or method that uses *args, **kwargs"""

    filename: Path
    line_number: int
    module_name: str
    class_context: Optional[str]
    function_name: str
    function_type: str  # 'function' or 'method'
    signature: str
    has_args: bool
    has_kwargs: bool


class FunctionVisitor(cst.CSTVisitor):
    """LibCST visitor to find functions and methods with *args, **kwargs"""

    def __init__(self, filename: Path):
        self.filename = filename
        self.module_name = filename.stem
        self.functions: List[FunctionInfo] = []
        self.class_stack: List[str] = []

    def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]:
        """Track class context"""
        class_name = node.name.value
        self.class_stack.append(class_name)
        return None

    def leave_ClassDef(self, node: cst.ClassDef) -> None:
        """Exit class context"""
        self.class_stack.pop()

    def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]:
        """Visit function definitions"""
        function_name = node.name.value

        # Check if function has *args, **kwargs
        has_args = False
        has_kwargs = False

        if node.params:
            # Check for *args
            if node.params.star_arg and isinstance(node.params.star_arg, cst.Param):
                if node.params.star_arg.name.value in ("args", "*args"):
                    has_args = True

            # Check for **kwargs
            if node.params.star_kwarg and isinstance(node.params.star_kwarg, cst.Param):
                if node.params.star_kwarg.name.value in ("kwargs", "**kwargs"):
                    has_kwargs = True

        # Only process functions that have *args or **kwargs (or both)
        if has_args or has_kwargs:
            # Get line number
            line_number = getattr(node, "lineno", 0)

            # Determine if it's a method or function
            is_method = len(self.class_stack) > 0
            function_type = "method" if is_method else "function"

            # Get class context
            class_context = ".".join(self.class_stack) if self.class_stack else None

            # Build signature string
            signature = self._build_signature_string(node)

            function_info = FunctionInfo(
                filename=self.filename,
                line_number=line_number,
                module_name=self.module_name,
                class_context=class_context,
                function_name=function_name,
                function_type=function_type,
                signature=signature,
                has_args=has_args,
                has_kwargs=has_kwargs,
            )

            self.functions.append(function_info)

        return None

    def _build_signature_string(self, node: cst.FunctionDef) -> str:
        """Build a readable signature string"""
        params = []

        if node.params:
            # Regular parameters
            for param in node.params.params:
                params.append(param.name.value)

            # *args parameter
            if node.params.star_arg:
                if isinstance(node.params.star_arg, cst.Param):
                    params.append(f"*{node.params.star_arg.name.value}")
                else:
                    params.append("*")

            # Keyword-only parameters
            for param in node.params.kwonly_params:
                params.append(param.name.value)

            # **kwargs parameter
            if node.params.star_kwarg:
                params.append(f"**{node.params.star_kwarg.name.value}")

        return f"({', '.join(params)})"


def scan_stub_file_with_libcst(pyi_file: Path) -> Generator[FunctionInfo, None, None]:
    """Scan a single .pyi file using libcst to find functions with *args, **kwargs"""
    try:
        with pyi_file.open(encoding="utf-8") as f:
            content = f.read()

        # Parse the file with libcst
        tree = cst.parse_module(content)

        # Create visitor and traverse the tree
        visitor = FunctionVisitor(pyi_file)
        tree.visit(visitor)

        # Yield all found functions
        for func_info in visitor.functions:
            yield func_info

    except Exception as e:
        print(f"Error parsing {pyi_file}: {e}")


def scan_folder_with_libcst(folder: Path) -> Generator[FunctionInfo, None, None]:
    """Scan all .pyi files in a folder using libcst"""
    for pyi_file in folder.glob("*.pyi"):
        yield from scan_stub_file_with_libcst(pyi_file)


# Test the new libcst-based approach
print("Testing libcst-based function scanner...")

Testing libcst-based function scanner...


In [20]:
# Run the enhanced scanner on merged folders
enhanced_results = []

for folder in merged_folders:
    if "1_26_0" in folder.name:
        print(f"\nScanning folder: {folder.name}")

        count = 0
        for func_info in scan_folder_with_libcst(folder):
            enhanced_results.append(func_info)
            count += 1

            # Show first few results as examples
            if count <= 5:
                print(f"  Found: {func_info.module_name}.{func_info.class_context or ''}{func_info.function_name}")

        print(f"  Total functions found in {folder.name}: {count}")

print(f"\nTotal functions with *args/**kwargs found: {len(enhanced_results)}")


Scanning folder: micropython-v1_26_0-esp32-ESP32_GENERIC-merged
  Found: aioespnow.AIOESPNowpeer_count
  Found: aioespnow.AIOESPNowrecv
  Found: aioespnow.AIOESPNowmod_peer
  Found: aioespnow.AIOESPNowirecv
  Found: aioespnow.AIOESPNowstats
  Total functions found in micropython-v1_26_0-esp32-ESP32_GENERIC-merged: 302

Scanning folder: micropython-v1_26_0-esp32-ESP32_GENERIC_C6-merged
  Found: aioespnow.AIOESPNowpeer_count
  Found: aioespnow.AIOESPNowrecv
  Found: aioespnow.AIOESPNowmod_peer
  Found: aioespnow.AIOESPNowirecv
  Found: aioespnow.AIOESPNowstats
  Total functions found in micropython-v1_26_0-esp32-ESP32_GENERIC-merged: 302

Scanning folder: micropython-v1_26_0-esp32-ESP32_GENERIC_C6-merged
  Found: aioespnow.AIOESPNowpeer_count
  Found: aioespnow.AIOESPNowrecv
  Found: aioespnow.AIOESPNowmod_peer
  Found: aioespnow.AIOESPNowirecv
  Found: aioespnow.AIOESPNowstats
  Total functions found in micropython-v1_26_0-esp32-ESP32_GENERIC_C6-merged: 298

Scanning folder: micropytho

In [26]:
# Display detailed results
import pandas as pd
from collections import defaultdict

# Convert results to a more readable format
results_data = []
for func_info in enhanced_results:
    full_name = func_info.function_name
    if func_info.class_context:
        full_name = f"{func_info.class_context}.{func_info.function_name}"

    results_data.append(
        {
            "Module": func_info.module_name,
            "Class": func_info.class_context or "",
            "Function": func_info.function_name,
            "Full Name": full_name,
            "Type": func_info.function_type,
            "Signature": func_info.signature,
            "Has *args": func_info.has_args,
            "Has **kwargs": func_info.has_kwargs,
            "Line": func_info.line_number,
            "File": func_info.filename.name,
        }
    )

# Create DataFrame for easier analysis
df = pd.DataFrame(results_data)
df.head()

Unnamed: 0,Module,Class,Function,Full Name,Type,Signature,Has *args,Has **kwargs,Line,File
0,aioespnow,AIOESPNow,peer_count,AIOESPNow.peer_count,method,"(self, *args, **kwargs)",True,True,0,aioespnow.pyi
1,aioespnow,AIOESPNow,recv,AIOESPNow.recv,method,"(self, *args, **kwargs)",True,True,0,aioespnow.pyi
2,aioespnow,AIOESPNow,mod_peer,AIOESPNow.mod_peer,method,"(self, *args, **kwargs)",True,True,0,aioespnow.pyi
3,aioespnow,AIOESPNow,irecv,AIOESPNow.irecv,method,"(self, *args, **kwargs)",True,True,0,aioespnow.pyi
4,aioespnow,AIOESPNow,stats,AIOESPNow.stats,method,"(self, *args, **kwargs)",True,True,0,aioespnow.pyi


In [35]:
# Filter dataframe for entries with both *args and **kwargs
df2 = df[(df['Has *args'] == True) & (df['Has **kwargs'] == True)]

# Count unique function full names
unique_fullnames = df2['Full Name'].nunique()
print(f"Total functions with both *args and **kwargs: {len(df2)}")
print(f"Unique function names with both *args and **kwargs: {unique_fullnames}")

# Show the most common function names
print("\nMost common function names with both *args and **kwargs:")
fullname_counts = df2['Full Name'].value_counts().head(10)
for name, count in fullname_counts.items():
    print(f"{name:<40}: {count}")

# Analyze by module
print("\nModules with most functions using both *args and **kwargs:")
module_counts = df2['Module'].value_counts().head(10)
for module, count in module_counts.items():
    print(f"{module:<25}: {count:>3} functions")


Total functions with both *args and **kwargs: 2350
Unique function names with both *args and **kwargs: 416

Most common function names with both *args and **kwargs:
sha256.digest                           : 24
sha256.update                           : 24
dht_readinto                            : 24
sha1.update                             : 20
sha1.digest                             : 20
reset                                   : 18
const                                   : 17
isenabled                               : 12
FrameBuffer.fill_rect                   : 12
SoftSPI.init                            : 12

Modules with most functions using both *args and **kwargs:
machine                  : 382 functions
vfs                      : 264 functions
ucollections             : 144 functions
onewire                  : 132 functions
dht                      :  96 functions
urandom                  :  77 functions
aioespnow                :  76 functions
_onewire                 :  72 functio

In [None]:

# Display summary
print("=== SUMMARY ===")
print(f"Total functions/methods found: {len(df)}")
print(f"Functions (top-level): {len(df[df['Type'] == 'function'])}")
print(f"Methods (in classes): {len(df[df['Type'] == 'method'])}")
print(f"With *args only: {len(df[(df['Has *args'] == True) & (df['Has **kwargs'] == False)])}")
print(f"With **kwargs only: {len(df[(df['Has *args'] == False) & (df['Has **kwargs'] == True)])}")
print(f"With both *args and **kwargs: {len(df[(df['Has *args'] == True) & (df['Has **kwargs'] == True)])}")

print("\n=== TOP 10 MODULES BY COUNT ===")
module_counts = df["Module"].value_counts().head(10)
for module, count in module_counts.items():
    print(f"{module:<25}: {count:>3} functions/methods")

print("\n=== SAMPLE DETAILED RESULTS ===")
print(df[["Module", "Full Name", "Type", "Signature", "Line"]].head(10).to_string(index=False))

In [22]:
df.head

<bound method NDFrame.head of          Module      Class    Function             Full Name      Type  \
0     aioespnow  AIOESPNow  peer_count  AIOESPNow.peer_count    method   
1     aioespnow  AIOESPNow        recv        AIOESPNow.recv    method   
2     aioespnow  AIOESPNow    mod_peer    AIOESPNow.mod_peer    method   
3     aioespnow  AIOESPNow       irecv       AIOESPNow.irecv    method   
4     aioespnow  AIOESPNow       stats       AIOESPNow.stats    method   
...         ...        ...         ...                   ...       ...   
2654   _onewire              writebyte             writebyte  function   
2655   _onewire               writebit              writebit  function   
2656   _onewire                   crc8                  crc8  function   
2657   _onewire               readbyte              readbyte  function   
2658   _onewire                readbit               readbit  function   

                    Signature  Has *args  Has **kwargs  Line           File  
0  

In [None]:
# # Count occurrences of each full method/function name
# fullname_counts = df['Full Name'].value_counts()

# # Create a DataFrame from the Series
# fullname_df = pd.DataFrame({
#     'Full Name': fullname_counts.index,
#     'Count': fullname_counts.values
# })

# # Sort by count in descending order
# fullname_df = fullname_df.sort_values('Count', ascending=False)

# # Display the top 20 results
# print(f"Total unique method/function names: {len(fullname_df)}")
# print("\nTop 20 most common methods/functions with *args, **kwargs:")
# fullname_df.head(20)


In [23]:
df["Full Name"].value_counts()

Full Name
ADCBlock.connect     36
sha256.digest        24
sha256.update        24
dht_readinto         24
sha1.digest          20
                     ..
SDCard.power          1
SDCard.present        1
SDCard.readblocks     1
SDCard.read           1
SDCard.write          1
Name: count, Length: 459, dtype: int64

In [24]:
# Export detailed results to files
output_dir = Path(".")

# Export to CSV for data analysis
csv_file = output_dir / "enhanced_undoc_functions.csv"
df.to_csv(csv_file, index=False)
print(f"Exported to CSV: {csv_file}")

# Export to detailed text file
txt_file = output_dir / "enhanced_undoc_functions.txt"
with open(txt_file, "w", encoding="utf-8") as f:
    f.write("FUNCTIONS AND METHODS WITH *ARGS, **KWARGS\n")
    f.write("=" * 50 + "\n\n")

    # Group by module
    grouped = defaultdict(list)
    for func_info in enhanced_results:
        grouped[func_info.module_name].append(func_info)

    for module_name in sorted(grouped.keys()):
        functions = grouped[module_name]
        f.write(f"MODULE: {module_name} ({len(functions)} functions/methods)\n")
        f.write("-" * (len(module_name) + 20) + "\n")

        for func_info in sorted(functions, key=lambda x: (x.class_context or "", x.function_name)):
            location = f"{func_info.filename.name}:{func_info.line_number}"

            if func_info.class_context:
                full_name = f"{func_info.class_context}.{func_info.function_name}"
                f.write(f"  • {func_info.function_type.upper()}: {full_name}{func_info.signature}\n")
            else:
                f.write(f"  • {func_info.function_type.upper()}: {func_info.function_name}{func_info.signature}\n")

            f.write(f"    Location: {location}\n")

            # Show what type of generic parameters it has
            params = []
            if func_info.has_args:
                params.append("*args")
            if func_info.has_kwargs:
                params.append("**kwargs")
            f.write(f"    Parameters: {', '.join(params)}\n\n")

        f.write("\n")

print(f"Exported detailed text file: {txt_file}")

# Export JSON for programmatic use
import json

json_file = output_dir / "enhanced_undoc_functions.json"
json_data = []
for func_info in enhanced_results:
    json_data.append(
        {
            "filename": str(func_info.filename),
            "line_number": func_info.line_number,
            "module_name": func_info.module_name,
            "class_context": func_info.class_context,
            "function_name": func_info.function_name,
            "function_type": func_info.function_type,
            "signature": func_info.signature,
            "has_args": func_info.has_args,
            "has_kwargs": func_info.has_kwargs,
        }
    )

with open(json_file, "w", encoding="utf-8") as f:
    json.dump(json_data, f, indent=2)

print(f"Exported JSON file: {json_file}")

Exported to CSV: enhanced_undoc_functions.csv
Exported detailed text file: enhanced_undoc_functions.txt
Exported JSON file: enhanced_undoc_functions.json
