In [7]:
import ast
import inspect
import importlib.util
import pandas as pd
from pathlib import Path
from typing import Dict, List, Any, Optional
import sys

def extract_function_attributes_metadata(func_obj) -> Dict[str, Any]:
    """Return the `function_attributes` metadata carried by ``func_obj``.

    Detection and retrieval are delegated to ``pyphocorehelpers.function_helpers``,
    which is imported lazily on each call. When the helper reports that the
    object is not decorated, an empty dict is returned.
    """
    from pyphocorehelpers.function_helpers import get_decorated_function_attributes, is_decorated_with_function_attributes

    # Guard clause: undecorated objects contribute no metadata.
    if not is_decorated_with_function_attributes(func_obj):
        return {}
    return get_decorated_function_attributes(func_obj)

def extract_metadata_attributes_metadata(obj) -> Dict[str, Any]:
    """Return the `metadata_attributes` metadata carried by ``obj``.

    ``obj`` may be a function, method, or class. Detection and retrieval are
    delegated to ``pyphocorehelpers.programming_helpers``, which is imported
    lazily on each call. When the helper reports that the object is not
    decorated, an empty dict is returned.
    """
    from pyphocorehelpers.programming_helpers import get_decorated_metadata_attributes, is_decorated_with_metadata_attributes

    # Guard clause: undecorated objects contribute no metadata.
    if not is_decorated_with_metadata_attributes(obj):
        return {}
    return get_decorated_metadata_attributes(obj)

# Both sync and async definitions count as functions/methods.
# ast.AsyncFunctionDef is NOT a subclass of ast.FunctionDef, so both must be listed.
_FUNCTION_NODE_TYPES = (ast.FunctionDef, ast.AsyncFunctionDef)


def _render_expr(expr: ast.AST) -> str:
    """Render an AST expression as source text (best effort on Python < 3.9)."""
    if hasattr(ast, 'unparse'):
        return ast.unparse(expr)
    # Without ast.unparse only bare names can be rendered; anything else is 'N/A'.
    return getattr(expr, 'id', 'N/A')


def _callable_details(func_node: ast.AST, skip_self: bool = False) -> Dict[str, Any]:
    """Collect the signature-related fields shared by function and method records."""
    return {
        'args': [arg.arg for arg in func_node.args.args
                 if not (skip_self and arg.arg == 'self')],
        'defaults': [_render_expr(d) for d in func_node.args.defaults],
        'decorators': [_render_expr(d) for d in func_node.decorator_list],
        'docstring': ast.get_docstring(func_node),
        'is_async': isinstance(func_node, ast.AsyncFunctionDef),
    }


def parse_ast_node(node: ast.AST, module_path: str, module_name: str) -> List[Dict[str, Any]]:
    """Describe a function or class definition node as a list of record dicts.

    Args:
        node: The AST node to describe.
        module_path: Stored verbatim in each record's ``file_path`` field.
        module_name: Stored verbatim in each record's ``module`` field.

    Returns:
        * For a (sync or async) function definition: one ``'function'`` record.
        * For a class definition: one ``'class'`` record followed by one
          ``'method'`` record per function defined directly in the class body
          (named ``ClassName.method_name``; ``self`` is omitted from ``args``).
        * For any other node: an empty list.

        ``args`` lists positional parameters only (``node.args.args``).
        Decorators, base classes and defaults are rendered as source text, so
        dotted or subscripted forms such as ``@x.setter`` or ``abc.ABC`` are
        included rather than dropped.
    """
    if isinstance(node, _FUNCTION_NODE_TYPES):
        return [{
            'type': 'function',
            'name': node.name,
            'module': module_name,
            'file_path': module_path,
            'line_number': node.lineno,
            **_callable_details(node),
        }]

    if not isinstance(node, ast.ClassDef):
        return []

    # Key order below is kept stable because it determines downstream column order.
    results = [{
        'type': 'class',
        'name': node.name,
        'module': module_name,
        'file_path': module_path,
        'line_number': node.lineno,
        'bases': [_render_expr(b) for b in node.bases],
        'decorators': [_render_expr(d) for d in node.decorator_list],
        'docstring': ast.get_docstring(node),
    }]

    # Methods are the function definitions sitting directly in the class body.
    for item in node.body:
        if isinstance(item, _FUNCTION_NODE_TYPES):
            results.append({
                'type': 'method',
                'name': f"{node.name}.{item.name}",
                'class_name': node.name,
                'module': module_name,
                'file_path': module_path,
                'line_number': item.lineno,
                **_callable_details(item, skip_self=True),
            })

    return results

def parse_python_file(file_path: Path, library_root: Path) -> List[Dict[str, Any]]:
    """Parse one Python file and describe its top-level functions and classes.

    Only definitions that are direct children of the module are handed to
    ``parse_ast_node``. Methods are reported through their class. Walking the
    whole tree instead would report every method a second time as a plain
    function, and would also pick up nested helper functions.

    Args:
        file_path: File to read (decoded as UTF-8).
        library_root: Directory that ``file_path`` lives under; the dotted
            module name is built from the path relative to this root.

    Returns:
        The records produced by ``parse_ast_node``, in source order. Any
        failure (unreadable file, syntax error, path not under the root, ...)
        is reported with ``print`` and yields an empty list, so a single bad
        file does not abort a library-wide scan.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        tree = ast.parse(content, filename=str(file_path))

        # Dotted module name relative to the library root, e.g. "pkg/mod.py" -> "pkg.mod".
        rel_path = file_path.relative_to(library_root)
        module_name = '.'.join(rel_path.parts[:-1] + (rel_path.stem,))

        results = []
        # tree.body holds exactly the module-level statements; each node occurs once,
        # so no de-duplication pass is needed afterwards.
        for node in tree.body:
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                results.extend(parse_ast_node(node, str(file_path), module_name))
        return results

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and keep scanning.
        print(f"Error parsing {file_path}: {e}")
        return []

def enrich_with_runtime_metadata(df: pd.DataFrame, library_path: Path) -> pd.DataFrame:
    """Enrich AST-parsed rows with metadata read from the imported objects.

    For every row the module named in ``row['module']`` is imported, the object
    named in ``row['name']`` is looked up (``ClassName.method`` for rows of type
    ``'method'``), and the results of ``extract_function_attributes_metadata``,
    ``extract_metadata_attributes_metadata`` and ``inspect.signature`` are
    written to ``func_attr_*``, ``meta_attr_*`` and ``signature`` columns.

    Args:
        df: Frame with at least ``type``, ``name`` and ``module`` columns.
            It is not modified.
        library_path: Root directory of the scanned library. Its final path
            component is tried as a package prefix for each module name,
            because module names are relative to this root. The unprefixed
            name is tried as a fallback.

    Returns:
        A copy of ``df`` with the extra columns added. Cells stay ``None`` for
        rows whose module cannot be imported or whose object cannot be found.
        Enrichment is best effort: failures are logged at DEBUG level and
        never raised.
    """
    import logging  # local import: not part of this file's import header

    logger = logging.getLogger(__name__)

    # Work on a copy so the caller's frame keeps its original columns.
    enriched_df = df.copy()

    # Known function_attributes keys
    func_attr_keys = ['short_name', 'tags', 'creation_date', 'input_requires', 'output_provides',
                      'uses', 'used_by', 'related_items', 'conforms_to', 'is_global',
                      'validate_computation_test', 'requires_global_keys', 'provides_global_keys']
    # Known metadata_attributes keys
    meta_attr_keys = ['short_name', 'tags', 'creation_date', 'input_requires', 'output_provides',
                      'uses', 'used_by', 'related_items', 'pyqt_signals_emitted']

    # Pre-create every known column so the output schema does not depend on the data.
    for key in func_attr_keys:
        enriched_df[f'func_attr_{key}'] = None
    for key in meta_attr_keys:
        enriched_df[f'meta_attr_{key}'] = None
    enriched_df['signature'] = None

    package_name = Path(library_path).name
    # Maps a row's module name to the imported module, or None when import failed,
    # so an unimportable module is attempted once rather than once per row.
    module_cache: Dict[str, Any] = {}

    def load_module(relative_name: str):
        """Import the module for a row (cached); return None when it cannot be imported."""
        if relative_name in module_cache:
            return module_cache[relative_name]

        parts = relative_name.split('.')
        if parts[-1] == '__init__':
            # "pkg.__init__" denotes the package itself; import it under its real name.
            parts = parts[:-1]
        bare_name = '.'.join(parts)

        candidates = []
        if package_name:
            candidates.append(f'{package_name}.{bare_name}' if bare_name else package_name)
        if bare_name:
            candidates.append(bare_name)

        module = None
        for candidate in candidates:
            try:
                module = importlib.import_module(candidate)
            except Exception as e:
                # Importing arbitrary library code can fail in arbitrary ways.
                logger.debug("Could not import %s: %s", candidate, e)
                continue
            break

        module_cache[relative_name] = module
        return module

    def set_cell(row_label, column: str, value) -> None:
        """Store ``value`` in one cell, creating the column first if it is not a known key."""
        if column not in enriched_df.columns:
            enriched_df[column] = None
        enriched_df.at[row_label, column] = value

    for idx, row in df.iterrows():
        try:
            module = load_module(row['module'])
            if module is None:
                continue

            obj_type = row['type']
            obj_name = row['name']

            if obj_type == 'method':
                # Methods are named "ClassName.method_name": resolve the class, then the attribute.
                obj = None
                if '.' in obj_name:
                    class_name, method_name = obj_name.split('.', 1)
                    class_obj = getattr(module, class_name, None)
                    if class_obj is not None and inspect.isclass(class_obj):
                        obj = getattr(class_obj, method_name, None)
            else:
                # Classes and functions are plain module attributes.
                obj = getattr(module, obj_name, None)

            if obj is None:
                continue

            for key, value in extract_function_attributes_metadata(obj).items():
                set_cell(idx, f'func_attr_{key}', value)

            for key, value in extract_metadata_attributes_metadata(obj).items():
                set_cell(idx, f'meta_attr_{key}', value)

            if callable(obj):
                try:
                    signature_text = str(inspect.signature(obj))
                except (ValueError, TypeError):
                    # No introspectable signature (e.g. some builtins): leave the cell as None.
                    signature_text = None
                if signature_text is not None:
                    set_cell(idx, 'signature', signature_text)
        except Exception as e:
            # Best effort per row: one problematic object must not abort the whole pass.
            logger.debug("Could not enrich row %r: %s", idx, e)

    return enriched_df

def parse_python_library(library_path: str, enrich_with_runtime: bool = True) -> pd.DataFrame:
    """Scan every ``*.py`` file under ``library_path`` and return one row per definition.

    Args:
        library_path: Root directory of the library to scan (searched recursively).
        enrich_with_runtime: When True and at least one definition was found,
            the rows are passed through ``enrich_with_runtime_metadata``. During
            that call the parent of ``library_path`` is placed at the front of
            ``sys.path`` if it is not already listed.

    Returns:
        A DataFrame built from the records of ``parse_python_file``. It is empty
        (no columns) when nothing was found. Files inside ``__pycache__``
        directories and files whose name starts with ``test_`` are skipped.
    """
    library_path = Path(library_path)
    all_results = []

    # rglob order depends on the filesystem; sorting makes row order reproducible.
    for py_file in sorted(library_path.rglob('*.py')):
        if '__pycache__' in py_file.parts or py_file.name.startswith('test_'):
            continue
        all_results.extend(parse_python_file(py_file, library_path))

    df = pd.DataFrame(all_results)

    if enrich_with_runtime and len(df) > 0:
        library_parent = str(library_path.parent)
        # Remember whether the entry is ours: an entry that was already on sys.path
        # before this call belongs to the caller and must survive the cleanup.
        added_to_sys_path = library_parent not in sys.path
        if added_to_sys_path:
            sys.path.insert(0, library_parent)

        try:
            df = enrich_with_runtime_metadata(df, library_path)
        finally:
            if added_to_sys_path and library_parent in sys.path:
                sys.path.remove(library_parent)

    return df


In [8]:

# Usage (notebook cell: runs at module level; the __main__ guard is intentionally left disabled)
# if __name__ == '__main__':
library_path = r'h:\TEMP\Spike3DEnv_ExploreUpgrade\Spike3DWorkEnv\pyPhoPlaceCellAnalysis\src\pyphoplacecellanalysis'

# Scan the library and enrich every row with runtime metadata.
print("Parsing library...")
df = parse_python_library(library_path, enrich_with_runtime=True)

# Summary: total number of rows, then the per-type breakdown.
item_total = len(df)
print(f"\nFound {item_total} items:")
type_counts = df['type'].value_counts()
print(type_counts)

# Quick look at the leading rows.
print("\nFirst few rows:")
preview = df.head()
print(preview)

# Save to CSV (disabled)
# df.to_csv('library_analysis.csv', index=False)
# print("\nSaved to library_analysis.csv")

Parsing library...
Error parsing h:\TEMP\Spike3DEnv_ExploreUpgrade\Spike3DWorkEnv\pyPhoPlaceCellAnalysis\src\pyphoplacecellanalysis\External\burst-detection-master\lib_final_cma.py: Missing parentheses in call to 'print'. Did you mean print("\nRUNNING CUMULATIVE MOVING AVERAGE ANALYSIS\n")? (lib_final_cma.py, line 15)
Error parsing h:\TEMP\Spike3DEnv_ExploreUpgrade\Spike3DWorkEnv\pyPhoPlaceCellAnalysis\src\pyphoplacecellanalysis\External\burst-detection-master\lib_final_ehv.py: Missing parentheses in call to 'print'. Did you mean print("\n\nGenerating basis functions...")? (lib_final_ehv.py, line 162)
Error parsing h:\TEMP\Spike3DEnv_ExploreUpgrade\Spike3DWorkEnv\pyPhoPlaceCellAnalysis\src\pyphoplacecellanalysis\External\burst-detection-master\lib_final_poisson.py: Missing parentheses in call to 'print'. Did you mean print("\n\n===== ===== ===== ===== =====")? (lib_final_poisson.py, line 66)
Error parsing h:\TEMP\Spike3DEnv_ExploreUpgrade\Spike3DWorkEnv\pyPhoPlaceCellAnalysis\src\pypho

In [9]:
# Show every column name of the parsed/enriched frame as a plain list.
[*df.columns]


['type',
 'name',
 'module',
 'file_path',
 'line_number',
 'args',
 'defaults',
 'decorators',
 'docstring',
 'is_async',
 'bases',
 'class_name',
 'func_attr_short_name',
 'func_attr_tags',
 'func_attr_creation_date',
 'func_attr_input_requires',
 'func_attr_output_provides',
 'func_attr_uses',
 'func_attr_used_by',
 'func_attr_related_items',
 'func_attr_conforms_to',
 'func_attr_is_global',
 'func_attr_validate_computation_test',
 'func_attr_requires_global_keys',
 'func_attr_provides_global_keys',
 'meta_attr_short_name',
 'meta_attr_tags',
 'meta_attr_creation_date',
 'meta_attr_input_requires',
 'meta_attr_output_provides',
 'meta_attr_uses',
 'meta_attr_used_by',
 'meta_attr_related_items',
 'meta_attr_pyqt_signals_emitted',
 'signature']