In [3]:
import json
import os
import pandas as pd
import re
import xml.etree.ElementTree as ET

In [4]:
# Root folder to scan. Surrounding whitespace and double quotes are removed so
# that a path pasted from a file manager (which may quote it) still resolves.
path = input("Path:").strip().strip('"')

# Full path of every file found anywhere below `path`, whatever its type.
filelist = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(path)
    for file in files
]

# Keep only Dynamo graphs (.dyn) and custom-node definitions (.dyf).
# The real extension is compared, case-insensitively, instead of slicing the
# path string, so names that merely end in "dy?" are not picked up and
# upper-case extensions are not missed.
dynamo_extensions = (".dyn", ".dyf")
new_list = [
    candidate
    for candidate in filelist
    if os.path.splitext(candidate)[1].lower() in dynamo_extensions
]
len(new_list)

Path:N:\Revit\Common\Dynamo\Support\Scripts


609

In [5]:
# Accumulator for the per-file result dictionaries built further down
data = list()

# Function to extract year from file path
def extract_year(file_path):
    """Return the first plausible year found in ``file_path``.

    A year is a four-digit number starting with 19 or 20 that does not begin
    in the middle of a longer run of digits. This keeps arbitrary numbers
    such as IDs or revision counters from being reported as a year, while
    still accepting date-stamped names like ``20230115_export.dyn``.

    Parameters:
        file_path: Path (or any string) to search.

    Returns:
        The matched year as a four-character string, or ``"Unknown"`` when
        the path contains no such number.
    """
    # (?<!\d) rejects matches that start inside a longer digit sequence.
    match = re.search(r'(?<!\d)(?:19|20)\d{2}', file_path)
    return match.group(0) if match else "Unknown"

# Loop through the Dynamo files collected in `new_list` and build one record
# per file. Every file ends up in `data`, including those that could not be
# read or parsed (they are recorded with Format == 'Error').
for file_path in new_list:
    file_info = {
        'File Path': file_path,
        'Year': extract_year(file_path),
        'Dynamo Version': None,
        'NodeLibraryDependencies': [],
        'Custom Node Descriptions': [],
        'Python Engine': 'None',
        'Format': 'Unknown'  # Becomes 'JSON', 'XML' or 'Error' below
    }

    # Distinct "Engine" values seen on the file's Python nodes
    python_engines_used = set()

    try:
        # utf-8-sig reads plain UTF-8 unchanged but also drops a leading BOM,
        # which json.loads would otherwise reject.
        with open(file_path, 'r', encoding='utf-8-sig') as fh:
            content = fh.read()

        # Try to parse as JSON first
        try:
            dyn_data = json.loads(content)
        except json.JSONDecodeError:
            # Not JSON: accept the file as XML if it is well-formed.
            # A failure here raises ET.ParseError, handled below.
            ET.fromstring(content)
            file_info['Format'] = 'XML'
            file_info['Dynamo Version'] = "Before 2020"  # XML implies an older version
            # XML processing logic (placeholder, adjust as necessary)
        else:
            file_info['Format'] = 'JSON'

            # Presence of the NodeLibraryDependencies key is used as the
            # marker for the newer file generation.
            if "NodeLibraryDependencies" in dyn_data:
                file_info['Dynamo Version'] = "2020 or later"
                for dep in dyn_data["NodeLibraryDependencies"]:
                    file_info['NodeLibraryDependencies'].append(dep.get("Name"))
            else:
                file_info['Dynamo Version'] = "Before 2020"

            # Custom node descriptions are only a fallback for files that
            # list no library dependencies.
            collect_descriptions = not file_info['NodeLibraryDependencies']

            # `or` guards against explicit nulls in the JSON
            for node in dyn_data.get('Nodes') or []:
                concrete_type = node.get("ConcreteType") or ""
                if collect_descriptions and concrete_type.startswith("Dynamo.Graph.Nodes.CustomNodes"):
                    file_info['Custom Node Descriptions'].append(node.get("Description"))
                if concrete_type.startswith("PythonNodeModels.PythonNode"):
                    python_engines_used.add(node.get("Engine", "Unknown"))

    except ET.ParseError as e:
        print(f"XML parse error in file {file_path}: {e}")
        file_info['Format'] = 'Error'
    except Exception as e:
        # Deliberately broad: one unreadable or malformed file must not
        # abort the whole scan. The failure is reported and recorded.
        print(f"General error processing file {file_path}: {e}")
        file_info['Format'] = 'Error'

    # Determine the Python engine field value
    if "IronPython2" in python_engines_used and "CPython3" in python_engines_used:
        file_info['Python Engine'] = "Both"
    elif "IronPython2" in python_engines_used:
        file_info['Python Engine'] = "IronPython2"
    elif "CPython3" in python_engines_used:
        file_info['Python Engine'] = "CPython3"
    elif python_engines_used:
        # Python nodes exist but none names a recognised engine (for example
        # the "Engine" key is absent, giving "Unknown"). Report what was
        # found rather than claiming the file has no Python nodes.
        file_info['Python Engine'] = ", ".join(sorted(str(engine) for engine in python_engines_used))
    # else: no Python nodes were seen; keep the default 'None'

    # Append the file info to the data list (error rows included)
    data.append(file_info)

# Convert the list of dictionaries to a pandas DataFrame with a fixed column order
column_order = [
    'File Path', 'Year', 'Dynamo Version', 'NodeLibraryDependencies',
    'Custom Node Descriptions', 'Python Engine', 'Format'
]
# Passing `columns=` selects and orders the columns in one step and still
# yields a correctly-labelled (empty) frame when `data` has no records;
# indexing an empty frame with `df[column_order]` would raise KeyError.
df = pd.DataFrame(data, columns=column_order)

# Display and save the DataFrame
print(df)
df.to_csv('dynamo_analysis.csv', index=False)  # Adjust the path as needed for Jupyter notebook


                                             File Path     Year  \
0    N:\Revit\Common\Dynamo\Support\Scripts\_Dynamo...  Unknown   
1    N:\Revit\Common\Dynamo\Support\Scripts\2019\20...     2019   
2    N:\Revit\Common\Dynamo\Support\Scripts\2019\E_...     2019   
3    N:\Revit\Common\Dynamo\Support\Scripts\2019\E_...     2019   
4    N:\Revit\Common\Dynamo\Support\Scripts\2019\E_...     2019   
..                                                 ...      ...   
604  N:\Revit\Common\Dynamo\Support\Scripts\Testing...     2023   
605  N:\Revit\Common\Dynamo\Support\Scripts\Testing...  Unknown   
606  N:\Revit\Common\Dynamo\Support\Scripts\Testing...  Unknown   
607  N:\Revit\Common\Dynamo\Support\Scripts\Testing...  Unknown   
608  N:\Revit\Common\Dynamo\Support\Scripts\Testing...  Unknown   

    Dynamo Version NodeLibraryDependencies  \
0      Before 2020                      []   
1      Before 2020                      []   
2      Before 2020                      []   
3      Befo