In [1]:
import os
import json
import esprima
from tqdm import tqdm

In [2]:
def extract_code_gadgets(code):
    gadgets = []
    
    try:
        ast = esprima.parseScript(code, {
            'range': True,
            'tolerant': True,
            'jsx': False
        })
    except Exception as e:
        print(f"🔥 Parsing failed: {str(e)}")
        return gadgets

    sensitive_apis = [
        'fs.readFile', 'fs.readFileSync', 'path.join', 'path.resolve',
        'moment.locale', 'require'
    ]

    class GadgetExtractor(esprima.NodeVisitor):
        def __init__(self):
            self.sensitive_operations = []
            self.user_inputs = set()
            self.debug_log = []

        def get_source_code(self, node):
            """Safely extract source code with range validation"""
            if not hasattr(node, 'range') or node.range is None:
                return None
            start, end = node.range
            if start is None or end is None:
                return None
            return code[start:end]

        def visit_VariableDeclarator(self, node):
            try:
                if node.init:
                    source = self.get_source_code(node.init)
                    if source and any(s in source for s in ['req.query', 'req.params', 'req.body']):
                        var_name = node.id.name
                        self.user_inputs.add(var_name)
                        self.sensitive_operations.append({
                            'node': node,
                            'type': 'user_input',
                            'source': self.get_source_code(node)
                        })
            except Exception as e:
                print(f"VariableDeclarator error: {str(e)}")
            self.generic_visit(node)

        def visit_CallExpression(self, node):
            try:
                # Detect require() with user input
                if hasattr(node.callee, 'name') and node.callee.name == 'require':
                    arg_source = self.get_source_code(node.arguments[0])
                    if arg_source and any(var in arg_source for var in self.user_inputs):
                        self.sensitive_operations.append({
                            'node': node,
                            'type': 'dynamic_require',
                            'source': self.get_source_code(node)
                        })

                # Detect sensitive API calls
                if node.callee.type == 'MemberExpression':
                    obj = self.unwind_member_expression(node.callee)
                    method_call = f"{obj}.{node.callee.property.name}"
                    
                    if method_call in sensitive_apis:
                        self.sensitive_operations.append({
                            'node': node,
                            'type': method_call,
                            'source': self.get_source_code(node)
                        })
            except Exception as e:
                print(f"CallExpression error: {str(e)}")
            self.generic_visit(node)

        def unwind_member_expression(self, node):
            """Robust member expression unwinding"""
            parts = []
            try:
                while node.type == 'MemberExpression':
                    parts.append(node.property.name)
                    node = node.object
                base = node.name if hasattr(node, 'name') else self.get_source_code(node)
                return f"{base}.{'.'.join(reversed(parts))}" if parts else base
            except Exception:
                return "UnknownExpression"

    extractor = GadgetExtractor()
    extractor.visit(ast)

    # Safely create gadgets
    for op in extractor.sensitive_operations:
        # Validate node and range
        if not op['source']:
            continue
            
        node = op['node']
        if not hasattr(node, 'range') or node.range is None:
            continue
            
        start, end = node.range
        if start is None or end is None:
            continue

        # Calculate line numbers safely
        try:
            start_line = code[:start].count('\n') + 1
            end_line = code[:end].count('\n') + 1
            context_start = max(0, start_line - 3)  # 0-based index
            context_end = end_line + 1  # +2 lines after
            lines = code.split('\n')
            gadget = {
                'type': op['type'],
                'source': op['source'],
                'context': lines[context_start:context_end],
                'line_numbers': (start_line, end_line)
            }
            gadgets.append(gadget)
        except Exception as e:
            print(f"Skipping gadget due to error: {str(e)}")

    return gadgets

In [3]:
# Configuration
INPUT_DIR = "../SourceCode/Test"  # Folder containing your JS files
OUTPUT_FILE = "processed_gadgets.json"  # Output file for results


def process_dataset():
    js_files = [os.path.join(INPUT_DIR, f) for f in os.listdir(INPUT_DIR) if f.endswith(".js")]
    
    processed_data = []
    for js_file in tqdm(js_files):
        try:
            with open(js_file, "r", encoding="utf-8") as f:
                code = f.read()
            gadgets = extract_code_gadgets(code)
            processed_data.append({
                "file": js_file,
                "gadgets": gadgets
            })
        except Exception as e:
            print(f"Error processing {js_file}: {str(e)}")
    
    with open(OUTPUT_FILE, "w") as f:
        json.dump(processed_data, f, indent=2)

# Run the dataset processing
process_dataset()

with open(OUTPUT_FILE, "r") as f:
    results = json.load(f)

print(json.dumps(results, indent=2))

100%|██████████| 3/3 [00:00<00:00, 367.70it/s]

[
  {
    "file": "../SourceCode/Test/vuln4.js",
    "gadgets": [
      {
        "type": "user_input",
        "source": "locale = req.query.locale || 'en'",
        "context": [
          "",
          "app.get('/time', (req, res) => {",
          "    const locale = req.query.locale || 'en'; ",
          ""
        ],
        "line_numbers": [
          6,
          6
        ]
      }
    ]
  },
  {
    "file": "../SourceCode/Test/vuln.js",
    "gadgets": []
  },
  {
    "file": "../SourceCode/Test/vuln2.js",
    "gadgets": [
      {
        "type": "user_input",
        "source": "fileName = req.query.file",
        "context": [
          "",
          "app.get('/file', (req, res) => {",
          "    const fileName = req.query.file;",
          "    const filePath = path.join(__dirname, fileName);"
        ],
        "line_numbers": [
          9,
          9
        ]
      }
    ]
  }
]



