From 7ecf89ca8779c9fd057f07a6aee45e64bc522207 Mon Sep 17 00:00:00 2001 From: elsapet Date: Tue, 14 May 2024 16:35:19 +0200 Subject: [PATCH] fix(python): update analyzer for import statements (#1592) * fix(python): update analyzer for import statements * feat(python): add test for internal statement --- .../python/.snapshots/TestImport--import.yml | 338 ++++++++++++++++++ .../languages/python/analyzer/analyzer.go | 23 ++ internal/languages/python/pattern/pattern.go | 7 +- internal/languages/python/python_test.go | 7 + .../python/testdata/import/import.py | 38 ++ .../languages/python/testdata/import_rule.yml | 48 +++ 6 files changed, 460 insertions(+), 1 deletion(-) create mode 100644 internal/languages/python/.snapshots/TestImport--import.yml create mode 100644 internal/languages/python/testdata/import/import.py create mode 100644 internal/languages/python/testdata/import_rule.yml diff --git a/internal/languages/python/.snapshots/TestImport--import.yml b/internal/languages/python/.snapshots/TestImport--import.yml new file mode 100644 index 000000000..b0d1c4d28 --- /dev/null +++ b/internal/languages/python/.snapshots/TestImport--import.yml @@ -0,0 +1,338 @@ +high: + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 2 + full_filename: import.py + filename: import.py + source: + location: + start: 2 + end: 2 + column: + start: 1 + end: 17 + sink: + location: + start: 2 + end: 2 + column: + start: 1 + end: 17 + content: "" + parent_line_number: 2 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_0 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_0 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 5 + full_filename: import.py + filename: import.py + source: + location: + start: 5 + end: 5 
column: + start: 1 + end: 18 + sink: + location: + start: 5 + end: 5 + column: + start: 1 + end: 18 + content: "" + parent_line_number: 5 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_1 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_1 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 8 + full_filename: import.py + filename: import.py + source: + location: + start: 8 + end: 8 + column: + start: 1 + end: 15 + sink: + location: + start: 8 + end: 8 + column: + start: 1 + end: 15 + content: "" + parent_line_number: 8 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_2 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_2 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 11 + full_filename: import.py + filename: import.py + source: + location: + start: 11 + end: 11 + column: + start: 1 + end: 17 + sink: + location: + start: 11 + end: 11 + column: + start: 1 + end: 17 + content: "" + parent_line_number: 11 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_3 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_3 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 14 + full_filename: import.py + filename: import.py + source: + location: + start: 14 + end: 14 + column: + start: 1 + end: 17 + sink: + location: + start: 14 + end: 14 + column: + start: 1 + end: 17 + content: "" + parent_line_number: 14 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_4 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_4 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + 
documentation_url: "" + line_number: 17 + full_filename: import.py + filename: import.py + source: + location: + start: 17 + end: 17 + column: + start: 1 + end: 17 + sink: + location: + start: 17 + end: 17 + column: + start: 1 + end: 17 + content: "" + parent_line_number: 17 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_5 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_5 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 20 + full_filename: import.py + filename: import.py + source: + location: + start: 20 + end: 20 + column: + start: 1 + end: 20 + sink: + location: + start: 20 + end: 20 + column: + start: 1 + end: 20 + content: "" + parent_line_number: 20 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_6 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_6 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 23 + full_filename: import.py + filename: import.py + source: + location: + start: 23 + end: 23 + column: + start: 1 + end: 16 + sink: + location: + start: 23 + end: 23 + column: + start: 1 + end: 16 + content: "" + parent_line_number: 23 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_7 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_7 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 26 + full_filename: import.py + filename: import.py + source: + location: + start: 26 + end: 26 + column: + start: 1 + end: 23 + sink: + location: + start: 26 + end: 26 + column: + start: 1 + end: 23 + content: "" + parent_line_number: 26 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_8 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_8 + - rule: + 
cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 30 + full_filename: import.py + filename: import.py + source: + location: + start: 30 + end: 30 + column: + start: 1 + end: 11 + sink: + location: + start: 30 + end: 30 + column: + start: 1 + end: 11 + content: "" + parent_line_number: 30 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_9 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_9 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 34 + full_filename: import.py + filename: import.py + source: + location: + start: 34 + end: 34 + column: + start: 1 + end: 11 + sink: + location: + start: 34 + end: 34 + column: + start: 1 + end: 11 + content: "" + parent_line_number: 34 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_10 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_10 + - rule: + cwe_ids: + - "42" + id: import_test + title: Test detection filter import statements + description: Test detection filter import statements + documentation_url: "" + line_number: 38 + full_filename: import.py + filename: import.py + source: + location: + start: 38 + end: 38 + column: + start: 1 + end: 11 + sink: + location: + start: 38 + end: 38 + column: + start: 1 + end: 11 + content: "" + parent_line_number: 38 + fingerprint: 55db11cd18d0af4114644d01cefbc79d_11 + old_fingerprint: 55db11cd18d0af4114644d01cefbc79d_11 + diff --git a/internal/languages/python/analyzer/analyzer.go b/internal/languages/python/analyzer/analyzer.go index 8136e6f70..a24017670 100644 --- a/internal/languages/python/analyzer/analyzer.go +++ b/internal/languages/python/analyzer/analyzer.go @@ -47,6 +47,8 @@ func (analyzer *analyzer) Analyze(node *sitter.Node, visitChildren func() error) return analyzer.analyzeConditional(node, visitChildren) 
case "boolean_operator": return analyzer.analyzeBoolean(node, visitChildren) + case "import_statement", "import_from_statement": + return analyzer.analyzeImport(node, visitChildren) case "identifier": return visitChildren() default: @@ -159,6 +161,27 @@ func (analyzer *analyzer) analyzeKeywordArgument(node *sitter.Node, visitChildre return visitChildren() } +// import x +// import a.b +// from z import x +// import x as y (aliased_import) +// from z import x as y (aliased_import) +func (analyzer *analyzer) analyzeImport(node *sitter.Node, visitChildren func() error) error { + children := analyzer.builder.ChildrenExcept(node, node.ChildByFieldName("module_name")) + + for _, child := range children { + switch child.Type() { + case "aliased_import": + aliasedImportIdentifier := child.ChildByFieldName("alias") + analyzer.scope.Declare(analyzer.builder.ContentFor(aliasedImportIdentifier), aliasedImportIdentifier) + case "dotted_name": + analyzer.scope.Declare(analyzer.builder.ContentFor(child.NamedChild(0)), child.NamedChild(0)) + } + } + + return nil +} + // default analysis, where the children are assumed to be aliases func (analyzer *analyzer) analyzeGenericConstruct(node *sitter.Node, visitChildren func() error) error { children := analyzer.builder.ChildrenFor(node) diff --git a/internal/languages/python/pattern/pattern.go b/internal/languages/python/pattern/pattern.go index c20c90c37..14bfb834a 100644 --- a/internal/languages/python/pattern/pattern.go +++ b/internal/languages/python/pattern/pattern.go @@ -17,7 +17,7 @@ var ( matchNodeRegex = regexp.MustCompile(`\$`) ellipsisRegex = regexp.MustCompile(`\$<\.\.\.>`) unanchoredPatternNodeTypes = []string{} - patternMatchNodeContainerTypes = []string{} + patternMatchNodeContainerTypes = []string{"dotted_name"} allowedPatternQueryTypes = []string{"_"} ) @@ -129,6 +129,11 @@ func (*Pattern) IsAnchored(node *tree.Node) (bool, bool) { return false, false } + if (parent.Type() == "import_statement" || parent.Type() == 
"import_from_statement" || parent.Type() == "relative_import") && + (node.Type() == "dotted_name" || node.Type() == "aliased_import") { + return false, false + } + // Class body declaration_list // function/block compound_statement unAnchored := []string{} diff --git a/internal/languages/python/python_test.go b/internal/languages/python/python_test.go index af95751cf..731c14a32 100644 --- a/internal/languages/python/python_test.go +++ b/internal/languages/python/python_test.go @@ -13,6 +13,9 @@ var loggerRule []byte //go:embed testdata/scope_rule.yml var scopeRule []byte +//go:embed testdata/import_rule.yml +var importRule []byte + func TestFlow(t *testing.T) { testhelper.GetRunner(t, loggerRule, "python").RunTest(t, "./testdata/testcases/flow", ".snapshots/flow/") } @@ -20,3 +23,7 @@ func TestFlow(t *testing.T) { func TestScope(t *testing.T) { testhelper.GetRunner(t, scopeRule, "python").RunTest(t, "./testdata/scope", ".snapshots/") } + +func TestImport(t *testing.T) { + testhelper.GetRunner(t, importRule, "python").RunTest(t, "./testdata/import", ".snapshots/") +} diff --git a/internal/languages/python/testdata/import/import.py b/internal/languages/python/testdata/import/import.py new file mode 100644 index 000000000..7590e7368 --- /dev/null +++ b/internal/languages/python/testdata/import/import.py @@ -0,0 +1,38 @@ +from baz import foo +foo.someMethod() + +from baz import foo as asdf +asdf.someMethod() + +from baz import y as z, a as b, foo as j +j.someMethod() + +from baz import y, a, foo +foo.someMethod() + +import bar +bar.someMethod() + +import xyz, bar +bar.someMethod() + +import bar as qwerty +qwerty.someMethod() + +import yy as zz, bar as bb +bb.someMethod() + +import foo.bat +foo.bat.dottedMethod() + +import FooClass +z = FooClass +z.qwerty() + +from baz import FooClass as Something +x = Something() +x.qwerty() + +import FooClass as SomethingElse +y = SomethingElse() +y.qwerty() \ No newline at end of file diff --git 
a/internal/languages/python/testdata/import_rule.yml b/internal/languages/python/testdata/import_rule.yml new file mode 100644 index 000000000..17adb2b39 --- /dev/null +++ b/internal/languages/python/testdata/import_rule.yml @@ -0,0 +1,48 @@ +languages: + - python +patterns: + - pattern: $.someMethod($<...>) + filters: + - variable: IMPORT_FROM + detection: import_test_import_from + scope: result + - pattern: $.someMethod($<...>) + filters: + - variable: IMPORT + detection: import_test_import + scope: result + - pattern: $.qwerty($<...>) + filters: + - variable: CLASS + detection: import_test_class_import + scope: result + - pattern: $.bat.dottedMethod($<...>) + filters: + - variable: DOTTED + detection: import_test_dotted_name + scope: result +auxiliary: + - id: import_test_import_from + patterns: + - from baz import $foo + - from baz import foo as $$<_> + - id: import_test_import + patterns: + - import $bar + - import bar as $$<_> + - id: import_test_class_import + patterns: + - import $FooClass + - import FooClass as $$<_> + - from baz import $FooClass + - from baz import FooClass as $$<_> + - id: import_test_dotted_name + patterns: + - import $foo.bat +severity: high +metadata: + description: Test detection filter import statements + remediation_message: Test detection filter import statements + cwe_id: + - 42 + id: import_test