From 94706aa07e89915d83181684f8a5f81c4666dc33 Mon Sep 17 00:00:00 2001 From: elsapet Date: Fri, 18 Nov 2022 17:33:09 +0200 Subject: [PATCH] feat: add internal option for dataflow output feat: clean up data type uuid output --- .../dataflow/components/components_test.go | 2 +- pkg/report/output/dataflow/dataflow.go | 4 +-- .../dataflow/datatypes/datatypes_test.go | 18 ++++++------ pkg/report/output/dataflow/risks/risks.go | 29 ++++++++++++------- .../output/dataflow/risks/risks_test.go | 18 ++++++------ pkg/report/output/dataflow/types/datatypes.go | 1 + pkg/report/output/dataflow/types/risks.go | 4 +-- pkg/report/output/output.go | 6 ++-- 8 files changed, 46 insertions(+), 36 deletions(-) diff --git a/pkg/report/output/dataflow/components/components_test.go b/pkg/report/output/dataflow/components/components_test.go index f77ecac22..3b0b4fa7a 100644 --- a/pkg/report/output/dataflow/components/components_test.go +++ b/pkg/report/output/dataflow/components/components_test.go @@ -124,7 +124,7 @@ func TestDataflowComponents(t *testing.T) { return } - dataflow, err := dataflow.GetOutput(detections, settings.Config{}) + dataflow, err := dataflow.GetOutput(detections, settings.Config{}, false) if err != nil { t.Fatalf("failed to get detectors output %s", err) return diff --git a/pkg/report/output/dataflow/dataflow.go b/pkg/report/output/dataflow/dataflow.go index 37e7b3f94..4b79a068c 100644 --- a/pkg/report/output/dataflow/dataflow.go +++ b/pkg/report/output/dataflow/dataflow.go @@ -22,9 +22,9 @@ type DataFlow struct { var allowedDetections []detections.DetectionType = []detections.DetectionType{detections.TypeSchemaClassified, detections.TypeCustomClassified, detections.TypeDependencyClassified, detections.TypeInterfaceClassified} -func GetOutput(input []interface{}, config settings.Config) (*DataFlow, error) { +func GetOutput(input []interface{}, config settings.Config, isInternal bool) (*DataFlow, error) { dataTypesHolder := datatypes.New() - risksHolder := risks.New(config) + risksHolder := risks.New(config, isInternal) componentsHolder := components.New() for _, detection := range input { diff --git a/pkg/report/output/dataflow/datatypes/datatypes_test.go b/pkg/report/output/dataflow/datatypes/datatypes_test.go index b712d8060..b505e3782 100644 --- a/pkg/report/output/dataflow/datatypes/datatypes_test.go +++ b/pkg/report/output/dataflow/datatypes/datatypes_test.go @@ -30,7 +30,7 @@ func TestDataflowDataType(t *testing.T) { { Name: "single detection", Config: config, - FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username"} ,"decision":{"state": "valid"}}}}`, Want: []types.Datatype{ { Name: "Username", @@ -54,8 +54,8 @@ func TestDataflowDataType(t *testing.T) { { Name: "single detection - duplicates", Config: config, - FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}} -{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username"} ,"decision":{"state": "valid"}}}} +{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username"} ,"decision":{"state": "valid"}}}}`, Want: []types.Datatype{ { Name: "Username", @@ -73,7 +73,7 @@ func TestDataflowDataType(t *testing.T) { { Name: "single detection - with wierd data in report", Config: config, - FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}} + FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username"} ,"decision":{"state": "valid"}}}} {"user": true }`, Want: []types.Datatype{ { @@ -92,8 +92,8 @@ func TestDataflowDataType(t *testing.T) { { Name: "multiple detections - with same object name - deterministic output", Config: config, - FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}} -{"type": "schema_classified", "detector_type":"csharp", "source": {"filename": "./users.cs", "line_number": 12}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username"} ,"decision":{"state": "valid"}}}} +{"type": "schema_classified", "detector_type":"csharp", "source": {"filename": "./users.cs", "line_number": 12}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username"} ,"decision":{"state": "valid"}}}}`, Want: []types.Datatype{ { Name: "Username", @@ -117,8 +117,8 @@ func TestDataflowDataType(t *testing.T) { { Name: "multiple detections - with different names - deterministic output", Config: config, - FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}} -{"type": "schema_classified", "detector_type":"csharp", "source": {"filename": "./users.cs", "line_number": 12}, "value": {"field_name": "address", "classification": {"data_type": {"data_category_name": "Physical Address"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "schema_classified", "detector_type":"ruby", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username"} ,"decision":{"state": "valid"}}}} +{"type": "schema_classified", "detector_type":"csharp", "source": {"filename": "./users.cs", "line_number": 12}, "value": {"field_name": "address", "classification": {"data_type": {"name": "Physical Address"} ,"decision":{"state": "valid"}}}}`, Want: []types.Datatype{ { Name: "Physical Address", @@ -169,7 +169,7 @@ func TestDataflowDataType(t *testing.T) { return } - dataflow, err := dataflow.GetOutput(detections, test.Config) + dataflow, err := dataflow.GetOutput(detections, test.Config, false) if err != nil { t.Fatalf("failed to get detectors output %s", err) return diff --git a/pkg/report/output/dataflow/risks/risks.go b/pkg/report/output/dataflow/risks/risks.go index 133e47d5f..33a813329 100644 --- a/pkg/report/output/dataflow/risks/risks.go +++ b/pkg/report/output/dataflow/risks/risks.go @@ -12,8 +12,9 @@ import ( ) type Holder struct { - detectors map[string]detectorHolder // group datatypeHolders by name - config settings.Config + detectors map[string]detectorHolder // group datatypeHolders by name + config settings.Config + isInternal bool } type detectorHolder struct { @@ -31,10 +32,11 @@ type fileHolder struct { lineNumber map[int]int } -func New(config settings.Config) *Holder { +func New(config settings.Config, isInternal bool) *Holder { return &Holder{ - detectors: make(map[string]detectorHolder), - config: config, + detectors: make(map[string]detectorHolder), + config: config, + isInternal: isInternal, } } @@ -64,11 +66,18 @@ func (holder *Holder) addDatatype(ruleName string, datatype *db.DataType, fileNa detector := holder.detectors[ruleName] // create datatype entry if it doesn't exist if _, exists := detector.datatypes[datatype.Name]; !exists { - detector.datatypes[datatype.Name] = &datatypeHolder{ - name: datatype.Name, - uuid: datatype.UUID, - categoryUUID: datatype.CategoryUUID, - files: make(map[string]*fileHolder), + if holder.isInternal { + detector.datatypes[datatype.Name] = &datatypeHolder{ + name: datatype.Name, + uuid: datatype.UUID, + categoryUUID: datatype.CategoryUUID, + files: make(map[string]*fileHolder), + } + } else { + detector.datatypes[datatype.Name] = &datatypeHolder{ + name: datatype.Name, + files: make(map[string]*fileHolder), + } } } diff --git a/pkg/report/output/dataflow/risks/risks_test.go b/pkg/report/output/dataflow/risks/risks_test.go index 820238be4..f41b3041b 100644 --- a/pkg/report/output/dataflow/risks/risks_test.go +++ b/pkg/report/output/dataflow/risks/risks_test.go @@ -29,7 +29,7 @@ func TestDataflowRisks(t *testing.T) { { Name: "single detection", Config: config, - FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}}`, Want: []types.RiskDetector{ { DetectorID: "rails_leak", @@ -54,8 +54,8 @@ func TestDataflowRisks(t *testing.T) { { Name: "single detection - duplicates", Config: config, - FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}} -{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}} +{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}}`, Want: []types.RiskDetector{ { DetectorID: "rails_leak", @@ -74,7 +74,7 @@ func TestDataflowRisks(t *testing.T) { { Name: "single detection - stored", Config: config, - FileContent: `{"type": "custom_classified", "detector_type":"ruby_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "custom_classified", "detector_type":"ruby_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}}`, Want: []types.RiskDetector{ { DetectorID: "ruby_leak", @@ -93,8 +93,8 @@ func TestDataflowRisks(t *testing.T) { { Name: "single detection - multiple occurences - deterministic output", Config: config, - FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}} - {"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 2}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}} + {"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 2}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}}`, Want: []types.RiskDetector{ { DetectorID: "rails_leak", @@ -114,8 +114,8 @@ func TestDataflowRisks(t *testing.T) { { Name: "multiple detections - same detector - deterministic output", Config: config, - FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Username"} ,"decision":{"state": "valid"}}}} -{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./address.rb", "line_number": 2}, "value": {"field_name": "User_name", "classification": {"data_type": {"data_category_name": "Physical Address"} ,"decision":{"state": "valid"}}}}`, + FileContent: `{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./users.rb", "line_number": 25}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Username", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}} +{"type": "custom_classified", "detector_type":"rails_leak", "source": {"filename": "./address.rb", "line_number": 2}, "value": {"field_name": "User_name", "classification": {"data_type": {"name": "Physical Address", "uuid": "123", "category_uuid": "456"} ,"decision":{"state": "valid"}}}}`, Want: []types.RiskDetector{ { DetectorID: "rails_leak", @@ -161,7 +161,7 @@ func TestDataflowRisks(t *testing.T) { return } - dataflow, err := dataflow.GetOutput(detections, test.Config) + dataflow, err := dataflow.GetOutput(detections, test.Config, false) if err != nil { t.Fatalf("failed to get detectors output %s", err) return diff --git a/pkg/report/output/dataflow/types/datatypes.go b/pkg/report/output/dataflow/types/datatypes.go index fba18b93c..ab05e576e 100644 --- a/pkg/report/output/dataflow/types/datatypes.go +++ b/pkg/report/output/dataflow/types/datatypes.go @@ -2,6 +2,7 @@ package types type Datatype struct { Name string `json:"name"` + UUID string `json:"uuid,omitempty"` Detectors []DatatypeDetector `json:"detectors"` } diff --git a/pkg/report/output/dataflow/types/risks.go b/pkg/report/output/dataflow/types/risks.go index 5868e91a8..58f77350e 100644 --- a/pkg/report/output/dataflow/types/risks.go +++ b/pkg/report/output/dataflow/types/risks.go @@ -7,8 +7,8 @@ type RiskDetector struct { type RiskDatatype struct { Name string `json:"name"` - UUID string `json:"uuid"` - CategoryUUID string `json:"category_uuid"` + UUID string `json:"uuid,omitempty"` + CategoryUUID string `json:"category_uuid,omitempty"` Stored bool `json:"stored"` Locations []RiskLocation `json:"locations"` } diff --git a/pkg/report/output/output.go b/pkg/report/output/output.go index 8670e8b7a..d0e7b085b 100644 --- a/pkg/report/output/output.go +++ b/pkg/report/output/output.go @@ -60,7 +60,7 @@ func getReportOutput(report types.Report, config settings.Config) (any, error) { return nil, err } - return dataflow.GetOutput(detections, config) + return dataflow.GetOutput(detections, config, false) } else if config.Report.Report == flag.ReportPolicies { detections, err := detectors.GetOutput(report) @@ -68,7 +68,7 @@ func getReportOutput(report types.Report, config settings.Config) (any, error) { return nil, err } - dataflow, err := dataflow.GetOutput(detections, config) + dataflow, err := dataflow.GetOutput(detections, config, true) if err != nil { return nil, err } @@ -85,7 +85,7 @@ func getReportOutput(report types.Report, config settings.Config) (any, error) { return nil, err } - dataflowOutput, err := dataflow.GetOutput(detectorsOutput, config) + dataflowOutput, err := dataflow.GetOutput(detectorsOutput, config, true) if err != nil { return nil, err }