GoogleChrome · sjledoux · May 2, 2024 · May 7, 2024 · May 7, 2024 · cfredric
diff --git a/.github/workflows/rws-submissions-checks.yml b/.github/workflows/rws-submissions-checks.yml
@@ -46,20 +46,25 @@ jobs:
           CONTENTS: ${{ steps.read_results.outputs.contents }}
         run: echo "$CONTENTS"
       - name: Create Comment
-        if: steps.read_results.outputs.contents != 'success'
+        if: steps.read_results.outputs.contents != 'success' && !startsWith(steps.read_results.outputs.contents, 'Warning')  # plain expression only; skip warning-only results
         run: |
          echo "It appears you have failed some tests. Here are your results:" > message.txt
          cat results.txt >> message.txt
-      - name: Comment if sucess
+      - name: Comment if success
         if: steps.read_results.outputs.contents == 'success'
         run: echo "Looks like you've passed all of the checks!" >> message.txt
+      - name: Comment if warning
+        if: startsWith(steps.read_results.outputs.contents, 'Warning')  # results that begin with a warning line
+        run: |
+          echo "You have passed all of the checks, but there are one or more warnings associated with your submission." > message.txt
+          cat results.txt >> message.txt
       - name: Write Comment on PR
         uses: mshick/add-pr-comment@v2
         with:   
           message-path: "message.txt" 
           refresh-message-position: true
       - name: Fail or Succeed
-        if: steps.read_results.outputs.contents != 'success'
+        if: steps.read_results.outputs.contents != 'success' && !startsWith(steps.read_results.outputs.contents, 'Warning')  # fail on real errors only, not on warnings
         uses: actions/github-script@v3
         with:
           script: |

diff --git a/RwsCheck.py b/RwsCheck.py
@@ -20,6 +20,7 @@
 from publicsuffix2 import PublicSuffixList
 
 WELL_KNOWN = "/.well-known/related-website-set.json"
+ASSOCIATED_LIMIT = 5
 
 class RwsCheck:
 
@@ -45,6 +46,7 @@ def __init__(self, rws_sites: json, etlds: PublicSuffixList, icanns: set):
         self.etlds = etlds
         self.icanns = icanns
         self.error_list = []
+        self.warning_texts = []
 
     def validate_schema(self, schema_file):
         """Validates the canonical sites list
@@ -170,6 +172,23 @@ def check_exclusivity(self, check_sets):
                 else:
                     site_list.update(aliases)
 
+    def check_associated_count(self, check_sets):
+        """This method checks for RwsSets that exceed the associated limit
+
+        Appends a warning to self.warning_texts for each set in check_sets
+        that has more associatedSites than the ASSOCIATED_LIMIT. A set whose
+        associated_sites is None is treated as having no associated sites.
+
+        Args:
+            check_sets: Dict[string, RwsSet]
+        Returns:
+            None
+        """
+        for primary, rws in check_sets.items():
+            if len(rws.associated_sites or ()) > ASSOCIATED_LIMIT:
+                self.warning_texts.append(
+                    f"Warning: the set for {primary} contains more than {ASSOCIATED_LIMIT} associated sites.")
+
     def url_is_https(self, site):
         """A function that checks for https://
 

diff --git a/check_sites.py b/check_sites.py
@@ -130,7 +130,8 @@ def main():
     # Run rest of checks
     check_list = [
         rws_checker.has_all_rationales,
-        rws_checker.find_non_https_urls, 
+        rws_checker.find_non_https_urls,
+        rws_checker.check_associated_count,
         rws_checker.find_invalid_eTLD_Plus1,
         rws_checker.find_invalid_well_known, 
         rws_checker.find_invalid_alias_eSLDs, 
@@ -145,12 +146,14 @@ def main():
         except Exception as inst:
             error_texts.append(inst)
     # This message allows us to check the succes of our action
-    if rws_checker.error_list or error_texts:
+    if rws_checker.error_list or error_texts or rws_checker.warning_texts:
         for checker_error in rws_checker.error_list:
             print(checker_error)
         for error_text in error_texts:
             print(error_text)
-    else:
+        for warning in rws_checker.warning_texts:
+            print(warning)
+    else:   
         print("success", end='')
 
 

diff --git a/tests/rws_tests.py b/tests/rws_tests.py
@@ -276,6 +276,84 @@ def test_expected_rationales_case(self):
         loaded_sets = rws_check.load_sets()
         self.assertEqual(rws_check.error_list, [])  
 
+class TestCheckAssociatedCount(unittest.TestCase):
+    """Tests for the warnings recorded by RwsCheck.check_associated_count."""
+
+    def test_within_limit(self):
+        """A set with a single associated site records no warnings."""
+        json_dict = {
+            "sets": [
+                {
+                    "primary": "https://primary.com",
+                    "associatedSites": ["https://associated1.com"],
+                    "rationaleBySite": {}
+                }
+            ]
+        }
+        rws_check = RwsCheck(rws_sites=json_dict, etlds=None, icanns=set())
+        loaded_sets = rws_check.load_sets()
+        rws_check.check_associated_count(loaded_sets)
+        self.assertEqual(rws_check.warning_texts, [])
+
+    def test_over_limit(self):
+        """A set with six associated sites records one warning."""
+        json_dict = {
+            "sets": [
+                {
+                    "primary": "https://primary.com",
+                    "associatedSites": ["https://associated1.com",
+                                        "https://associated2.com",
+                                        "https://associated3.com",
+                                        "https://associated4.com",
+                                        "https://associated5.com",
+                                        "https://associated6.com"],
+                    "rationaleBySite": {}
+                }
+            ]
+        }
+        rws_check = RwsCheck(rws_sites=json_dict, etlds=None, icanns=set())
+        loaded_sets = rws_check.load_sets()
+        rws_check.check_associated_count(loaded_sets)
+        expected = [
+            "Warning: the set for https://primary.com contains more than 5 associated sites."
+        ]
+        self.assertEqual(rws_check.warning_texts, expected)
+
+    def test_multi_over_limit(self):
+        """Two over-limit sets each record their own warning."""
+        json_dict = {
+            "sets": [
+                {
+                    "primary": "https://primary.com",
+                    "associatedSites": ["https://associated1.com",
+                                        "https://associated2.com",
+                                        "https://associated3.com",
+                                        "https://associated4.com",
+                                        "https://associated5.com",
+                                        "https://associated6.com"],
+                    "rationaleBySite": {}
+                },
+                {
+                    "primary": "https://primary2.com",
+                    "associatedSites": ["https://associated7.com",
+                                        "https://associated8.com",
+                                        "https://associated9.com",
+                                        "https://associated10.com",
+                                        "https://associated11.com",
+                                        "https://associated12.com"],
+                    "rationaleBySite": {}
+                }
+            ]
+        }
+        rws_check = RwsCheck(rws_sites=json_dict, etlds=None, icanns=set())
+        loaded_sets = rws_check.load_sets()
+        rws_check.check_associated_count(loaded_sets)
+        expected = [
+            "Warning: the set for https://primary.com contains more than 5 associated sites.",
+            "Warning: the set for https://primary2.com contains more than 5 associated sites.",
+        ]
+        self.assertEqual(rws_check.warning_texts, expected)
+
 class TestCheckExclusivity(unittest.TestCase):
     def test_servicesets_overlap(self):
         json_dict = {