# Lab1 Grading

First, download the submissions from GitHub Classroom:
- Install the GitHub CLI: `brew install gh`
- Install the GitHub Classroom extension for `gh`: https://docs.github.com/en/education/manage-coursework-with-github-classroom/teach-with-github-classroom/using-github-classroom-with-github-cli#using-the-github-classroom-extension-with-github-cli-
- Copy the clone command from the GitHub Classroom assignment page, e.g. `gh classroom clone student-repos -a 679444`


## 1. Get all repos and their grading rubrics

In [1]:
import os
import pandas as pd
import re
import requests

In [2]:
# Replace these values with your own.
# The GitHub token is read from the GITHUB_TOKEN environment variable so that it
# does not have to be pasted into (and saved with) the notebook. It falls back
# to an empty string, which is the value this cell used before.
token = os.environ.get("GITHUB_TOKEN", "")
owner = "nu-cs-sqe"
# Common prefix of every repo name; the part after it is the GitHub user name.
repo_prefix = "lab-1-collaborative-software-development-20242510-"

# Checkout that contains grading.md (the rubric definitions).
original_repo_folder = "/Users/qinjielin/Downloads/NWU/24fall-cs397/lab1/lab-1-collaborative-software-development-20242510-QinjieLin-NU"
# Folder whose sub-directories are the cloned student repos.
submissions_folder = "/Users/qinjielin/Downloads/NWU/24fall-cs397/lab1/lab-1-collaborative-software-development-20242510-submissions"

In [3]:
# Every sub-directory of the submissions folder is treated as one student repo.
# The GitHub user name is whatever remains of the directory name once the
# shared repo prefix is removed.
repo_names = []
github_owner_names = []
for entry in os.listdir(submissions_folder):
    if os.path.isdir(os.path.join(submissions_folder, entry)):
        repo_names.append(entry)
        github_owner_names.append(entry.replace(repo_prefix, ""))

In [32]:
# Read the rubric definitions; the context manager closes the file handle,
# which a bare open(...).read() leaves to the garbage collector.
with open(os.path.join(original_repo_folder, "grading.md"), "r") as grading_file:
    grading_md_txt = grading_file.read()
# Rubric items appear in grading.md as "**[Item] <description>**".
pattern = r"\*\*\[Item\] (.*?)\*\*"
rubric_items = re.findall(pattern, grading_md_txt)
# Print out the rubric items, numbered from 1 to match the rubric_N columns.
print("Rubric Items:")
for idx, item in enumerate(rubric_items, start=1):
    print(f"Rubric_{idx} - " + item)

Rubric Items:
Rubric_1 - .gitignore is completed with easy-to-read comments to separate the sections for different lanaguges or tools.
Rubric_2 - .gitignore is completed for all the required content: Java, IntelliJ, Windows, and MacOS.
Rubric_3 - GitHub Actions are correctly set up.
Rubric_4 - GitHub Workflow badge is correctly set up.
Rubric_5 - Branch protection is correctly set up.
Rubric_6 - Commit history shows that fix-fib-test0-test5 is done properly.
Rubric_7 - README.md contains the link to the PR that shows you have resolved a merge conflict.
Rubric_8 - Merge conflict was resolved properly.
Rubric_9 - Reflections.md is completed with meaning answers from an honest effort.


In [33]:
# Rubrics are numbered from 1; each one gets a score column and a feedback column.
rubric_numbers = range(1, len(rubric_items) + 1)

# Column order: identity, then (score, feedback) per rubric, then the summary columns.
columns = ["name", "repo_name"]
for number in rubric_numbers:
    columns += [f"rubric_{number}", f"rubric_{number}_feedback"]
columns += ["total_score", "pass"]

# One row per student repo, with every rubric starting at 0 and empty feedback.
rows = []
for name, repo_name in zip(github_owner_names, repo_names):
    row = {"name": name, "repo_name": repo_name}
    for number in rubric_numbers:
        row[f"rubric_{number}"] = 0
        row[f"rubric_{number}_feedback"] = ""
    row.update({"total_score": 0, "pass": False})
    rows.append(row)

# Create the DataFrame
df = pd.DataFrame(rows, columns=columns)

## 2. Grading rubrics

In [6]:
def is_empty_gitignore(repo_folder):
    """Return True when the repo has no ``.gitignore`` or it holds only whitespace.

    Args:
        repo_folder: Path of the checked-out repository.

    Returns:
        True if ``.gitignore`` is missing or blank, False otherwise.
    """
    gitignore_path = os.path.join(repo_folder, ".gitignore")
    if os.path.exists(gitignore_path):
        with open(gitignore_path, "r") as handle:
            return handle.read().strip() == ""
    # A missing file counts as empty.
    return True

In [7]:
# Function to download a .gitignore template and split it into lines
def download_gitignore_template(url, timeout=30):
    """Download a .gitignore template and return its lines.

    Args:
        url: URL of the raw template file.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The response body split into a list of lines (without line endings).

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: If the request itself fails,
            including when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to download .gitignore template from {url}")
    return response.text.splitlines()
# Download the Java, Gradle, macOS and Windows .gitignore templates; each
# *_gitignore variable ends up holding the template's lines as a list of strings.
# NOTE(review): "maxos_gitignore_url" looks like a typo for "macos"; the name is
# left unchanged here because it is referenced below.
java_gitignore_url = "https://raw.githubusercontent.com/github/gitignore/main/Java.gitignore"
gradle_gitignore_url = "https://raw.githubusercontent.com/github/gitignore/main/Gradle.gitignore"
maxos_gitignore_url= "https://raw.githubusercontent.com/github/gitignore/main/Global/macOS.gitignore"
windows_gitignore_url = "https://raw.githubusercontent.com/github/gitignore/main/Global/Windows.gitignore"
java_gitignore = download_gitignore_template(java_gitignore_url)
gradle_gitignore = download_gitignore_template(gradle_gitignore_url)
macos_gitignore = download_gitignore_template(maxos_gitignore_url)
windows_gitignore = download_gitignore_template(windows_gitignore_url)

def _template_match_ratio(student_gitignore, template_lines):
    """Return the fraction of a template's rule lines found in the student's text."""
    # Blank lines are dropped together with comments: "" is a substring of every
    # string, so counting them would credit the student for rules never written.
    rules = [
        line for line in template_lines
        if line.strip() and not line.startswith("#")
    ]
    if not rules:
        return 0
    matched = sum(1 for rule in rules if rule in student_gitignore)
    return matched / len(rules)


# Function to check whether the student's .gitignore covers enough of the
# Java, Gradle, macOS and Windows templates
def is_complete_gitignore(repo_folder):
    """Return True when the student's ``.gitignore`` covers enough template rules.

    For each of the four module-level templates (``java_gitignore``,
    ``gradle_gitignore``, ``macos_gitignore``, ``windows_gitignore``) the
    fraction of rule lines that occur as a substring of the student's file is
    computed. The file is accepted when the four fractions add up to at least
    2, i.e. the equivalent of two fully covered templates.

    Args:
        repo_folder: Path of the checked-out repository.

    Returns:
        True if the summed match fractions reach 2; False otherwise, including
        when the repository has no ``.gitignore``.
    """
    gitignore_file = os.path.join(repo_folder, ".gitignore")
    if not os.path.exists(gitignore_file):
        return False

    with open(gitignore_file, "r") as file:
        # Kept as one string: template rules are matched as substrings.
        student_gitignore = file.read()

    templates = (java_gitignore, gradle_gitignore, macos_gitignore, windows_gitignore)
    matches_sum = sum(
        _template_match_ratio(student_gitignore, template) for template in templates
    )
    return matches_sum >= 2

In [8]:
def is_empty_workflows(repo_folder):
    """Return True when ``.github/workflows/main.yml`` is missing or blank.

    Args:
        repo_folder: Path of the checked-out repository.

    Returns:
        True if the workflow file does not exist or contains only whitespace,
        False otherwise.
    """
    workflow_path = os.path.join(repo_folder, ".github/workflows/main.yml")
    if os.path.exists(workflow_path):
        with open(workflow_path, "r") as handle:
            return handle.read().strip() == ""
    # A missing workflow file counts as empty.
    return True

In [9]:
def is_badge_correct(repo_folder, repo_name):
    """Return True when README.md contains the expected workflow badge.

    The badge must appear verbatim as the Markdown image
    ``![Gradle Build](...)`` pointing at this repo's ``main.yml`` workflow
    under the ``nu-cs-sqe`` organisation.

    Args:
        repo_folder: Path of the checked-out repository.
        repo_name: Repository name used to build the expected badge URL.

    Returns:
        True if the exact badge markup is found, False if it is absent or
        README.md does not exist.
    """
    readme_path = os.path.join(repo_folder, "README.md")
    if os.path.exists(readme_path):
        with open(readme_path, "r") as handle:
            readme_text = handle.read()
        expected_badge = f"![Gradle Build](https://github.com/nu-cs-sqe/{repo_name}/actions/workflows/main.yml/badge.svg)"
        return expected_badge in readme_text
    return False

In [10]:
def is_protection_rule_in_main(repo_name):
    """Return True when GitHub reports branch protection on ``main``.

    Sends an authenticated GET request (using the module-level ``token``) to
    the branch-protection endpoint of ``nu-cs-sqe/<repo_name>``.

    Args:
        repo_name: Name of the repository inside the ``nu-cs-sqe`` organisation.

    Returns:
        True if the API answers 200. False for 404 (no protection rules set)
        and for any other status; the latter is also reported on stdout so
        that a credential or rate-limit problem is not mistaken for a missing
        rule.

    Raises:
        requests.exceptions.RequestException: If the request itself fails,
            including when the 30-second timeout expires.
    """
    owner = "nu-cs-sqe"
    branch = "main"

    # GitHub API URL to get branch protection details
    url = f"https://api.github.com/repos/{owner}/{repo_name}/branches/{branch}/protection"

    # Headers for authorization and to specify API version
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json"
    }

    # The timeout keeps a stalled connection from hanging the grading loop.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code == 200:
        return True
    if response.status_code != 404:
        print(f"Failed to check branch protection for {repo_name}. Status code: {response.status_code}")
    return False

In [39]:
def is_keywords_in_commit_history(repo_name, keywords):
    """Return True when any commit message contains one of the keywords.

    Lists the commits of ``nu-cs-sqe/<repo_name>`` through the GitHub API
    (authenticated with the module-level ``token``), following pagination
    until a match is found or the history is exhausted.

    Args:
        repo_name: Name of the repository inside the ``nu-cs-sqe`` organisation.
        keywords: A keyword or an iterable of keywords; matching is a
            case-sensitive substring test against each commit message.

    Returns:
        True if a matching commit exists. False if none matches or if a
        request returns a status other than 200 (the failure is printed).

    Raises:
        requests.exceptions.RequestException: If a request itself fails,
            including when the 30-second timeout expires.
    """
    # A bare string would be iterated character by character, so nearly every
    # commit message would "match"; treat it as a single keyword instead.
    if isinstance(keywords, str):
        keywords = [keywords]
    else:
        keywords = list(keywords)

    owner = "nu-cs-sqe"
    url = f"https://api.github.com/repos/{owner}/{repo_name}/commits"
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github+json"
    }
    # Ask for the largest page size so most repos need a single request.
    params = {"per_page": 100}

    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code != 200:
            print(f"Failed to retrieve commits for {repo_name}. Status code: {response.status_code}")
            print(response.json())
            return False

        for commit in response.json():
            commit_message = commit['commit']['message']
            if any(keyword in commit_message for keyword in keywords):
                return True

        # Follow the "next" link if present; it already carries the query
        # string, so the params must not be sent again.
        url = response.links.get("next", {}).get("url")
        params = None

    return False

In [12]:
def has_pull_link_in_readme(repo_folder, repo_name):
    """Return True when README.md links to a pull request of this repo.

    Args:
        repo_folder: Path of the checked-out repository.
        repo_name: Repository name used to build the expected link prefix.

    Returns:
        True if README.md contains ``github.com/nu-cs-sqe/<repo_name>/pull``,
        False if it does not or README.md is missing.
    """
    readme_path = os.path.join(repo_folder, "README.md")
    if os.path.exists(readme_path):
        with open(readme_path, "r") as handle:
            readme_text = handle.read()
        return f"github.com/nu-cs-sqe/{repo_name}/pull" in readme_text
    return False

In [13]:
# Heading "## Contributor(s)" (the misspelling "Contributer(s)" is tolerated),
# optionally followed by one extra line, then a "- name" bullet list.
_CONTRIBUTORS_LIST_RE = re.compile(
    r"## Contribut[oe]r[s]?(?:\s.*)?\n(?:[^\n]*\n)?(- [^\n]+(\n- [^\n]+)*)"
)
# Same heading, but with the first bullet holding comma-separated names on one line.
_CONTRIBUTORS_COMMA_RE = re.compile(
    r"## Contribut[oe]r[s]?(?:\s.*)?\n(?:[^\n]*\n)?- [^,\n]+, [^,\n]+"
)


# Function to check the "Contributors" section format
def check_contributors_format(repo_folder):
    """Return True when README.md lists contributors one per bullet line.

    The section passes when a contributors heading is followed by a bullet
    list and the first bullet does not hold several comma-separated names.
    Both checks accept the same heading spellings, so a misspelt heading
    cannot bypass the comma check.

    Args:
        repo_folder: Path of the checked-out repository.

    Returns:
        True if the section is present and correctly formatted; False if it
        is malformed, absent, or README.md does not exist.
    """
    readme_file = os.path.join(repo_folder, "README.md")
    if not os.path.exists(readme_file):
        return False

    with open(readme_file, "r") as file:
        student_readme = file.read()

    has_bullet_list = _CONTRIBUTORS_LIST_RE.search(student_readme) is not None
    has_comma_names = _CONTRIBUTORS_COMMA_RE.search(student_readme) is not None
    return has_bullet_list and not has_comma_names


In [14]:
def has_answer_in_refletions(repo_folder):
    """Return True when reflections.md holds text beyond the template prompts.

    The known instruction block and question headings are removed from the
    file's text; anything other than whitespace that remains is taken as an
    answer.

    Args:
        repo_folder: Path of the checked-out repository.

    Returns:
        True if non-whitespace text remains after removing the prompts;
        False otherwise, including when reflections.md does not exist.
    """
    reflections_path = os.path.join(repo_folder, "reflections.md")
    if not os.path.exists(reflections_path):
        return False

    with open(reflections_path, "r") as handle:
        remaining_text = handle.read()

    # Template text that ships with the assignment and does not count as an answer.
    template_prompts = (
        """# Instruction
## This reflection doc is for you to examine your understanding, but not to test you. As long as you give an honest effort, you will be rewarded full credits.
## Please do not copy-paste the answer from anywhere else. "I don't remember and I will need to study more." is a valid answer that will receive full credits. """,
        """# Q1: In your own words, what are the purpose of the branch protection rules?""",
        "# Q2: In your own words, what is the purpose of GitHub Action?",
        """# Q3: What are some concepts that you have never heard of or extensively used before this lab? Please list them. ("I have used them all" is a valid answer.)""",
        "# Q4: We haven't looked into Gradle at all. What is your first intuition of the purpose of Gradle?",
        "# Q5: Do you have any further questions? (N/A is a valid answer)",
    )
    for prompt in template_prompts:
        remaining_text = remaining_text.replace(prompt, "")

    return bool(remaining_text.strip())

## 3. Grading students

In [43]:
# Each rubric is described by (rubric number, check, feedback recorded on failure).
# Every check receives (repo_folder, repo_name) and returns a truthy value on pass.
rubric_checks = [
    # - .gitignore is completed with easy-to-read comments to separate the sections for different lanaguges or tools.
    (1,
     lambda repo_folder, repo_name: not is_empty_gitignore(repo_folder),
     "Empty .gitignore file."),
    # - .gitignore is completed for all the required content: Java, IntelliJ, Windows, and MacOS.
    (2,
     lambda repo_folder, repo_name: is_complete_gitignore(repo_folder) is True,
     "Incomplete .gitignore file."),
    # - GitHub Actions are correctly set up.
    (3,
     lambda repo_folder, repo_name: not is_empty_workflows(repo_folder),
     "Incomplete Actions."),
    # - GitHub Workflow badge is correctly set up.
    (4,
     lambda repo_folder, repo_name: is_badge_correct(repo_folder, repo_name),
     "Incorrect badge setup."),
    # - Branch protection is correctly set up.
    (5,
     lambda repo_folder, repo_name: is_protection_rule_in_main(repo_name),
     "Incorrect branch protection rule."),
    # - Commit history shows that fix-fib-test0-test5 is done properly.
    (6,
     lambda repo_folder, repo_name: is_keywords_in_commit_history(repo_name, keywords=["fix-fib", "fix fib"]),
     "No found fix-fib-test0-test5 in commit history."),
    # - README.md contains the link to the PR that shows you have resolved a merge conflict.
    (7,
     lambda repo_folder, repo_name: has_pull_link_in_readme(repo_folder, repo_name),
     "README.md doesn't contain the link to the PR that shows you have resolved a merge conflict."),
    # - Merge conflict was resolved properly.
    (8,
     lambda repo_folder, repo_name: check_contributors_format(repo_folder),
     "Incorrect Contributors format. Refer to page 22 in the instruction pdf."),
    # - Reflections.md is completed with meaning answers from an honest effort
    (9,
     lambda repo_folder, repo_name: has_answer_in_refletions(repo_folder),
     "Please submit reflections.md in the repo."),
]

# Grade every student repo: run the checks in rubric order and record, for each
# one, a score of 1 with "Pass" or a score of 0 with the failure feedback.
for index, row in df.iterrows():
    repo_name = row["repo_name"]
    repo_folder = os.path.join(submissions_folder, repo_name)

    for rubric_number, check, failure_feedback in rubric_checks:
        if check(repo_folder, repo_name):
            score, feedback = 1, "Pass"
        else:
            score, feedback = 0, failure_feedback
        df.at[index, f"rubric_{rubric_number}"] = score
        df.at[index, f"rubric_{rubric_number}_feedback"] = feedback

In [44]:
# Calculate the total score for each student; a student passes only with full
# marks on every rubric. The rubric count comes from rubric_items rather than a
# hard-coded 9, so the totals stay correct if grading.md gains or loses items.
rubric_columns = [f"rubric_{number}" for number in range(1, len(rubric_items) + 1)]
df["total_score"] = df[rubric_columns].sum(axis=1)
df["pass"] = df["total_score"] == len(rubric_columns)

In [45]:
# Display the graded table (scores, feedback, total and pass flag per student).
df

Unnamed: 0,name,repo_name,rubric_1,rubric_1_feedback,rubric_2,rubric_2_feedback,rubric_3,rubric_3_feedback,rubric_4,rubric_4_feedback,...,rubric_6,rubric_6_feedback,rubric_7,rubric_7_feedback,rubric_8,rubric_8_feedback,rubric_9,rubric_9_feedback,total_score,pass
0,emran2602,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,1,Pass,1,Pass,8,False
1,brennanb2025,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,1,Pass,1,Pass,1,Pass,9,True
2,adampchen,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,1,Pass,0,Please submit reflections.md in the repo.,7,False
3,arojas1234,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,1,Pass,0,Please submit reflections.md in the repo.,7,False
4,Jiho1211,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,0,Incorrect Contributors format. Refer to page 2...,0,Please submit reflections.md in the repo.,6,False
5,Annie-LAN,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,1,Pass,1,Pass,8,False
6,hyunbinjlee,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,1,Pass,1,Pass,8,False
7,Liuabi520,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,1,Pass,1,Pass,1,Pass,9,True
8,clementkubica,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,1,Pass,1,Pass,8,False
9,LianhaoZ,lab-1-collaborative-software-development-20242...,1,Pass,1,Pass,1,Pass,1,Pass,...,1,Pass,0,README.md doesn't contain the link to the PR t...,1,Pass,1,Pass,8,False


In [46]:
# Save the DataFrame as lab1_grading.csv in the current working directory;
# index=False leaves the DataFrame's row index out of the file.
df.to_csv("lab1_grading.csv", index=False)

#### Small tests

In [17]:
# Spot-check the badge detection on a single submission.
test_repo = "lab-1-collaborative-software-development-20242510-andrewericgao"
test_folder = os.path.join(submissions_folder, test_repo)
is_badge_correct(test_folder, test_repo)

True

In [18]:
# Spot-check the branch-protection lookup (this calls the GitHub API).
test_repo = "lab-1-collaborative-software-development-20242510-QinjieLin-NU"
is_protection_rule_in_main(test_repo)

False

In [19]:
# Pass the keyword as a list: a bare string is iterated character by character
# by the `any(keyword in commit_message for keyword in keywords)` test.
is_keywords_in_commit_history(test_repo, ["fix-fib-test0-test5"])

False

In [41]:
test_repo = "lab-1-collaborative-software-development-20242510-Annie-LAN"
# Pass the keyword as a list: a bare string is iterated character by character
# by the `any(keyword in commit_message for keyword in keywords)` test.
is_keywords_in_commit_history(test_repo, ["fix-fib-test0-test5"])

True

In [21]:
# Spot-check the pull-request link detection on a single submission.
test_repo = "lab-1-collaborative-software-development-20242510-ZaynMak"
test_folder = os.path.join(submissions_folder, test_repo)
has_pull_link_in_readme(test_folder, test_repo)

True

In [22]:
# Contributors-format check; uses whatever test_folder currently holds.
check_contributors_format(test_folder)

True

In [23]:
# Contributors-format check on another submission.
test_repo = "lab-1-collaborative-software-development-20242510-Adrian-DH"
test_folder = os.path.join(submissions_folder, test_repo)
check_contributors_format(test_folder)

True

In [24]:
# Contributors-format check on another submission.
test_repo = "lab-1-collaborative-software-development-20242510-robertyang13"
test_folder = os.path.join(submissions_folder, test_repo)
check_contributors_format(test_folder)

True

In [25]:
# Contributors-format check on another submission.
test_repo = "lab-1-collaborative-software-development-20242510-andrewericgao"
test_folder = os.path.join(submissions_folder, test_repo)
check_contributors_format(test_folder)

True

In [26]:
# Contributors-format check on another submission.
test_repo = "lab-1-collaborative-software-development-20242510-Annie-LAN"
test_folder = os.path.join(submissions_folder, test_repo)
check_contributors_format(test_folder)

True

In [27]:
# Reflections check; uses whatever test_folder currently holds.
has_answer_in_refletions(test_folder)

True

In [28]:
# Reflections check on another submission.
test_repo = "lab-1-collaborative-software-development-20242510-isagonzalez"
test_folder = os.path.join(submissions_folder, test_repo)
has_answer_in_refletions(test_folder)

True