In [1]:
import os
import xml.etree.ElementTree as ET
import pandas as pd



# Define a function to parse the XML files
def parse_xml_file(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    return len(root.findall("./problem"))

# Define a function to process all the XML files in a student's directory
def process_student_dir(student_dir):
    xml_files = [f for f in os.listdir(student_dir) if f.endswith(".xml")]
    problem_counts = [parse_xml_file(os.path.join(student_dir, f)) for f in xml_files]
    return sum(problem_counts)

# Define a function to process all the student directories for an assignment
def process_assignment_dir(assignment_dir):
    student_dirs = [f for f in os.listdir(assignment_dir) if f.startswith("Student")]
    problem_counts = [process_student_dir(os.path.join(assignment_dir, d)) for d in student_dirs]
    return problem_counts

# Define a function to create a DataFrame with the problem counts for each assignment and student
def create_dataframe():
    assignment_dirs = [f for f in os.listdir(".") if f.startswith("Assignment")]
    data = []
    for assignment_dir in assignment_dirs:
        problem_counts = process_assignment_dir(assignment_dir)
        for i, count in enumerate(problem_counts):
            data.append({"assignment": assignment_dir, "student": f"Student {i+1:02}", "problem_count": count})
    return pd.DataFrame(data)

# Call the create_dataframe function and print the result
df = create_dataframe()
print(df)


                 assignment     student  problem_count
0        Assignment-03_Grid  Student 01             22
1        Assignment-03_Grid  Student 02             20
2        Assignment-03_Grid  Student 03             18
3        Assignment-03_Grid  Student 04             13
4        Assignment-03_Grid  Student 05             17
..                      ...         ...            ...
391  Assignment-19_JavaChat  Student 21             36
392  Assignment-19_JavaChat  Student 22             29
393  Assignment-19_JavaChat  Student 23             14
394  Assignment-19_JavaChat  Student 24             36
395  Assignment-19_JavaChat  Student 25             66

[396 rows x 3 columns]


In [3]:
import os
import xml.etree.ElementTree as ET
import pandas as pd



# Define a function to parse the XML files
def parse_xml_file(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    problems = root.findall("problem")

    problem_data = {}
    for i, problem in enumerate(problems):
        problem_class = problem.find("problem_class")
        problem_class_name = problem_class.text
        description = problem.find("description").text
        problem_data[f"problem{i+1}"] = f"{problem_class_name}: {description}"

    return problem_data


# Define a function to process all the XML files in a student's directory
def process_student_dir(student_dir):
    xml_files = [f for f in os.listdir(student_dir) if f.endswith(".xml")]
    problem_data = {}
    for xml_file in xml_files:
        xml_path = os.path.join(student_dir, xml_file)
        problem_data.update(parse_xml_file(xml_path))
    return problem_data

# Define a function to process all the student directories for an assignment
def process_assignment_dir(assignment_dir):
    student_dirs = [f for f in os.listdir(assignment_dir) if f.startswith("Student")]
    problem_data = {}
    for student_dir in student_dirs:
        student_data = process_student_dir(os.path.join(assignment_dir, student_dir))
        problem_data[student_dir] = student_data
    return problem_data

# Define a function to create a DataFrame with the problem data for each assignment and student
def create_dataframe():
    assignment_dirs = [f for f in os.listdir(".") if f.startswith("Assignment")]
    data = []
    for assignment_dir in assignment_dirs:
        problem_data = process_assignment_dir(assignment_dir)
        assignment_df = pd.DataFrame(problem_data).transpose()
        assignment_df.index.name = "student"
        assignment_df.reset_index(inplace=True)
        assignment_df.insert(0, "assignment", assignment_dir)
        data.append(assignment_df)
    return pd.concat(data)

# Call the create_dataframe function and print the result
df = create_dataframe()
print(df)


                assignment     student  \
0       Assignment-03_Grid  Student 07   
1       Assignment-03_Grid  Student 14   
2       Assignment-03_Grid  Student 21   
3       Assignment-03_Grid  Student 23   
4       Assignment-03_Grid  Student 24   
..                     ...         ...   
20  Assignment-19_JavaChat  Student 37   
21  Assignment-19_JavaChat  Student 40   
22  Assignment-19_JavaChat  Student 42   
23  Assignment-19_JavaChat  Student 44   
24  Assignment-19_JavaChat  Student 45   

                                             problem1  \
0   Wrong package statement: Package name 'noapple...   
1   Wrong package statement: Package name 'noapple...   
2   Wrong package statement: Package name 'noapple...   
3   Wrong package statement: Package name 'noapple...   
4   Wrong package statement: Package name 'noapple...   
..                                                ...   
20  Wrong package statement: Package name 'pack' d...   
21  Wrong package statement: Package na

In [4]:
import os
import pandas as pd
import xml.etree.ElementTree as ET

def parse_xml_file(xml_path, assignment_name):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    problems = root.findall("problem")

    problem_data = {}
    for problem in problems:
        problem_id = problem.find("problem_class").get("id")
        problem_name = f"{assignment_name} - {problem_id}"
        if problem_name not in problem_data:
            problem_data[problem_name] = 0
        problem_data[problem_name] += 1

    return problem_data

def process_assignment(assignment_path):
    assignment_name = os.path.basename(assignment_path)
    student_folders = [f for f in os.listdir(assignment_path) if f.startswith("Student")]
    student_data = {}
    for student_folder in student_folders:
        student_id = int(student_folder.split(" ")[1])
        student_files = os.listdir(os.path.join(assignment_path, student_folder))
        problem_counts = {}
        for file in student_files:
            if file.endswith(".xml"):
                xml_path = os.path.join(assignment_path, student_folder, file)
                problem_data = parse_xml_file(xml_path, assignment_name)
                for problem_name, count in problem_data.items():
                    if problem_name not in problem_counts:
                        problem_counts[problem_name] = 0
                    problem_counts[problem_name] += count
        student_data[student_id] = problem_counts
    return student_data

def create_table(assignment_paths):
    table_data = {}
    for assignment_path in assignment_paths:
        assignment_name = os.path.basename(assignment_path)
        student_data = process_assignment(assignment_path)
        for student_id, problem_counts in student_data.items():
            if student_id not in table_data:
                table_data[student_id] = {}
            for problem_name, count in problem_counts.items():
                if problem_name not in table_data[student_id]:
                    table_data[student_id][problem_name] = 0
                table_data[student_id][problem_name] += count

    table_df = pd.DataFrame.from_dict(table_data, orient="index")
    table_df.index.name = "student"
    table_df = table_df.fillna(0)
    table_df = table_df.astype(int)
    table_df = table_df.sort_index(axis=1)

    return table_df

if __name__ == "__main__":
    assignment_paths = [f for f in os.listdir(".") if f.startswith("Assignment")]
    table = create_table(assignment_paths)
    print(table)


         Assignment-03_Grid -   Assignment-03_Grid - CanBeFinal  \
student                                                           
7                            5                                0   
14                           2                                0   
21                           2                                0   
23                           2                                0   
24                           2                                0   
27                           2                                0   
28                           2                                0   
29                           2                                0   
30                           2                                0   
32                           2                                0   
33                           2                                0   
34                           2                                0   
35                           2                                

In [6]:
import os
import pandas as pd
import xml.etree.ElementTree as ET


def parse_xml_file(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    problems = root.findall("problem")

    problem_data = {}
    for problem in problems:
        problem_id = problem.find("problem_class").get("id")
        problem_data[problem_id] = problem_data.get(problem_id, 0) + 1

    return problem_data


def process_assignment(assignment_path):
    assignment_name = os.path.basename(assignment_path)
    student_folders = [f for f in os.listdir(assignment_path) if f.startswith("Student")]
    problem_names = set()
    student_data = {}
    for student_folder in student_folders:
        student_id = int(student_folder.split(" ")[1])
        student_files = os.listdir(os.path.join(assignment_path, student_folder))
        problem_counts = {}
        for file in student_files:
            if file.endswith(".xml"):
                xml_path = os.path.join(assignment_path, student_folder, file)
                problem_data = parse_xml_file(xml_path)
                for problem_name, count in problem_data.items():
                    problem_counts[problem_name] = problem_counts.get(problem_name, 0) + count
                    problem_names.add(problem_name)
        student_data[student_id] = problem_counts
    return assignment_name, student_data, problem_names


def create_table(assignment_paths):
    problem_names = set()
    table_data = {}
    for assignment_path in assignment_paths:
        assignment_name, student_data, assignment_problem_names = process_assignment(assignment_path)
        problem_names.update(assignment_problem_names)
        for student_id, problem_counts in student_data.items():
            if student_id not in table_data:
                table_data[student_id] = {}
            for problem_name, count in problem_counts.items():
                table_data[student_id][problem_name] = table_data[student_id].get(problem_name, 0) + count

    table_df = pd.DataFrame.from_dict(table_data, orient="index")
    table_df.index.name = "student"
    table_df = table_df.fillna(0)
    table_df = table_df.astype(int)
    table_df = table_df.reindex(sorted(table_df.columns), axis=1)

    return table_df


if __name__ == "__main__":
    assignment_paths = [f for f in os.listdir(".") if f.startswith("Assignment")]
    table = create_table(assignment_paths)
    print(table)


             AccessStaticViaInstance  AnonymousClassComplexity  \
student                                                          
7        12                        0                         1   
14        9                        0                         0   
21       11                        3                         0   
23       10                        0                         0   
24        9                        1                         0   
27       10                        6                         0   
28        6                        0                         0   
29        9                        0                         0   
30        6                        0                         0   
32       10                        0                         0   
33       14                        0                         1   
34        9                        0                         0   
35        7                        0                         0   
37        

In [9]:
table

Unnamed: 0_level_0,Unnamed: 1_level_0,AccessStaticViaInstance,AnonymousClassComplexity,AnonymousClassMethodCount,AutoCloseableResource,BooleanMethodIsAlwaysInverted,BusyWait,CStyleArrayDeclaration,CanBeFinal,CatchMayIgnoreException,...,UnnecessaryModifier,UnnecessaryReturn,UnnecessarySemicolon,UnnecessaryToStringCall,UnnecessaryUnaryMinus,UnusedAssignment,UnusedReturnValue,UtilityClass,WrongPackageStatement,unused
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7,12,0,1,4,1,0,0,1,20,0,...,0,0,0,0,0,0,0,1,13,53
14,9,0,0,3,0,0,0,0,33,0,...,6,0,1,1,0,1,0,3,19,62
21,11,3,0,1,0,0,0,0,24,1,...,0,0,0,0,0,1,0,3,24,103
23,10,0,0,0,1,0,0,0,12,1,...,0,0,0,0,0,0,0,4,29,73
24,9,1,0,0,0,0,0,0,37,0,...,7,0,0,1,0,1,2,5,26,114
27,10,6,0,0,0,0,0,0,25,1,...,0,0,0,0,0,4,0,2,31,79
28,6,0,0,1,0,0,0,0,16,0,...,1,0,0,0,0,0,0,1,10,34
29,9,0,0,3,0,0,0,0,26,0,...,2,0,0,0,0,1,0,2,32,94
30,6,0,0,1,0,0,0,0,6,0,...,0,0,0,0,0,2,0,3,15,42
32,10,0,0,1,1,0,0,0,32,0,...,0,0,0,1,0,3,0,2,15,62


In [28]:
import os
import pandas as pd
import xml.etree.ElementTree as ET


def parse_xml_file(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    problems = root.findall("problem")

    problem_data = {}
    for problem in problems:
        problem_id = problem.find("problem_class").get("id")
        problem_data[problem_id] = problem_data.get(problem_id, 0) + 1

    return problem_data


def process_assignment(assignment_path):
    assignment_name = os.path.basename(assignment_path)
    student_folders = [f for f in os.listdir(assignment_path) if f.startswith("Student")]
    problem_names = set()
    student_data = {}
    for student_folder in student_folders:
        student_id = int(student_folder.split(" ")[1])
        student_files = os.listdir(os.path.join(assignment_path, student_folder))
        problem_counts = {}
        for file in student_files:
            if file.endswith(".xml"):
                xml_path = os.path.join(assignment_path, student_folder, file)
                problem_data = parse_xml_file(xml_path)
                for problem_name, count in problem_data.items():
                    problem_counts[problem_name] = problem_counts.get(problem_name, 0) + count
                    problem_names.add(problem_name)
        student_data[student_id] = problem_counts
    return assignment_name, student_data, problem_names


def create_table(assignment_paths):
    problem_names = set()
    table_data = {}
    for assignment_path in assignment_paths:
        assignment_name, student_data, assignment_problem_names = process_assignment(assignment_path)
        problem_names.update(assignment_problem_names)
        for student_id, problem_counts in student_data.items():
            if student_id not in table_data:
                table_data[student_id] = {}
            for problem_name, count in problem_counts.items():
                table_data[student_id][problem_name] = table_data[student_id].get(problem_name, 0) + count

    table_df = pd.DataFrame.from_dict(table_data, orient="index")
    table_df.index.name = "student"
    table_df = table_df.fillna(0)
    table_df = table_df.astype(int)
    table_df = table_df.reindex(sorted(table_df.columns), axis=1)

    return table_df


if __name__ == "__main__":
    assignment_paths = [f for f in os.listdir(".") if f.startswith("Assignment-19")]
    table = create_table(assignment_paths)
    print(table)

            AnonymousClassComplexity  AutoCloseableResource  CanBeFinal  \
student                                                                   
2        1                         0                      0           0   
4        1                         0                      1           0   
6        2                         0                      1           2   
7        1                         0                      1           0   
8        1                         0                      0           0   
10       1                         0                      2           0   
11       1                         0                      0           0   
12       2                         0                      2           2   
14       1                         0                      0           0   
15       1                         0                      1           0   
18       1                         0                      0           2   
20       2               

In [29]:
table

Unnamed: 0_level_0,Unnamed: 1_level_0,AnonymousClassComplexity,AutoCloseableResource,CanBeFinal,CatchMayIgnoreException,ChainedMethodCall,CodeBlock2Expr,CommentedOutCode,ConstantValue,Convert2Diamond,...,StaticImport,StringEquality,SynchronizeOnNonFinalField,ThrowablePrintedToSystemOut,TryWithIdenticalCatches,UNUSED_IMPORT,UnnecessarySemicolon,UnusedAssignment,WrongPackageStatement,unused
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,2,0,0,1,3
4,1,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,2
6,2,0,1,2,0,3,0,0,0,1,...,0,0,2,0,0,0,0,0,2,2
7,1,0,1,0,0,2,0,0,0,0,...,0,0,0,0,0,0,0,0,1,2
8,1,0,0,0,1,0,0,0,0,0,...,1,3,0,0,0,5,0,1,1,2
10,1,0,2,0,0,0,0,1,0,0,...,0,0,0,0,0,3,0,1,1,3
11,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,1,1,3
12,2,0,2,2,0,4,0,0,0,1,...,0,0,2,0,0,0,0,0,2,2
14,1,0,0,0,0,1,1,0,0,0,...,0,0,0,0,0,4,0,0,1,2
15,1,0,1,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,1,3


In [24]:
table['MissingJavadoc']

student
2     2
4     2
7     3
8     2
10    2
11    2
12    2
14    2
15    3
18    2
20    3
21    2
22    2
23    2
24    1
29    3
32    2
33    2
34    2
37    2
40    2
41    2
42    2
44    2
31    0
Name: MissingJavadoc, dtype: int32

In [1]:
import os
import pandas as pd
import xml.etree.ElementTree as ET


def parse_xml_file(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    problems = root.findall("problem")

    problem_data = {}
    for problem in problems:
        problem_id = problem.find("problem_class").get("id")
        problem_data[problem_id] = problem_data.get(problem_id, 0) + 1

    return problem_data


def process_assignment(assignment_path):
    assignment_name = os.path.basename(assignment_path)
    student_folders = [f for f in os.listdir(assignment_path) if f.startswith("Student")]
    problem_names = set()
    student_data = {}
    for student_folder in student_folders:
        student_id = int(student_folder.split(" ")[1])
        student_files = os.listdir(os.path.join(assignment_path, student_folder))
        problem_counts = {}
        for file in student_files:
            if file.endswith(".xml"):
                xml_path = os.path.join(assignment_path, student_folder, file)
                problem_data = parse_xml_file(xml_path)
                for problem_name, count in problem_data.items():
                    problem_counts[problem_name] = problem_counts.get(problem_name, 0) + count
                    problem_names.add(problem_name)
        student_data[student_id] = problem_counts
    return assignment_name, student_data, problem_names


def create_table(assignment_paths):
    problem_names = set()
    table_data = {}
    for assignment_path in assignment_paths:
        assignment_name, student_data, assignment_problem_names = process_assignment(assignment_path)
        problem_names.update(assignment_problem_names)
        for student_id, problem_counts in student_data.items():
            if student_id not in table_data:
                table_data[student_id] = {}
            for problem_name, count in problem_counts.items():
                table_data[student_id][problem_name] = table_data[student_id].get(problem_name, 0) + count

    table_df = pd.DataFrame.from_dict(table_data, orient="index")
    table_df.index.name = "student"
    table_df = table_df.fillna(0)
    table_df = table_df.astype(int)
    table_df = table_df.reindex(sorted(table_df.columns), axis=1)

    return table_df


if __name__ == "__main__":
    assignment_paths = [f for f in os.listdir(".") if f.startswith("Homework")]
    table = create_table(assignment_paths)
    print(table)

            AccessStaticViaInstance  AnonymousClassComplexity  \
student                                                         
2        2                        0                         0   
4        1                        0                         2   
6        1                        0                         0   
8        1                        0                         0   
9        1                        0                         0   
10       1                        0                         1   
11       2                        0                         1   
12       1                        0                         1   
13       3                        0                         0   
14       4                        0                         2   
18       2                        0                         3   
19       1                        0                         1   
20       1                        0                         0   
22       1               

In [2]:
table

Unnamed: 0_level_0,Unnamed: 1_level_0,AccessStaticViaInstance,AnonymousClassComplexity,AnonymousClassMethodCount,BooleanMethodIsAlwaysInverted,CStyleArrayDeclaration,CanBeFinal,CatchMayIgnoreException,ChainedMethodCall,ClassCanBeRecord,...,UnnecessaryContinue,UnnecessaryLocalVariable,UnnecessaryModifier,UnnecessaryReturn,UnnecessarySemicolon,UnusedAssignment,UnusedReturnValue,UtilityClass,WrongPackageStatement,unused
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,2,0,0,0,0,0,16,0,0,0,...,0,0,0,0,0,1,0,1,11,71
4,1,0,2,0,0,0,14,0,4,0,...,0,0,0,0,0,1,1,0,10,37
6,1,0,0,0,0,0,12,0,3,1,...,0,0,0,0,0,1,1,1,8,15
8,1,0,0,0,0,0,3,0,3,2,...,0,0,0,0,0,0,0,0,4,15
9,1,0,0,0,0,0,8,0,2,1,...,0,1,0,0,0,0,0,1,7,37
10,1,0,1,0,0,0,10,0,3,0,...,0,0,0,0,1,2,0,1,8,24
11,2,0,1,1,0,0,4,0,4,1,...,0,0,0,0,1,0,0,1,8,17
12,1,0,1,0,0,0,20,0,4,0,...,0,2,9,0,0,0,0,1,9,8
13,3,0,0,0,0,0,12,0,2,0,...,0,0,0,0,0,0,0,2,12,2
14,4,0,2,0,0,0,31,17,0,0,...,0,1,0,0,0,2,0,2,11,28


In [8]:
# Compute the sum of each column
sums = table.sum()

max_columns = sums.nlargest(10).index.tolist()

# Print the results
print('Sum of each column:')
print(sums)
print('Top 10 columns with the highest sum:', max_columns)

Sum of each column:
                                  33
AccessStaticViaInstance            3
AnonymousClassComplexity          15
AnonymousClassMethodCount          5
BooleanMethodIsAlwaysInverted      2
                                ... 
UnusedAssignment                  17
UnusedReturnValue                  6
UtilityClass                      20
WrongPackageStatement            187
unused                           542
Length: 79, dtype: int64
Top 10 columns with the highest sum: ['MissingJavadoc', 'unused', 'SingleClassImport', 'CanBeFinal', 'WrongPackageStatement', 'MultipleReturnPointsPerMethod', 'MethodWithMultipleLoops', 'FieldMayBeFinal', 'ChainedMethodCall', 'FeatureEnvy']


In [14]:
codeSmells=table[['MissingJavadoc', 'unused', 'SingleClassImport', 'CanBeFinal', 'WrongPackageStatement', 'MultipleReturnPointsPerMethod', 'MethodWithMultipleLoops', 'FieldMayBeFinal', 'ChainedMethodCall', 'FeatureEnvy']]
codeSmells

Unnamed: 0_level_0,MissingJavadoc,unused,SingleClassImport,CanBeFinal,WrongPackageStatement,MultipleReturnPointsPerMethod,MethodWithMultipleLoops,FieldMayBeFinal,ChainedMethodCall,FeatureEnvy
student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2,102,71,14,16,11,7,9,10,0,3
4,34,37,40,14,10,5,7,12,4,3
6,51,15,12,12,8,8,9,10,3,9
8,32,15,2,3,4,5,3,0,3,1
9,38,37,4,8,7,5,5,2,2,0
10,23,24,36,10,8,5,3,4,3,0
11,66,17,4,4,8,9,3,0,4,6
12,18,8,7,20,9,8,4,1,4,5
13,33,2,7,12,12,9,7,6,2,7
14,47,28,18,31,11,10,6,0,0,1


In [15]:
codeSmells.to_csv('hw4.csv', index=False)