In [6]:
import re

# read all python lint errors
with open('py_lint_res.txt', 'r', encoding='utf-8') as f:
    pylint_results = f.read()

error_list = []
for line in pylint_results.split('\n'):

    if line.strip():

        match = re.search(r'python_codes/(\d+\.py):.*?([A-Z]\d{4})', line)
        if match:
            file_name = match.group(1)
            error_code = match.group(2)
            
            error_list.append((file_name,error_code))



In [7]:
import pandas as pd
import altair as alt

# Convert error list to DataFrame
error_df = pd.DataFrame(error_list, columns=['file_name', 'error_code'])

# Count error types
error_counts = error_df['error_code'].value_counts().reset_index()
error_counts.columns = ['error_code', 'count']
error_counts

Unnamed: 0,error_code,count
0,C0303,79
1,C0301,63
2,W0611,44
3,E0001,31
4,W0613,27
5,C0411,24
6,W0621,22
7,R0903,20
8,W0612,16
9,E1101,13


In [8]:
# Create bar chart
bar_chart = alt.Chart(error_counts).mark_bar().encode(
    x=alt.X('error_code', sort='-y'),
    y='count',
    color='error_code',
    tooltip=['error_code', 'count']
).properties(
    width=400,
    height=400,
    title='Distribution of Error Types'
)

bar_chart


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [9]:
# c0303 Trailing whitespace (trailing-whitespace)
# c0301 Line too long (line-too-long)
# w0611 unused import
# e0001 in most cases they are snippets of code that are not complete and thus cannot be parsed
# w0613 which is very common like unused import 
# c0411 wrong-import-order
# W0621 name confict


In [10]:
import os

# Count total lines in python files
total_lines = 0
for root, dirs, files in os.walk('./python_codes'):
    for file in files:
        if file.endswith('.py'):
            with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
                total_lines += sum(1 for line in f)

# Count error lines from error_df
error_lines = len(error_df)

print(f"Total lines of Python code: {total_lines}")
print(f"Total error lines: {error_lines}")
print(f"Error rate: {error_lines/total_lines*100:.2f}%")


Total lines of Python code: 5491
Total error lines: 452
Error rate: 8.23%


In [None]:
# Read and parse the JSON file
import json

with open('eslint_report.json', 'r', encoding='utf-8') as f:
    eslint_results = json.load(f)

# Extract error information
error_list = []
for file_result in eslint_results:
    for message in file_result['messages']:
        file_name = os.path.basename(file_result['filePath'])
        error_code = message['ruleId']
        error_list.append((file_name, error_code))

# Create DataFrame and count errors
error_df = pd.DataFrame(error_list, columns=['file_name', 'error_code'])

# Count error types
error_counts = error_df['error_code'].value_counts().reset_index()
error_counts.columns = ['error_code', 'count']

# Create bar chart
js_bar_chart = alt.Chart(error_counts).mark_bar().encode(
    x=alt.X('error_code', sort='-y'),
    y='count',
    color='error_code',
    tooltip=['error_code', 'count']
).properties(
    width=400,
    height=400,
    title='Distribution of JavaScript Error Types'
)

# Display the chart
js_bar_chart


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [12]:

# Count total lines in JavaScript files
total_lines = 0
for file_result in eslint_results:
    if 'source' in file_result:
        total_lines += len(file_result['source'].split('\n'))

# Count error lines from error_df
error_lines = len(error_df)

print(f"Total lines of JavaScript code: {total_lines}")
print(f"Total error lines: {error_lines}")
print(f"Error rate: {error_lines/total_lines*100:.2f}%")

Total lines of JavaScript code: 2246
Total error lines: 198
Error rate: 8.82%
