In [11]:
import json
# Load the scores file; it is consumed below as a mapping whose values are the scores.
with open("/content/final_scores.json", "r") as file:
    data = json.load(file)

# Inclusive cut-off: a score exactly equal to the threshold is counted.
SCORE_THRESHOLD = 6.75

# Count the entries whose score is at least the threshold.
# float() is applied because the stored values may be numeric strings.
count = sum(1 for value in data.values() if float(value) >= SCORE_THRESHOLD)

# The label states ">=" so that it matches the comparison actually performed.
print(f"Count of PDFs with value >= {SCORE_THRESHOLD}:", count)

Count of PDFs with value > 6.75: 91


In [14]:
import pandas as pd
def json_to_csv_pandas(json_filepath, csv_filepath, full_pdf_name=True):
    """
    Converts a JSON file of PDF scores to a two-column CSV file using pandas.

    The JSON document may be either:
      * an object mapping PDF name -> score, e.g. {"P001.pdf": 7.0}, or
      * a list of [pdf_name, score] pairs, e.g. [["P001.pdf", 7.0]].

    The CSV is written with the columns "PDF Name" and "Score" and no index
    column.

    Args:
        json_filepath: Path to the input JSON file.
        csv_filepath: Path to the output CSV file.
        full_pdf_name: If True (default), includes ".pdf" in the PDF name column.
                       If False, only includes the base name (e.g., "P001").

    Returns:
        None. Success and every error condition are reported with print();
        no exception is propagated to the caller.
    """
    try:
        with open(json_filepath, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)

        # Iterating a dict yields only its keys, so entry[0]/entry[1] would be
        # the first two characters of each name. Use (name, score) items instead.
        entries = data.items() if isinstance(data, dict) else data

        pdf_names = []
        scores = []
        for entry in entries:
            try:
                # A bare string is indexable, but its "fields" would be single
                # characters; treat it as a malformed entry rather than data.
                if isinstance(entry, str):
                    raise TypeError("expected a [name, score] pair, got a string")
                pdf_full_name = entry[0]
                score = entry[1]
                if full_pdf_name:
                    pdf_names.append(pdf_full_name)
                else:
                    pdf_names.append(pdf_full_name.replace(".pdf", ""))  # Remove ".pdf"
                scores.append(score)
            except (IndexError, KeyError, TypeError) as e:
                # KeyError covers dict-shaped entries without 0/1 keys, so one bad
                # entry is skipped instead of aborting the whole conversion.
                print(f"Warning: Invalid entry format: {entry}. Skipping. Error: {e}")

        df = pd.DataFrame({'PDF Name': pdf_names, 'Score': scores})
        # index=False keeps the DataFrame's row index out of the CSV.
        df.to_csv(csv_filepath, index=False, encoding='utf-8')

        print(f"Successfully converted {json_filepath} to {csv_filepath}")

    except FileNotFoundError:
        print(f"Error: JSON file not found at {json_filepath}")
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {json_filepath}")
    except Exception as e:
        # Best-effort notebook helper: report anything else instead of raising.
        print(f"An unexpected error occurred: {e}")

In [15]:
# Source JSON and destination CSV for the conversion.
json_path = "/content/final_scores.json"
csv_path = "/content/final_scores.csv"

# Keyword arguments make the (input, output) order explicit at the call site.
json_to_csv_pandas(json_filepath=json_path, csv_filepath=csv_path)

Successfully converted /content/final_scores.json to /content/final_scores.csv


In [17]:
# NOTE(review): this reads pdf_scores.csv, not the final_scores.csv written by the
# conversion cell above — confirm that this is the intended input file.
df = pd.read_csv("/content/pdf_scores.csv")
print(df.shape)  # shape as loaded, before the derived column is added

# Flag each row as 1 when its score is at least 6.75 (inclusive), otherwise 0.
df = df.assign(publishability=df["Score"].astype(float).ge(6.75).astype(int))
df

(135, 2)


Unnamed: 0,PDF Name,Score,publishability
0,P001,7.00,1
1,P002,2.50,0
2,P003,5.75,0
3,P004,6.75,1
4,P005,7.75,1
...,...,...,...
130,P131,7.50,1
131,P132,4.75,0
132,P133,7.25,1
133,P134,2.33,0


In [19]:
# Persist the scored table. index=True is the pandas default and is spelled out
# here: the row index is written as an extra, unnamed first column.
df.to_csv("/content/final_scores_pdf.csv", index=True)