In [8]:
import os
import pandas as pd

def process_folders(main_folder_path, output_path='proper_case_law.csv', verbose=True):
    """Collect the first ``case_uri`` value of every CSV file under a folder tree.

    The tree rooted at ``main_folder_path`` is walked recursively. For each
    ``*.csv`` file that has a ``case_uri`` column and at least one data row,
    one record is produced holding the name of the containing folder, the
    file name and the value of ``case_uri`` in the first row (the code only
    looks at the first row; it does not verify that the other rows agree).

    Parameters
    ----------
    main_folder_path : str
        Root directory to scan.
    output_path : str, optional
        Where the summary CSV is written. Defaults to ``'proper_case_law.csv'``
        (relative to the current working directory).
    verbose : bool, optional
        When true (the default), print a progress line per folder and per
        file. Skipped/failed files are always reported.

    Returns
    -------
    pandas.DataFrame
        Columns ``subfolder``, ``file_name`` and ``case_uri``. These columns
        are present even when no file matched, so code that indexes
        ``result['case_uri']`` afterwards keeps working on an empty result.

    Notes
    -----
    Files that cannot be read or parsed are reported on stdout and skipped;
    they never abort the scan.
    """
    columns = ['subfolder', 'file_name', 'case_uri']
    results = []

    # Resolve the output location once. If the summary file lives inside the
    # scanned tree it must not be picked up as an input on a re-run (it has a
    # 'case_uri' column itself and would otherwise add a bogus row).
    output_abspath = os.path.normcase(os.path.abspath(output_path))

    def _is_input_csv(folder, name):
        if not name.endswith('.csv'):
            return False
        candidate = os.path.normcase(os.path.abspath(os.path.join(folder, name)))
        return candidate != output_abspath

    if verbose:
        print(f"Processing folder: {main_folder_path}")

    for root, dirs, files in os.walk(main_folder_path):
        # Sorting in place makes os.walk descend in a stable order, so the
        # resulting rows are reproducible across machines and runs.
        dirs.sort()
        csv_files = sorted(f for f in files if _is_input_csv(root, f))
        if verbose:
            print(f"Found {len(csv_files)} CSV files in {root}")

        for csv_file in csv_files:
            file_path = os.path.join(root, csv_file)
            if verbose:
                print(f"Processing file: {file_path}")
            try:
                df = pd.read_csv(file_path)
            except Exception as e:
                # Best effort: one unreadable file must not stop the whole scan.
                print(f"Error processing file {file_path}: {str(e)}")
                continue

            # Files without the column are silently ignored, as before.
            if 'case_uri' not in df.columns:
                continue

            # A header-only file has no first row to take the value from.
            if df.empty:
                print(f"Skipping file with no data rows: {file_path}")
                continue

            results.append({
                'subfolder': os.path.basename(root),
                'file_name': csv_file,
                'case_uri': df['case_uri'].iloc[0],
            })

    # Passing the column list guarantees the schema even for zero records.
    result_df = pd.DataFrame(results, columns=columns)

    # Persist the summary next to returning it.
    result_df.to_csv(output_path, index=False)

    return result_df

# Root directory that holds the case-law CSV sub-folders. The scan is started
# right away and its summary frame is kept in `result` for the cells below.
folder_path = "/Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering"
result = process_folders(folder_path)
print(f"Processing complete. Result shape: {result.shape}")

Processing folder: /Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering
Found 0 CSV files in /Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering
Found 1263 CSV files in /Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering/2013
Processing file: /Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering/2013/RCW v A Local Authority.csv
Processing file: /Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering/2013/Norman, R v.csv
Processing file: /Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering/2013/Pokusa v Circuit Court In Olsztyn & Anor.csv
Processing file: /Users/apple/Documents/Swansea/Projects/Caselaw_having_legislation_reference_and_proper_paragraph_numbering/201

In [12]:
# Works on the in-memory `result` frame (nothing is re-read from disk).
# Step 1: keep the part of 'case_uri' that follows https://caselaw.nationalarchives.gov.uk/
# Step 2: the first path segment of that remainder is stored in a new 'court' column.
# Finally the extended frame overwrites 'proper_case_law.csv' in the working directory.
uri_remainder = result['case_uri'].str.split('https://caselaw.nationalarchives.gov.uk/').str[1]
result['court'] = uri_remainder.str.split('/').str[0]
result.to_csv('proper_case_law.csv', index=False)
print("Final CSV saved as 'proper_case_law.csv'")

Final CSV saved as 'proper_case_law.csv'
