In [380]:
import pandas as pd
import json
import re
import csv
import os
import copy
from datetime import datetime
import glob
import numpy as np
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter, landscape
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Spacer, Paragraph
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle

# Create the pdf functions

In [383]:
def collect_json_files(directory):
    """Return the base names of the ``*.json`` files directly inside ``directory``.

    Only file names are returned (the directory component is stripped), in
    whatever order ``glob.glob`` yields them. A directory without matches
    gives an empty list.
    """
    pattern = os.path.join(directory, '*.json')
    return [os.path.basename(match) for match in glob.glob(pattern)]


In [385]:
def read_csv_to_dict(file_path, encoding=None):
    """Read a CSV file into a list of dictionaries, one per data row.

    Parameters
    ----------
    file_path : str
        Path of the CSV file. The first row is used as the header.
    encoding : str, optional
        Text encoding passed to ``open``. ``None`` (the default) keeps the
        platform default, exactly as before. Pass ``"utf-8-sig"`` to strip a
        leading byte-order mark from the first header name.

    Returns
    -------
    list[dict]
        One ``{column name: cell text}`` dictionary per row, in file order.
    """
    # newline='' lets the csv module do its own line handling, so line breaks
    # embedded in quoted fields are returned unchanged.
    with open(file_path, mode='r', newline='', encoding=encoding) as csv_file:
        return [dict(row) for row in csv.DictReader(csv_file)]

In [387]:
def select_keys(d, keys):
    """Pick values out of a nested dictionary using dotted key paths.

    Each entry of ``keys`` is a path such as ``"a.b.c"``. The result maps the
    path string itself to the value found by descending ``d`` one segment at
    a time, or to ``None`` when a segment is missing or an intermediate value
    is not a dictionary.
    """
    def _resolve(path):
        node = d
        for segment in path.split('.'):
            # Stop as soon as the path cannot be followed any further.
            if not isinstance(node, dict) or segment not in node:
                return None
            node = node[segment]
        return node

    return {path: _resolve(path) for path in keys}

In [389]:
 def get_deposit_types(json_data):
     """Build a table of the deposit-type candidates of one mineral site.

     Parameters
     ----------
     json_data : dict
         A mineral-site record. Its ``'deposit_type_candidate'`` entry, when
         present, is a list of dictionaries with optional ``'observed_name'``
         and ``'normalized_uri'`` keys.

     Returns
     -------
     pandas.DataFrame
         One row per candidate with the columns ``"observed deposit type"``
         and ``"normalized id"``. The latter holds the deposit type name that
         ``../codes/minmod_deposit_types.csv`` lists for the last path segment
         of the URI, or ``None`` when the URI is missing, null or unknown.
         With no candidates the frame is empty.
     """
     deposit_names = {
         row['Minmod ID']: row['Deposit type']
         for row in read_csv_to_dict("../codes/minmod_deposit_types.csv")
     }

     rows = []
     # `or []` covers both a missing key and an explicit JSON null.
     for item in json_data.get('deposit_type_candidate') or []:
         uri = item.get('normalized_uri')
         # Only a string URI can be split; anything else has no id to look up.
         minmod_id = uri.split('/')[-1] if isinstance(uri, str) else None
         rows.append({
             "observed deposit type": item.get('observed_name'),
             "normalized id": deposit_names.get(minmod_id),
         })

     return pd.DataFrame(rows)

In [391]:
def get_document_ref(json_data):
    """Return the document reference of the first mineral-inventory entry.

    The ``reference.document`` dictionary of ``json_data['mineral_inventory'][0]``
    is turned into a DataFrame whose columns are the dictionary's keys. If the
    inventory is missing or empty, or the first entry has no
    ``reference``/``document``, an empty DataFrame is returned instead.
    """
    if 'mineral_inventory' not in json_data or not json_data['mineral_inventory']:
        return pd.DataFrame()

    first_entry = json_data['mineral_inventory'][0]
    if 'reference' not in first_entry or 'document' not in first_entry['reference']:
        return pd.DataFrame()

    document = first_entry['reference']['document']
    # Keys become the index first, then the frame is flipped so they are columns.
    return pd.DataFrame.from_dict(document, orient='index').transpose()

In [393]:
def get_mineral_site(json_data):
    """Summarise the identity and location of a mineral site as a one-row table.

    For ``country`` and ``state_or_province`` the last path segment of
    ``location_info.<field>.normalized_uri`` is looked up in
    ``../codes/<field>.csv`` (columns ``minmod_id`` and ``name``). When the id
    is known, the URI is replaced by the name.

    Note: the replacement is written back into ``json_data`` itself, so the
    caller's dictionary is modified. Ids that are not in the CSV leave the URI
    untouched.

    Parameters
    ----------
    json_data : dict
        A mineral-site record.

    Returns
    -------
    pandas.DataFrame
        A single row (index ``0``) with the columns ``record_id``, ``name``,
        ``location_info.location`` and the observed name / normalized value of
        country and state or province. Missing values are ``None``.

    Raises
    ------
    KeyError
        If a code CSV row has neither a ``minmod_id`` column nor its
        BOM-prefixed variant, or lacks a ``name`` column.
    """
    selected_keys = ["record_id", "name", "location_info.location",
                     "location_info.country.observed_name",
                     "location_info.country.normalized_uri",
                     "location_info.state_or_province.observed_name",
                     "location_info.state_or_province.normalized_uri"]

    location_info = json_data.get("location_info")

    for loc_value in ["country", "state_or_province"]:
        id_to_name = {}
        for row in read_csv_to_dict(f"../codes/{loc_value}.csv"):
            # The first header may or may not still carry a byte-order mark,
            # depending on how the CSV was decoded; accept both spellings.
            if "minmod_id" in row:
                minmod_id = row["minmod_id"]
            elif "\ufeffminmod_id" in row:
                minmod_id = row["\ufeffminmod_id"]
            else:
                raise KeyError("minmod_id")
            id_to_name[minmod_id] = row["name"]

        # A missing or null location block / field simply has nothing to normalise.
        entry = location_info.get(loc_value) if isinstance(location_info, dict) else None
        if not isinstance(entry, dict):
            continue

        normalized_uri = entry.get("normalized_uri")
        if not isinstance(normalized_uri, str) or not normalized_uri:
            continue

        norm_value = normalized_uri.split("/")[-1]
        if norm_value in id_to_name:
            # In-place update of the caller's record (see docstring).
            entry["normalized_uri"] = id_to_name[norm_value]

    return pd.DataFrame(select_keys(json_data, selected_keys), index=[0])

In [395]:
def convert_normalized_uri(norm_id, codes):
    """Translate a normalized URI into its human-readable name.

    Parameters
    ----------
    norm_id : str or None
        URI whose last ``/``-separated segment is the id to look up.
    codes : dict
        Mapping from id to display name.

    Returns
    -------
    str
        The mapped name; ``""`` when ``norm_id`` is empty or ``None``; the
        bare id itself when it is not present in ``codes``, so one unknown
        code does not abort a whole batch and the value stays visible.
    """
    if not norm_id:
        return ""
    minmod_id = norm_id.split('/')[-1]
    return codes.get(minmod_id, minmod_id)


def _nested_get(mapping, *keys, default=None):
    """Follow ``keys`` through nested dicts; return ``default`` if a level is missing or not a dict."""
    node = mapping
    for key in keys:
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node


def _quantity_fields(item, field, units):
    """Return the value / observed unit / normalized unit columns for ``item[field]``."""
    return {
        f"{field}_value": _nested_get(item, field, "value"),
        f"{field}_unit_observed_name": _nested_get(item, field, "unit", "observed_name"),
        f"{field}_unit_normalized_uri": convert_normalized_uri(
            _nested_get(item, field, "unit", "normalized_uri", default=""), units
        ),
    }


def get_mineral_inventory(json_data):
    """Flatten the mineral inventory of one site into a table.

    Commodity and unit URIs are translated to names using
    ``../codes/minmod_commodities.csv`` and ``../codes/minmod_units.csv``.
    Missing or null nested objects (commodity, reference, ore, grade,
    cutoff_grade, their units, an empty ``page_info`` list) yield empty
    values rather than raising.

    Parameters
    ----------
    json_data : dict
        A mineral-site record; its ``'mineral_inventory'`` entry is a list of
        inventory dictionaries (a missing or null entry is treated as empty).

    Returns
    -------
    pandas.DataFrame
        One row per inventory entry with the columns ``zone``,
        ``page_number``, ``commodity_observed_name``,
        ``commodity_normalized_uri``, ``category`` (list of ids) and, for each
        of ``ore``, ``grade`` and ``cutoff_grade``, the ``_value``,
        ``_unit_observed_name`` and ``_unit_normalized_uri`` columns.
    """
    minmod_commodities = read_csv_to_dict("../codes/minmod_commodities.csv")
    commodities = {key['minmod_id']: key['CommodityinGeoKb'] for key in minmod_commodities}

    minmod_units = read_csv_to_dict("../codes/minmod_units.csv")
    correct_units = {key['minmod_id']: key['unit name'] for key in minmod_units}

    data = []
    for item in json_data.get('mineral_inventory') or []:
        # First page reference, if any; an empty or missing list gives "".
        page_info = _nested_get(item, "reference", "page_info")
        first_page = page_info[0] if isinstance(page_info, list) and page_info else {}
        page_number = first_page.get("page", "") if isinstance(first_page, dict) else ""

        # Both commodity columns share the same presence check, so an entry
        # without a commodity no longer raises on the normalized column.
        commodity = item.get("commodity")
        has_commodity = isinstance(commodity, dict)

        row = {
            "zone": item.get("zone", None),
            "page_number": page_number,
            "commodity_observed_name": commodity.get("observed_name", "") if has_commodity else None,
            "commodity_normalized_uri": convert_normalized_uri(
                commodity.get("normalized_uri", "") if has_commodity else "", commodities
            ),
            "category": [
                (category.get("normalized_uri") or "").split('/')[-1]
                for category in item.get("category") or []
            ],
        }
        # Appended in this order to keep the original column layout.
        for field in ("ore", "grade", "cutoff_grade"):
            row.update(_quantity_fields(item, field, correct_units))
        data.append(row)

    return pd.DataFrame(data)


In [411]:
def create_pdf_with_tables(dataframes, output_file):
    """Write one landscape-letter PDF containing a titled table per DataFrame.

    Parameters
    ----------
    dataframes : dict
        Maps a title string to a pandas DataFrame. Entries are rendered in
        dictionary order. A frame without columns contributes only its title.
    output_file
        Destination handed to ``SimpleDocTemplate``.

    Every header and data cell is converted with ``str`` and wrapped in a
    ``Paragraph`` so long text wraps inside its column; all columns of a table
    share the same width.
    """
    document = SimpleDocTemplate(output_file, pagesize=landscape(letter))
    stylesheet = getSampleStyleSheet()

    # The sheet's own 'Normal' style is adjusted in place and used for cells.
    wrapped_cell = stylesheet['Normal']
    wrapped_cell.wordWrap = 'CJK'
    wrapped_cell.fontSize = 10

    # Left-aligned bold style for the heading above each table.
    heading_style = ParagraphStyle('TitleStyle', parent=stylesheet['Normal'], alignment=0, fontName='Helvetica-Bold')

    # Same commands, in the same order, for every table.
    table_commands = [
        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
        ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ('TEXTCOLOR', (0, 0), (-1, -1), colors.black),
        ('INNERGRID', (0, 0), (-1, -1), 0.25, colors.black),
        ('BOX', (0, 0), (-1, -1), 0.25, colors.black),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
        ('SIZE', (0, 0), (-1, -1), 9),
        ('TEXTFONT', (0, 0), (-1, -1), 'Helvetica'),
        ('LEADING', (0, 0), (-1, -1), 9),
        ('BACKGROUND', (0, 0), (-1, 0), colors.gray),
    ]

    flowables = []
    for heading, frame in dataframes.items():
        flowables.append(Paragraph(heading, heading_style))

        column_count = len(frame.columns)
        if column_count == 0:
            # Nothing to tabulate; the heading alone is kept.
            continue

        # Header row first, then the data rows, every cell as a Paragraph.
        raw_rows = [list(frame.columns)] + frame.values.tolist()
        table_data = [
            [Paragraph(str(cell), wrapped_cell) for cell in raw_row]
            for raw_row in raw_rows
        ]

        # Spread the columns evenly over 98% of the usable page width.
        column_width = (document.width * 0.98) / column_count
        table = Table(table_data, colWidths=[column_width] * column_count)
        table.setStyle(TableStyle(table_commands))

        flowables.append(table)
        flowables.append(Spacer(1, 12))  # gap before the next heading

    document.build(flowables)


# Get list of ids
Source ids are collected only for reports whose site lies in one of these two U.S. regions:
1) Regional magmatic Ni, Upper Midwest: Minnesota, North Dakota, Wisconsin, Iowa, South Dakota, and a small part of Nebraska
2) Regional lacustrine lithium (brines and clays), Southwest US: Nevada, Utah, Arizona, California

In [321]:
def get_source_ids(files, allowed_states, directory=None):
    """Select the reports whose site is in the USA and in an allowed state.

    Parameters
    ----------
    files : list[str]
        JSON file names (with extension) located in ``directory``.
    allowed_states : list[str]
        State names, compared with the normalized state or province of
        each site.
    directory : str, optional
        Folder containing ``files``. When omitted, the notebook-level
        ``folder_path`` variable is used, as before.

    Returns
    -------
    list[str]
        The file names without their extension, for every site whose
        normalized country is ``"USA"`` and whose normalized state is in
        ``allowed_states``.

    Progress and the detected country/state are printed for every file.
    """
    if directory is None:
        # Backward-compatible fallback to the notebook global.
        directory = folder_path

    source_ids = []
    for file in files:
        print(f"working on file: {file}")

        # Read the file and close it before doing any further processing.
        with open(os.path.join(directory, file), 'r') as f:
            data = json.load(f)

        json_data = data['MineralSite'][0]
        mineral_site = get_mineral_site(json_data)

        country = mineral_site["location_info.country.normalized_uri"][0]
        state = mineral_site["location_info.state_or_province.normalized_uri"][0]

        print(f"country: {country}, state: {state}")
        if country == "USA" and state in allowed_states:
            # Store the id without the file extension.
            source_ids.append(os.path.splitext(file)[0])
        print()
    return source_ids

In [323]:
# Nickel selection.
# NOTE: get_source_ids reads the notebook-level `folder_path`, so it has to
# point at the nickel folder before the call below.
folder_path = '../extracted/twelve_month/nickel/completed/'
NI_files = collect_json_files(folder_path)
# States accepted for the nickel selection.
NI_allowed_states = ['Minnesota', 'North Dakota', 'Wisconsin', 'Iowa', 'South Dakota', 'Nebraska']

nickel_source_ids = get_source_ids(NI_files, NI_allowed_states)


working on file: 021e324a0f4833110e3bb480b95c2c694790d51dfef30104ccfd83d1c50cb0c504_NI_43-101_Technical_Report_for_the_Escape_Lake_Project_in_Canada_dated_October_2019_summary_20240628.json
country: Canada, state: Ontario

working on file: 0270ac43bf7800b950a27295e6cd7a775388fc15371adfb2cc20f2dda24b03b0b4_NI_43-101_Technical_Report_for_the_North_Range_Project_in_North_America_dated_2011_summary_20240628.json
country: Canada, state: Ontario

working on file: 023ea2f158c10fbb2684c5f57e26717a44ae25587460edb01d80d4cff7b60cd4e3_NI_43-101_Technical_Report_for_the_East_Rim-West_Rim_Project_in_North_America_dated_September_2008_summary_20240628.json
country: Canada, state: Ontario

working on file: 029f78e405c1499f678a969facfaedabcfce0999c8e6e1be9756facb9d0a96133a_NI_43-101_Technical_Report_for_the_Thierry_Project_in_North_America_dated_March_2001_summary_20240628.json
country: Canada, state: Ontario

working on file: 029db96ee6956a5c4fc8e3c97aeeb437692f771de262cae7ca5feb5dd20fc174d2_NI_43-101

In [325]:
# Lithium selection.
# NOTE: this rebinds the notebook-level `folder_path` that get_source_ids
# reads, so it must run before the call below.
folder_path = '../extracted/twelve_month/lithium/completed/'
LI_files = collect_json_files(folder_path)
# States accepted for the lithium selection.
LI_allowed_states = ["Nevada", "Utah", "Arizona", "California"]

lithium_source_ids = get_source_ids(LI_files, LI_allowed_states)

working on file: 02733b88f2e890d17442c54e019544695adeb90d8e519aecd8a8c4757260eadc8a_ni_43_101_technical_report_for_the_suwar_project_in_middle_east_yemen_dated_september_2000_summary_20240628.json
country: Yemen, state: None

working on file: 02438bcc98fde0b10d03d369b1c152f2dc9608dfdea90737ab989ef665d6abffb3_NI_43-101_Technical_Report_for_the_Whabouchi_Project_in_North_America_dated_June_2014_summary_20240711.json
country: Canada, state: Quebec

working on file: 02e85e9e11bae58cbf9cfa4d21e56d97ceb937e1d8a02c3a0d853b4c1deafc2a78_NI_43-101_Technical_Report_for_the_Drumheller_project_in_Canada_dated_February_2022_summary_20240710.json
country: Canada, state: Alberta

working on file: 023a009a3f0632b3697a8c102628f14095592aa4605578ae4815889413861f4911_NI_43-101_Technical_Report_for_the_Quebec_Project_in_North_America_dated_May_2010_summary_20240710.json
country: Canada, state: Quebec

working on file: 023510e3a7d7c69e6a9668834ad0f451ef1ba8330b15cda5bf03602d3a541e06d7_ni_43_101_technical_rep

In [326]:
# Number of source ids that passed the country/state filter for each commodity.
print(f"Total found for Nickel : {len(nickel_source_ids)}")
print(f"Total found for Lithium : {len(lithium_source_ids)}")

Total found for Nickel : 4
Total found for Lithium : 63


# Run the collection

In [426]:
# Run configuration: which commodity to render and where its JSON files live.
commodity = 'nickel'
folder_path = f'../extracted/twelve_month/{commodity}/completed/'
# `nickel_source_ids` holds file names WITHOUT the ".json" extension
# (get_source_ids strips it); collect_json_files would return full file names.
files =  nickel_source_ids #collect_json_files(folder_path)
# print(files)

In [428]:
# Make sure the per-commodity output folder exists before writing any PDF.
output_dir = f'../created_pdf/{commodity}'
os.makedirs(output_dir, exist_ok=True)

for file in files:
    print(f"working on file: {file}")
    # `files` holds bare source ids when it comes from get_source_ids and full
    # file names when it comes from collect_json_files. Strip the extension
    # only when it is really there, so ids are never truncated.
    filename = file[:-5] if file.endswith(".json") else file
    file_path = folder_path + filename + ".json"

    with open(file_path, 'r') as f:
        # Load the JSON data into a Python dictionary
        data = json.load(f)

    # Only the first mineral site of the file is rendered.
    json_data = data['MineralSite'][0]
    document_df = get_document_ref(json_data)
    mineral_site = get_mineral_site(json_data)
    mineral_inventory = get_mineral_inventory(json_data)
    deposit_types = get_deposit_types(json_data)

    # One titled table per section, in this order.
    data_dict = {'Document Reference':document_df, 'Mineral Site ':mineral_site, 
                 'Mineral Inventory':mineral_inventory, 'Deposit Types': deposit_types}
    create_pdf_with_tables(data_dict, f'{output_dir}/{filename}.pdf')
    print(f"Finished file {file}")

working on file: 02cf1ccba2286a2b5138ee9957a4fd33c971f0fdcca0b4e0b43c6015ee202ebe1b_NI_43-101_Technical_Report_for_the_Nokomis_Project_in_Minnesota,_United_States_dated_August_2007_summary_20240628
Finished file 02cf1ccba2286a2b5138ee9957a4fd33c971f0fdcca0b4e0b43c6015ee202ebe1b_NI_43-101_Technical_Report_for_the_Nokomis_Project_in_Minnesota,_United_States_dated_August_2007_summary_20240628
working on file: 02617e997aed8a915bce54540938b7568fdd82d43a87b7e5a3b7805e0b2b614e2b_NI_43-101_Technical_Report_(PEA)_for_the_Tamarack_North_Project_in_Minnesota,_United_States_dated_2020_summary_20240628
Finished file 02617e997aed8a915bce54540938b7568fdd82d43a87b7e5a3b7805e0b2b614e2b_NI_43-101_Technical_Report_(PEA)_for_the_Tamarack_North_Project_in_Minnesota,_United_States_dated_2020_summary_20240628
working on file: 021d815ab42768146b121313260e1c3b15714ef7ef597ef121b35f143aafc246bd_NI_43-101_Technical_Report_for_the_Tamarack_South_Project_in_Minnesota,_United_States_dated_2018_summary_20240628
Fini