In [20]:
import pandas as pd
import json
import re
import csv
import os
import copy
from datetime import datetime
import numpy as np
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Spacer, Paragraph
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle

In [21]:
def read_csv_to_dict(file_path):
    data_dict_list = []
    
    with open(file_path, mode='r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        
        for row in csv_reader:
            data_dict_list.append(dict(row))
    
    return data_dict_list

In [22]:
def select_keys(d, keys):
    selected = {}
    for key in keys:
        value = d
        for subkey in key.split('.'):
            if isinstance(value, dict) and subkey in value:
                value = value[subkey]
            else:
                value = None
                break
        selected[key] = value
    return selected

In [23]:
 def get_deposit_types(json_data):   
    minmod_deposit_types = read_csv_to_dict("./codes/minmod_deposit_types.csv")
    deposit_id = {}
    for key in minmod_deposit_types:
        deposit_id[key['Minmod ID']] = key['Deposit type']

    data = []
    for item in json_data['deposit_type']:
        row = {
            "deposit type": deposit_id[item.split('/')[-1]],
        }
        data.append(row)

    # Creating DataFrame
    deposit_types = pd.DataFrame(data)
    return deposit_types

In [24]:
def get_document_ref(json_data):
    document_ref = json_data['MineralInventory'][0]['reference']['document']
    document_df = pd.DataFrame(document_ref)
    return document_df

In [25]:
def get_mineral_site(json_data):
    selected_keys = ["source_id", "name", "location_info.location","location_info.country", 
                     "location_info.crs", "location_info.state_or_province"]

    mineral_site = pd.DataFrame(select_keys(json_data, selected_keys), index=[0])
    return mineral_site

In [26]:
def get_mineral_inventory(json_data):
    minmod_commodities = read_csv_to_dict("./codes/minmod_commodities.csv")
    commodities = {}
    for key in minmod_commodities:
        commodities[key['minmod_id']] = key['CommodityinGeoKb']

    minmod_units = read_csv_to_dict("./codes/minmod_units.csv")
    correct_units = {}
    for key in minmod_units:
        correct_units[key['minmod_id']] =  key['unit name']

    # Extracting required fields with cutoff_unit and cutoff_value
    data = []
    for item in json_data['MineralInventory']:
        row = {
            "zone": item.get("zone"),
            "page_number": item["reference"]["page_info"][0]["page"],
            "commodity": commodities[item.get("commodity").split('/')[-1]],
            "category": item["category"][0].split('/')[-1],
            "ore_unit": correct_units[item["ore"]["ore_unit"].split('/')[-1]],
            "ore_value": item["ore"]["ore_value"],
            "grade_unit": correct_units[item["grade"]["grade_unit"].split('/')[-1]],
            "grade_value": item["grade"]["grade_value"],
            "cutoff_unit": correct_units[item["cutoff_grade"].get("grade_unit").split('/')[-1]] if "cutoff_grade" in item else None,
            "cutoff_value": item["cutoff_grade"].get("grade_value") if "cutoff_grade" in item else None
        }
        data.append(row)

    # Creating DataFrame
    mineral_inventory = pd.DataFrame(data)
    return mineral_inventory

In [28]:
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Spacer, Paragraph
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle

def create_pdf_with_tables(dataframes, output_file):
    doc = SimpleDocTemplate(output_file, pagesize=letter)
    elements = []
    styles = getSampleStyleSheet()

    # Define a style for wrapped text inside table cells
    cell_style = styles['Normal']
    cell_style.wordWrap = 'CJK'  # Use 'CJK' word wrapping which wraps on any character

    for name, dataframe in dataframes.items():
        # Add title above each table
        title_style = ParagraphStyle('TitleStyle', parent=styles['Normal'], alignment=0, fontName='Helvetica-Bold')  # Left alignment and bold font
        title = Paragraph(name, title_style)
        elements.append(title)
        
        # Convert DataFrame to list of lists
        # Ensure each cell in the dataframe is wrapped with a Paragraph object
        table_data = [[Paragraph(str(cell), cell_style) for cell in dataframe.columns]]  # Header row
        for row in dataframe.values.tolist():
            table_data.append([Paragraph(str(cell), cell_style) for cell in row])  # Data rows

        # Calculate the width of the table to fit within the page width
        table_width = doc.width * 0.8  # Set the width to 80% of the page width
        
        # Create table with column width calculated to fit the page
        table = Table(table_data, colWidths=[table_width / len(dataframe.columns)] * len(dataframe.columns))

        # Add style to the table
        style = TableStyle([
            # ... (keep existing style definitions)
        ])
        
        table.setStyle(style)
        elements.append(table)

        # Add space between tables
        elements.append(Spacer(1, 24))

    # Build PDF with all tables
    doc.build(elements)

In [29]:
folder_path = './extracted/'

files = os.listdir(folder_path)
print(files)

['Penasquito_Zn_Pb_Ag_Au_3-2016_OM_summary_20240205_105558.json', '.ipynb_checkpoints', 'Penasquito_Au_Ag_Pb_Zn_8-2018_OM_summary_20240202_131212.json', 'Nuestra_Senora_Zn_Ag_4-2013_PEA_summary_20240201_124310.json']


In [30]:
file_path = folder_path + files[0]

with open(file_path, 'r') as f:
    # Load the JSON data into a Python dictionary
    data = json.load(f)

# Now 'data' contains the contents of the JSON file
json_data = data['MineralSite'][0]
document_df = get_document_ref(json_data)
mineral_site = get_mineral_site(json_data)
mineral_inventory = get_mineral_inventory(json_data)
deposit_types = get_deposit_types(json_data)


In [31]:
data_dict = {'Document Reference':document_df, 'Mineral Site ':mineral_site, 
             'Mineral Inventory':mineral_inventory, 'Deposit Types': deposit_types}
create_pdf_with_tables(data_dict, './created_pdf/tables.pdf')
