In [1]:
import pandas as pd
import numpy as np
import pdfplumber

import time
import os
import sys
from pathlib import Path
import csv
import re


import PyPDF2



In [2]:
def get_file_name(file_path):
    
    '''get files under certain path

    Args:
        file path,string

    Returns:
        A dictionary for stroing the file name and it's path

    Raise;
        ValueError,file path name

    '''
    file_dir ={}
    for dirpath, dirnames, files in os.walk(file_path, topdown=False): ## work down certain dir,has 3 returns
        
        for file_name in files:
            if file_name.endswith('.pdf'):
                file_dir[file_name] = os.path.join(dirpath,file_name)
            
    return file_dir

In [3]:
file_dir = get_file_name('./well_logs')

In [4]:
def get_survey_path(file_dir):
    
    '''get file contain 'survey'

    Args:
        file path,string

    Returns:
        A dictionary for stroing the file name and it's path

    Raise;
     
    '''
    survey_path ={}
    pattern = r'.*[Ss]urvey'                                    #pattern
    for file,path in file_dir.items():
        find_file = re.search(pattern,file)                     #find the file with such pattern
        if find_file:
            file_name = find_file.group(0)                      #save the name for matching the pattern
            survey_path[file_name] = path                       # save file name to dictionary
            
    return survey_path
            

In [6]:
survey_path = get_survey_path(file_dir)

In [7]:
def get_survey_content(survey_path):

    '''get pdf content from the survey_path
    code source:https://cloud.tencent.com/developer/article/1386517

    Args:
        survey_path, dict

    Returns:
        tables generator

    Raise;
        file not found error
    '''
    file_table = {}
    for file,path in survey_path.items():
        tables = []                                                      # save each page table to tables container
        pdf = pdfplumber.open(path)
        print('preprocess file of',path)                                  #start of preprocess file
        for page in pdf.pages:
            for pdf_table in page.extract_tables():
                table = [] ## each page saved to one table
                cells = []
                for row in pdf_table:
                    if not any(row):
                        # 如果一行全为空，则视为一条记录结束
                        if any(cells):
                            table.append(cells)
                            cells = []
                    elif all(row):
                        # 如果一行全不为空，则本条为新行，上一条结束
                        if any(cells):
                            table.append(cells)
                            cells = []
                        table.append(row)
                    else:
                        if len(cells) == 0:
                            cells = row
                        else:
                            for i in range(len(row)):
                                if row[i] is not None:
                                    cells[i] = row[i] if cells[i] is None else cells[i] + row[i]
                tables.append(table) ## append each page to one file container
                
                for row in table:
                    row = [re.sub('\s', '}', cell) if cell is not None else None for cell in row]
#                 print('---------- dash line ----------') ## end of preprocess the file pages
        file_table[path] = tables

        yield file_table


In [9]:

def initial_preprocess(file_table):
    '''for each file,record the table with well projection data
    
    Args:
        file_table generator
    Returns:
    
    '''
    preprocessed_file ={}
    output_csv = 'wells_survey'
    
    try:
        for file_pages in get_survey_content(survey_path):

            for file,tables in file_pages.items() : ##iterate through
        #         print(file,tables)
                file_tables = []
                for table in tables:
                    table_rows = []
                    if len(table) == 8 or len(table) == 9:
                        for i,row in enumerate(table):
            #                 print(f'this is {i} row from one table',row,type(row))
                            row_np = np.array(row)
                            row_np_squeeze = row_np.squeeze()

        #                     print(row_np_squeeze.shape)
                            string_np = np.array2string(row_np_squeeze)
                            table_rows.append(string_np) ## append rows to table
    #                         print(type(string_np))
    #                         print(row_np_squeeze,type(row_np_squeeze))
                            with open(output_csv, 'w+') as f:
                                f.write(string_np)


                    file_tables.append(table_rows) ## append table to tables
                print('-------end of file-----------') ## end of preprocess the file
            preprocessed_file[file] = file_tables ## correlate file with tables
            
    except ValueError as e:
        print(e)
        
    return preprocessed_file

preprocess file of ./well_logs\well_logs\01\trackingNo_203729\05535133 Gyro Survey001.pdf
-------end of file-----------
preprocess file of ./well_logs\well_logs\01\trackingNo_203729\42-055-35133 Surveys.pdf
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs\01\trackingNo_205798\Korn A 3H Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs\01\trackingNo_212488\MG LILLIE 1616 GU B 7H-Final Gyro Survey 0-4231.36.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs\08\trackingNo_222775\Survey Report-Garrett-Snell Unit A 25-36 No. 1SH(Final)_.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs\7C\track

preprocess file of ./well_logs\well_logs\01\trackingNo_218952\FINAL_MWD Survey_Cooke Ranch C 5H.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs\01\trackingNo_218948\Winship 1101H MWD Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file----------

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs

preprocess file of ./well_logs\well_logs\01\trackingNo_221107\BHPBilliton_STS B 53H_Certified Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file------

preprocess file of ./well_logs\well_logs\01\trackingNo_221592\64687 - Final Survey Report - ST01.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------

preprocess file of ./well_logs\well_logs\01\trackingNo_221993\Irvin Minerals South E 4H Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------


-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

preprocess file of ./well_logs\well_logs\01\trackingNo_222051\Quetzal C 3H - Survey Report.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-----

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

preprocess file of ./well_logs\well_logs\01\trackingNo_222113\Lothringher No. 2H Survey.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------e

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

preprocess file of ./well_logs\well_logs\01\trackingNo_222379\Lonestar_Burns Eagleford E Unit 13H_Certified Surveys_198 MD to 14967 MD.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file---

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs\01\trackingNo_222818\De La Garza 3H MWD Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------

preprocess file of ./well_logs\well_logs\01\trackingNo_223068\Rio Bravo State EF A 3H Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
--

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

preprocess file of ./well_logs\well_logs\02\trackingNo_219428\Gebauer-Sklar 1H Survey Data.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-----

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

preprocess file of ./well_logs\well_logs\02\trackingNo_221157\McCartney_Unit_2H_Final_Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
--

preprocess file of ./well_logs\well_logs\02\trackingNo_221866\Penn Virginia_Deedra Lori (SA) Unit 3 3H_Certified Surveys_9962 MD to 12969 MD.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of f

preprocess file of ./well_logs\well_logs\02\trackingNo_221955\Pop Unit 9H - Survey Report.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
------

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

preprocess file of ./well_logs\well_logs\02\trackingNo_222153\Lonestar_Buchhorn 5H_Certified Surveys_195 MD to 20169 MD.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
------

preprocess file of ./well_logs\well_logs\02\trackingNo_222162\Lonestar_ Buchhorn 6H_Certified Surveys_195 MD to 20194 MD.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-----

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
preprocess file of ./well_logs\well_logs\03\trackingNo_214633\Lavaca_Myska #4_Certified Surveys.pdf
-------end of file-----------
-------end of file-----------


preprocess file of ./well_logs\well_logs\03\trackingNo_216253\Brazos Farms G 1H Final Surveys.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
--

preprocess file of ./well_logs\well_logs\03\trackingNo_218104\Directional Surveys - SVR #66 Lat 3.pdf
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file----------

-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end of file-----------
-------end

KeyboardInterrupt: 

In [61]:
def second_preprocess(survey_return):
    '''preprocess survey to data to have clean data format
    Args:
        survey_return ,dictionary
    Returns:
        cleaned result, dictionary
    Raise:
    
    '''
    file_splited_rows = {}
    for key,value in preprocessed_file.items():              # terate the dictionary
        value_table =[]                                       # container to save cleaned list
        value_np = np.array(value)
        value_np_squ = value_np.squeeze()                     # squeeze down the numpy array
        for array in value_np_squ:                  
            if len(array)> 0:                                 # chose those has element
                for i in range(0,len(array)):                 # iterate the array
                    row_split = array[i].split('\\n')          # get the splitted data
                    for row in row_split:                     # iterate throught the splited data
                        value_table.append(row)                # append to container

        file_splited_rows[key] = value_table                   #save data to dictionary

    return file_splited_rows

In [None]:
def main():
    
    file_folder = './well_logs'                                # file folder ready to be preprocessed
    pdf_files = get_file_name(file_folder)                     # get all the pdf files
    pdf_survey_files = get_survey_path(pdf_files)              # get all the survey pdf files
    file_table = get_survey_content(pdf_survey_files)          # get content from the survey pdf files
    for table in file_table:                                  # iterate the generator
        preprocessed_file = initial_preprocess(table)          # fist preprocess to have the demanding data
        file_splited_rows =second_preprocess(survey_return)    #
                                    