# Preprocessing

Preprocessing and further cleaning of the SARIG datasets

In [4]:
import os 
# import pygslib
import numpy as np
import pandas as pd
import plotly.express as px

# The project's data folder lives in `data/` under the parent of the current
# working directory (the notebook is expected to run from a sub-folder).
project_root = os.path.dirname(os.getcwd())
data_dir = os.path.join(project_root, 'data')

In [5]:
import os
import pandas as pd
import numpy as np
import glob
# Read the XYZ tensor table. Note that `xyz_dir` temporarily holds the path of
# the CSV *file* here; the next cell rebinds it to the containing folder.
xyz_parent = os.path.dirname(os.getcwd())
xyz_dir = os.path.join(xyz_parent, 'data', 'XYZ', "tensor.csv")
df = pd.read_csv(filepath_or_buffer=xyz_dir)

In [6]:
# Write the tensor table back out as locations.csv (row index omitted).
# NOTE(review): at this point `df` still holds every tensor column; the
# EASTING/NORTHING-only reduction happens in the following cell. Confirm that
# locations.csv is meant to carry all columns.
xyz_dir = os.path.join(os.path.dirname(os.getcwd()), 'data', 'XYZ')
locations_csv = os.path.join(xyz_dir, 'locations.csv')
df.to_csv(locations_csv, index=False)

In [7]:
# Reduce the tensor table to its two coordinate columns.
# The labels are passed through the `columns=` keyword: handing `axis` to
# DataFrame.drop positionally (`drop(labels, 1)`) raised a FutureWarning in
# pandas 1.x and is rejected by pandas 2.x. Rebinding instead of
# `inplace=True` keeps the cell free of in-place mutation.
df = df.drop(columns=df.columns.difference(['EASTING', 'NORTHING']))
df

  df.drop(df.columns.difference(['EASTING','NORTHING']), 1, inplace=True)


Unnamed: 0,EASTING,NORTHING
0,113491.373,6.974338e+06
1,113741.373,6.974338e+06
2,113991.373,6.974338e+06
3,114241.373,6.974338e+06
4,114491.373,6.974338e+06
...,...,...
6391565,788241.373,6.384088e+06
6391566,788491.373,6.384088e+06
6391567,788741.373,6.384088e+06
6391568,788991.373,6.384088e+06


In [8]:
# Load the long-format assay table (one row per sample/element/method).
# `df` is rebound here; the tensor table from the previous cells is discarded.
assays_csv = f'{data_dir}/Assays.csv'
df = pd.read_csv(assays_csv)
df

Unnamed: 0,sample_id,ele,result,old_unit,method,fixed_result,isUDL,isNEG,isINT
0,458873_SA,Fe,82000.000000,%,AAS2,,,,
1,458873_SA,Mn,670.000000,ppm,AAS2,,,,
2,458873_SA,Cr,4.000000,ppm,AAS2,,1.0,,
3,458873_SA,Ti,9500.000000,%,COL,,,,
4,458873_SA,Cu,200.000000,ppm,AAS1,,,,
...,...,...,...,...,...,...,...,...,...
21030710,2978525_SA,Tm,3.090000,ppm,FB6/MS,,,,
21030711,2978525_SA,U,817.757802,ppm,FB6/MS,,,,
21030712,2978525_SA,W,124.000000,ppm,FB6/MS,,,,
21030713,2978525_SA,Y,199.400000,ppm,FB6/MS,,,,


#  Remove Inaccurate and Unfixable Datapoints from the Dataset

* Invalid datapoints are set to -777. Remove these points from the dataset.
* Impute below-detection-limit datapoints with half of the detection limit, which returns results similar to lognormal replacement but is much less computationally expensive. 
* Display the datapoints flagged as negative in the dataset

In [9]:
# Drop rows whose `isUDL` flag carries the -777 "invalid" sentinel.
# `.copy()` makes the filtered frame independent of the original so that the
# assignment below no longer triggers pandas' SettingWithCopyWarning.
df = df[df['isUDL'] != -777].copy()

# Where a corrected value exists, it replaces the raw result.
has_fix = df['fixed_result'].notna()
df.loc[has_fix, 'result'] = df.loc[has_fix, 'fixed_result']

# Show the rows flagged as negative, smallest result first.
df[df['isNEG'] == 1].sort_values(by='result')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,sample_id,ele,result,old_unit,method,fixed_result,isUDL,isNEG,isINT
326978,408157_SA,Sn,0.000000,g/T,UKN,,,1.0,1.0
325739,408033_SA,W,0.000000,g/T,UKN,,,1.0,1.0
323014,637941_SA,Te,0.300000,g/T,UKN,,,1.0,1.0
340899,1478298_SA,Nb,0.699056,g/T,XRF,,,1.0,1.0
325575,637681_SA,Nb,0.699056,g/T,,,,1.0,1.0
...,...,...,...,...,...,...,...,...,...
341055,1478322_SA,Te,62.000000,g/T,XRF,,,1.0,1.0
321873,637788_SA,Te,65.000000,g/T,UKN,,,1.0,1.0
327624,407995_SA,Te,70.000000,g/T,UKN,,,1.0,1.0
321669,637769_SA,Ba,117.000000,g/T,UKN,,,1.0,1.0


# Removal of Negative and Zero Datapoints
* As seen in the above table, there are many datapoints with a negative value
* Impute these datapoints with a value half of the detection limit, similar to the -777 datapoints
* Remove datapoints with 0ppm
* Pivot the shape of the dataset

In [10]:
# Remove rows flagged as negative. The explicit copy avoids assigning into a
# filtered slice (SettingWithCopyWarning) in the statements that follow.
df = df[df['isNEG'] != 1].copy()

# Below-detection-limit rows are replaced by half of their reported value.
# Caution: this mutates `df`, so re-running the cell halves the values again.
udl_rows = df['isUDL'] == 1
df.loc[udl_rows, 'result'] = df.loc[udl_rows, 'result'] / 2

# Discard exact zeros, then strip the '_SA' marker from the sample ids
# (literal replacement, not a regular expression).
df = df[df['result'] != 0].copy()
df['sample_id'] = df['sample_id'].str.replace('_SA', '', regex=False)

# Long -> wide: one row per (sample_id, method), one column per element.
# pivot_table's default aggregation (mean) is applied to repeated entries.
# NOTE(review): rows whose `method` is missing are presumably dropped by the
# pivot because it is part of the index - confirm this is intended.
df_p = pd.pivot_table(df, index=['sample_id', 'method'], columns='ele', values='result')
df_p = df_p.rename_axis(None, axis=1).reset_index()
df_p

Unnamed: 0,sample_id,method,Ag,Al,As,Au,B,Ba,Be,Bi,...,Ti,Tl,Tm,U,V,W,Y,Yb,Zn,Zr
0,10013,ICP,1.0,,13.0,,,470.0,,2.0,...,,,,,,,,,,
1,10014,ICP,1.0,,15.0,,,500.0,,2.0,...,,,,,,,,,,
2,10015,ICP,0.5,,11.0,,,480.0,,2.0,...,,,,,,,,,,
3,10016,ICP,1.0,,23.0,,,580.0,,2.0,...,,,,,,,,,,
4,10017,ICP,1.0,,25.0,,,600.0,,2.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2820539,9995,AES,,,,,,,,,...,,,,6.0,,5.0,,,,
2820540,9996,AES,,,,,,,,,...,,,,2.0,,5.0,,,,
2820541,9997,AES,,,,,,,,,...,,,,2.0,,5.0,,,,
2820542,9998,AES,,,,,,,,,...,,,,4.0,,5.0,,,,


# Add Collar and Sample Data
* The GIS software QGIS was used to merge the collar and sample data into this single dataframe
* Cropping of the input data to only include data within the allocation area of interest

In [11]:
# Read the sample and collar tables exported from QGIS.
df_s = pd.read_csv(f'{data_dir}/Sample_Info.csv')
df_l = pd.read_csv(f'{data_dir}/Collars.csv')

# Remove the '_SA' marker from every id column so that the keys line up
# with the ids already cleaned in the pivoted assay table.
for frame, id_column in ((df_s, 'sample_id'), (df_s, 'collar_id'), (df_l, 'collar_id')):
    frame[id_column] = frame[id_column].str.replace('_SA', '')

# collar -> samples -> assays; both joins are checked to be one-to-many.
tmp = df_l.merge(df_s, on='collar_id', validate='1:m')
df = tmp.merge(df_p, on='sample_id', validate='1:m')
df

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,collar_id,collar_nam,max_depth,date_drill,lon,lat,dip,azi,reference,xcoord,...,Ti,Tl,Tm,U,V,W,Y,Yb,Zn,Zr
0,436,NULLARBOR PLAINS 6,304.80,10/09/1899,131.193825,-31.149046,-90.0,,ENV 06488,137080.83819,...,,,,,,,,,2400.0,
1,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,,ENV 06488,175290.92394,...,,,,,,,,,91.0,
2,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,,ENV 06488,175290.92394,...,3357.112883,,,,,,,,,
3,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,,ENV 06488,175290.92394,...,,,,,,,,,,
4,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,,ENV 06488,175290.92394,...,,0.25,0.17,1.100000,62.0,1.0,10.4,1.07,,139.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1555998,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,,,,,,,,47.0,
1555999,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,,,,,,,,,
1556000,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,,,,,,,,,
1556001,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,,3.09,817.757802,,124.0,199.4,20.09,,


## Limiting of Drillhole Information
* As the drone system is able to capture approx. 10-15 cm of topsoil, anything deeper than this was eliminated from the datasets
* Correct interval issues arising from an absence of ''from'' values and ''to'' values. 

In [12]:
# Deepest sampled `to` per hole name. The aggregation is named by string
# ('max') rather than by passing the Python builtin.
df['sampletomax'] = df.groupby(['collar_nam'])['to'].transform('max')

# true_max is the larger of the recorded hole depth and the deepest sample.
# Rows where either value is missing match neither mask and stay NaN.
depth_covers_samples = df['max_depth'] >= df['sampletomax']
samples_exceed_depth = df['max_depth'] < df['sampletomax']
df.loc[depth_covers_samples, 'true_max'] = df.loc[depth_covers_samples, 'max_depth']
df.loc[samples_exceed_depth, 'true_max'] = df.loc[samples_exceed_depth, 'sampletomax']

# Missing dip defaults to -90, missing azimuth to 0; dip is forced negative.
df['dip'] = -df['dip'].fillna(-90.0).abs()
df['azi'] = df['azi'].fillna(0)

# Dips of -40 or steeper are kept; shallower values are replaced by -(dip + 90).
steep = df['dip'] <= -40
df.loc[steep, 'true_dip'] = df.loc[steep, 'dip']
df.loc[~steep, 'true_dip'] = -(df.loc[~steep, 'dip'] + 90)

# Keep rows that have a `from` depth, fill a missing `to` with from + 1, and
# drop rows whose `to` is 0. The filled column is computed up front and the
# filtered frame is copied, so nothing is assigned into a slice (this is what
# raised SettingWithCopyWarning before).
to_filled = df['to'].fillna(df['from'] + 1)
keep = df['from'].notna() & (to_filled != 0)
df = df.loc[keep].copy()
df['to'] = to_filled.loc[keep]

df['Interval'] = df['to'] - df['from']
df['sample_source'] = df['sample_source'].fillna('unkn')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['to'] = df['to'].fillna(df['from']+1)


# If drillhole data is being considered, then perform desurveying of drillholes
* Desurveying computes the geometry of a drillhole in three-dimensional space based on its collar location and the raw dip (or inclination), azimuth (or direction) and depth data of one or more surveys. The resulting geometry is a polyline – a connected series of (X, Y, Z) coordinates used to find the composite locations.
* The pygslib library was used to perform this desurveying

In [13]:
# Display the merged frame, now carrying the derived sampletomax, true_max,
# true_dip and Interval columns.
df

Unnamed: 0,collar_id,collar_nam,max_depth,date_drill,lon,lat,dip,azi,reference,xcoord,...,V,W,Y,Yb,Zn,Zr,sampletomax,true_max,true_dip,Interval
0,436,NULLARBOR PLAINS 6,304.80,10/09/1899,131.193825,-31.149046,-90.0,0.00,ENV 06488,137080.83819,...,,,,,2400.0,,299.92,304.80,-90.0,4.26
1,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,0.00,ENV 06488,175290.92394,...,,,,,91.0,,154.23,162.15,-90.0,36.88
2,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,0.00,ENV 06488,175290.92394,...,,,,,,,154.23,162.15,-90.0,36.88
3,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,0.00,ENV 06488,175290.92394,...,,,,,,,154.23,162.15,-90.0,36.88
4,577,NULLARBOR PLAINS 7,162.15,31/12/1900,131.597958,-31.047286,-90.0,0.00,ENV 06488,175290.92394,...,62.0,1.0,10.4,1.07,,139.0,154.23,162.15,-90.0,36.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1555998,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,,,,47.0,,280.00,280.00,-68.0,2.00
1555999,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,,,,,,280.00,280.00,-68.0,2.00
1556000,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,,,,,,280.00,280.00,-68.0,2.00
1556001,333102,RU41-11783,280.00,26/10/2018,136.897367,-30.447016,-22.0,197.87,,682187.22501,...,,124.0,199.4,20.09,,,280.00,280.00,-68.0,2.00


In [14]:
# Exploratory filter (intervals longer than 10 in hole 'RL 1'). It is not the
# last expression of the cell, so its result is not displayed.
df[(df['collar_nam']=='RL 1') & (df['Interval']>10)]

# Hand-made corrections to `from`/`to`, addressed by row label:
# (row labels, column to overwrite, corrected depth).
interval_fixes = [
    (range(47401, 47409), 'from', 642),
    (range(8512, 8519), 'to', 870.9),
    (range(436474, 436477), 'to', 662),
    ([80030, 80031], 'from', 181),
    ([80442, 80443], 'to', 367),
    ([80440, 80441], 'to', 369),
    ([663622, 663623, 663624], 'from', 368),
    ([881022, 881023, 881024], 'to', 769.2),
    ([12050], 'to', 1010),
    ([64525, 64526], 'to', 230),
    ([1156448], 'from', 110),
]
for row_labels, column, depth in interval_fixes:
    df.loc[list(row_labels), column] = depth

# Two rows are removed outright, then the interval length is recomputed.
df = df.drop([7761, 7762])
df['Interval'] = df['to'] - df['from']

# Keep intervals between 1 and 10 (inclusive) that end at depth 100 or more.
# NOTE(review): the section text talks about keeping only the top 10-15 cm,
# whereas this filter keeps `to >= 100` - confirm which one is intended.
wanted = df['Interval'].between(1, 10) & (df['to'] >= 100)
df = df[wanted]

In [17]:
# Inspect the samples that belong to holes with a recorded max_depth of 600 or more.
df[df['max_depth']>=600]

Unnamed: 0,collar_id,collar_nam,max_depth,date_drill,lon,lat,dip,azi,reference,xcoord,...,V,W,Y,Yb,Zn,Zr,sampletomax,true_max,true_dip,Interval
5198,6426,WILGENA 1,973.0,14/07/1983,134.640778,-30.696873,-90.0,0.00,RB 84/00013,465598.38454,...,,,,,,,973.0,973.0,-90.0,1.0
5199,6426,WILGENA 1,973.0,14/07/1983,134.640778,-30.696873,-90.0,0.00,RB 84/00013,465598.38454,...,,,,,,,973.0,973.0,-90.0,1.0
5200,6426,WILGENA 1,973.0,14/07/1983,134.640778,-30.696873,-90.0,0.00,RB 84/00013,465598.38454,...,,,,,,,973.0,973.0,-90.0,1.0
5201,6426,WILGENA 1,973.0,14/07/1983,134.640778,-30.696873,-90.0,0.00,RB 84/00013,465598.38454,...,,,,,,,973.0,973.0,-90.0,1.0
5202,6426,WILGENA 1,973.0,14/07/1983,134.640778,-30.696873,-90.0,0.00,RB 84/00013,465598.38454,...,,,,,,,973.0,973.0,-90.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1554534,333099,RD80,981.0,15/06/1980,136.908390,-30.447390,-90.0,347.47,,683245.23349,...,40.0,,,,,290.0,981.0,981.0,-90.0,8.0
1554535,333099,RD80,981.0,15/06/1980,136.908390,-30.447390,-90.0,347.47,,683245.23349,...,,1.5,63.0,,,,981.0,981.0,-90.0,8.0
1554536,333099,RD80,981.0,15/06/1980,136.908390,-30.447390,-90.0,347.47,,683245.23349,...,,,,,,,981.0,981.0,-90.0,8.0
1554537,333099,RD80,981.0,15/06/1980,136.908390,-30.447390,-90.0,347.47,,683245.23349,...,,,,,,,981.0,981.0,-90.0,8.0


In [8]:
# Collar table for desurveying: one row per hole name (the first occurrence
# wins), with the column names BHID / XCOLLAR / YCOLLAR / ZCOLLAR.
collar_columns = {
    'collar_nam': 'BHID',
    'xcoord': 'XCOLLAR',
    'ycoord': 'YCOLLAR',
    'DEM': 'ZCOLLAR',
}
collar = (
    df[['collar_nam', 'xcoord', 'ycoord', 'DEM']]
    .rename(columns=collar_columns)
    .drop_duplicates('BHID')
)
collar

Unnamed: 0,BHID,XCOLLAR,YCOLLAR,ZCOLLAR
0,NULLARBOR PLAINS 6,137080.83819,6.547639e+06,69.562889
22,TW 1,208744.37599,6.556477e+06,67.097267
49,TW 4,208208.60504,6.558013e+06,71.534447
89,TW 5,207128.93953,6.557195e+06,65.308632
109,TW 6,206285.49456,6.557976e+06,66.677780
...,...,...,...,...
1553225,RD38,679774.52810,6.630960e+06,107.408196
1553903,RD80,683245.23349,6.630092e+06,108.263771
1554755,RU41-11782,682187.33232,6.630151e+06,106.889030
1555055,RU41-11782W1,682187.33232,6.630151e+06,106.889030


In [9]:
# Survey table for desurveying: each distinct (hole, depth, dip, azimuth)
# combination, with the depth taken from true_max and the dip from true_dip.
survey = (
    df[['collar_nam', 'true_max', 'true_dip', 'azi']]
    .rename(columns={'collar_nam': 'BHID', 'true_max': 'AT', 'true_dip': 'DIP', 'azi': 'AZ'})
    .drop_duplicates()
)
survey

Unnamed: 0,BHID,AT,DIP,AZ
0,NULLARBOR PLAINS 6,304.8,-90.0,0.00
22,TW 1,200.0,-90.0,0.00
49,TW 4,192.0,-90.0,0.00
89,TW 5,117.0,-90.0,0.00
109,TW 6,120.1,-90.0,0.00
...,...,...,...,...
1553225,RD38,1001.1,-90.0,347.47
1553903,RD80,981.0,-90.0,347.47
1554755,RU41-11782,219.7,-76.9,198.27
1555055,RU41-11782W1,236.2,-76.9,198.27


In [10]:
# Build a second survey table from the raw values of `survey`: every row is
# repeated twice, and the frame gets a fresh 0..2n-1 integer index.
survey2 = pd.DataFrame(np.repeat(survey.values, 2, axis=0))
survey2.columns = survey.columns

# Taking every second row leaves one row per original survey row, labelled
# 0, 2, 4, ...; its depth is then overwritten with 0 so that each hole also
# has a survey station at AT = 0.
survey2 = survey2.iloc[::2, :]
survey2['AT'] = 0
# NOTE(review): the concatenated frame mixes two label sets - the labels
# `survey` inherited from `df` and the even labels of `survey2` - so a label
# may occur twice. The next cell drops rows by label and depends on these
# exact labels; keep both cells in sync if either is changed.
survey = pd.concat([survey, survey2])
survey['AT'] = survey['AT'].fillna(0)
survey = survey.drop_duplicates()

# List holes that still have more than one station at the same depth.
survey[survey.duplicated(['BHID', 'AT'], keep=False)].sort_values('BHID')

Unnamed: 0,BHID,AT,DIP,AZ
16752,TD 1,498.0,-90.0,0.0
154235,TD 1,498.0,-60.0,100.0
196,TD 1,0.0,-90.0,0.0
1738,TD 1,0.0,-60.0,100.0


In [11]:
# Hole 'TD 1' carries two conflicting surveys (DIP -90 / AZ 0 and
# DIP -60 / AZ 100). The vertical one is discarded, as before, but the rows
# are now selected by content rather than by the labels 16752 and 196: the
# survey index combines labels from two different sources, so a label-based
# drop can also delete unrelated rows that happen to share those labels.
td1_vertical = (
    (survey['BHID'] == 'TD 1')
    & (survey['DIP'] == -90.0)
    & (survey['AZ'] == 0.0)
)
survey = survey[~td1_vertical].sort_values(['BHID', 'AT'])

# After the clean-up every hole must have at most one station per depth.
assert not survey.duplicated(['BHID', 'AT']).any(), 'duplicate (BHID, AT) survey stations remain'

# Interval table for desurveying: drop the columns that are no longer needed
# and rename the hole and depth columns to BHID / FROM / TO.
assay = df.drop(columns=['collar_id', 'max_depth', 'lon', 'lat', 'dip', 'sampletomax'])
assay = assay.rename(columns={'collar_nam': 'BHID', 'from': 'FROM', 'to': 'TO'})
assay

Unnamed: 0,BHID,date_drill,azi,reference,xcoord,ycoord,DEM,sample_id,sample_source,FROM,...,U,V,W,Y,Yb,Zn,Zr,true_max,true_dip,Interval
0,NULLARBOR PLAINS 6,10/09/1899,0.00,ENV 06488,137080.83819,6.547639e+06,69.562889,63504,Drill cuttings,295.66,...,,,,,,2400.0,,304.8,-90.0,4.26
22,TW 1,31/08/1980,0.00,ENV 03530,208744.37599,6.556477e+06,67.097267,63535,Drill cuttings,101.00,...,,,,,,75.0,,200.0,-90.0,2.00
23,TW 1,31/08/1980,0.00,ENV 03530,208744.37599,6.556477e+06,67.097267,63536,Drill cuttings,105.00,...,,,,,,75.0,,200.0,-90.0,2.00
24,TW 1,31/08/1980,0.00,ENV 03530,208744.37599,6.556477e+06,67.097267,63537,Drill cuttings,109.00,...,,,,,,80.0,,200.0,-90.0,2.00
25,TW 1,31/08/1980,0.00,ENV 03530,208744.37599,6.556477e+06,67.097267,63538,Drill cuttings,113.00,...,,,,,,85.0,,200.0,-90.0,2.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1555998,RU41-11783,26/10/2018,197.87,,682187.22501,6.630151e+06,106.889030,2978525,Drill core,278.00,...,,,,,,47.0,,280.0,-68.0,2.00
1555999,RU41-11783,26/10/2018,197.87,,682187.22501,6.630151e+06,106.889030,2978525,Drill core,278.00,...,,,,,,,,280.0,-68.0,2.00
1556000,RU41-11783,26/10/2018,197.87,,682187.22501,6.630151e+06,106.889030,2978525,Drill core,278.00,...,,,,,,,,280.0,-68.0,2.00
1556001,RU41-11783,26/10/2018,197.87,,682187.22501,6.630151e+06,106.889030,2978525,Drill core,278.00,...,817.757802,,124.0,199.4,20.09,,,280.0,-68.0,2.00


In [12]:
# pygslib is imported in this cell because the import in the notebook's first
# cell is commented out; without it the cell fails with a NameError on a
# freshly started kernel.
import pygslib

# Drillhole database built from the collar and survey tables, with the
# sample intervals attached as the 'assay' table.
mydholedb = pygslib.drillhole.Drillhole(collar=collar, survey=survey)
mydholedb.addtable(assay, 'assay', overwrite=False)

# Validate the collar/survey tables, then desurvey the intervals. With
# endpoints=True the 'assay' table gains begin/end columns (azmb, dipb, xb,
# yb, zb, azme, dipe, xe, ye, ze), as seen in the displayed result.
mydholedb.validate()
mydholedb.desurvey('assay', warns=False, endpoints=True)
ddf = pd.DataFrame(mydholedb.table["assay"])
ddf

  mydholedb=pygslib.drillhole.Drillhole(collar=collar, survey=survey)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)
  mydholedb.desurvey('assay', warns=False, endpoints=True)


Unnamed: 0,BHID,date_drill,azi,reference,xcoord,ycoord,DEM,sample_id,sample_source,FROM,...,azmb,dipb,xb,yb,zb,azme,dipe,xe,ye,ze
0,00CHWB003,14/12/2000,0.0,ENV 09006,360998.81012,6.694173e+06,182.485229,1359262,"Drilled interval rock sample, type unspecified",96.0,...,180.0,-90.0,360998.81012,6.694173e+06,278.485229,180.0,-90.0,360998.81012,6.694173e+06,282.485229
1,00CHWB003,14/12/2000,0.0,ENV 09006,360998.81012,6.694173e+06,182.485229,1359263,"Drilled interval rock sample, type unspecified",100.0,...,180.0,-90.0,360998.81012,6.694173e+06,282.485229,180.0,-90.0,360998.81012,6.694173e+06,286.485229
2,00CHWB003,14/12/2000,0.0,ENV 09006,360998.81012,6.694173e+06,182.485229,1359264,"Drilled interval rock sample, type unspecified",104.0,...,180.0,-90.0,360998.81012,6.694173e+06,286.485229,180.0,-90.0,360998.81012,6.694173e+06,290.485229
3,00CHWB003,14/12/2000,0.0,ENV 09006,360998.81012,6.694173e+06,182.485229,1359265,"Drilled interval rock sample, type unspecified",108.0,...,180.0,-90.0,360998.81012,6.694173e+06,290.485229,180.0,-90.0,360998.81012,6.694173e+06,294.485229
4,00CHWB003,14/12/2000,0.0,ENV 09006,360998.81012,6.694173e+06,182.485229,1359266,"Drilled interval rock sample, type unspecified",112.0,...,180.0,-90.0,360998.81012,6.694173e+06,294.485229,180.0,-90.0,360998.81012,6.694173e+06,298.485229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
829872,YSAC026,05/12/2008,0.0,ENV 10621,458587.70897,6.430818e+06,93.073540,1810968,Drill cuttings,96.0,...,180.0,-90.0,458587.70897,6.430818e+06,189.073540,180.0,-90.0,458587.70897,6.430818e+06,193.073540
829873,YSAC026,05/12/2008,0.0,ENV 10621,458587.70897,6.430818e+06,93.073540,1810969,Drill cuttings,100.0,...,180.0,-90.0,458587.70897,6.430818e+06,193.073540,180.0,-90.0,458587.70897,6.430818e+06,197.073540
829874,YSAC026,05/12/2008,0.0,ENV 10621,458587.70897,6.430818e+06,93.073540,1810969,Drill cuttings,100.0,...,180.0,-90.0,458587.70897,6.430818e+06,193.073540,180.0,-90.0,458587.70897,6.430818e+06,197.073540
829875,YSAC026,05/12/2008,0.0,ENV 10621,458587.70897,6.430818e+06,93.073540,1810970,Drill cuttings,104.0,...,180.0,-90.0,458587.70897,6.430818e+06,197.073540,0.0,-90.0,458587.70897,6.430818e+06,201.073540


In [13]:
# Highest Au values first (exploratory; not the last expression of the cell,
# so nothing is displayed).
ddf[['BHID', 'date_drill', 'reference', 'method', 'sample_id', 'FROM', 'TO', 'Au']].sort_values('Au', ascending=False).head(500)

# Au values of two report/method combinations are divided by 1000.
# NOTE(review): presumably a unit correction - confirm against the source reports.
# Caution: re-running the cell divides the same rows again.
for report, assay_method in (('ENV 06960', 'FA2'), ('ENV 09008', 'B/AAS')):
    au_rows = (ddf['reference'] == report) & (ddf['method'] == assay_method)
    ddf.loc[au_rows, 'Au'] = ddf['Au'] / 1000

In [14]:
# The 500 highest Cu values, with the identifying columns of each sample.
cu_view = ['BHID', 'date_drill', 'reference', 'sample_id', 'Cu']
ddf[cu_view].sort_values(by='Cu', ascending=False).head(500)

Unnamed: 0,BHID,date_drill,reference,sample_id,Cu
782266,WRD29,30/06/2008,ENV 09288,2077720,176000.0
596513,PH06D221,08/02/2006,,1986731,140000.0
596511,PH06D221,08/02/2006,,1986730,140000.0
596481,PH06D221,08/02/2006,,1986715,137000.0
596479,PH06D221,08/02/2006,,1986714,137000.0
...,...,...,...,...,...
723442,RP034,26/07/2003,ENV 06960,1978391,56000.0
146189,CAR051W1,22/04/2007,ENV 09185,1874689,56000.0
604674,PH06D257,15/03/2006,,1992417,55900.0
590770,PH06D210,17/01/2006,,1982893,55900.0


In [15]:
# The 500 highest Pb values, with the identifying columns of each sample.
pb_view = ['BHID', 'date_drill', 'reference', 'sample_id', 'Pb']
ddf[pb_view].sort_values(by='Pb', ascending=False).head(500)

Unnamed: 0,BHID,date_drill,reference,sample_id,Pb
459452,GP002D,05/03/1996,,1593295,76100.0
459455,GP002D,05/03/1996,,1593296,63300.0
26223,2000CADI7,21/08/2000,ENV 09248,2011808,61632.0
802313,WRD34,04/10/2010,ENV 09288,2663701,46100.0
309022,DD10EPS006,22/08/2010,ENV 06960,2622229,44944.0
...,...,...,...,...,...
624555,RC06PD027,25/01/2006,,1283388,2480.0
824976,WRD51,27/02/2012,ENV 09288,2124130,2470.0
458346,GHDD6,26/03/2008,ENV 11340,1931451,2462.0
360503,DD11HLH008,21/03/2011,ENV 06960,2626419,2460.0


In [55]:
# The 500 highest Zn values, with the identifying columns of each sample.
zn_view = ['BHID', 'date_drill', 'reference', 'sample_id', 'Zn']
ddf[zn_view].sort_values(by='Zn', ascending=False).head(500)

Unnamed: 0,BHID,date_drill,reference,sample_id,Zn
606436,PH06D261,19/03/2006,,1993463,106000.0
606438,PH06D261,19/03/2006,,1993462,106000.0
606426,PH06D261,19/03/2006,,1993456,103000.0
606424,PH06D261,19/03/2006,,1993457,103000.0
606458,PH06D261,19/03/2006,,1993475,101000.0
...,...,...,...,...,...
458183,GHDD6,26/03/2008,ENV 11340,1931405,4765.0
417186,DD12THM001,09/09/2012,ENV 06960,2656849,4756.0
458228,GHDD6,26/03/2008,ENV 11340,1931418,4727.0
456353,GHDD3,12/04/2007,ENV 11340,1932631,4723.0


In [57]:
# Zn values of two holes are divided by 1000.
# NOTE(review): presumably a unit correction - confirm against the source data.
# Caution: re-running the cell divides the same rows again.
zn_rows = ddf['BHID'].isin(['HNWPR95A', 'PH06D261'])
ddf.loc[zn_rows, 'Zn'] = ddf.loc[zn_rows, 'Zn'] / 1000

# Persist the desurveyed table (the row index is written as the first column).
desurveyed_csv = f'{data_dir}/desurveyed.csv'
ddf.to_csv(desurveyed_csv)