# 地质数据
## data/ring-geo-info.txt
- 每行以制表符分隔，第一列是环段编号
- 第二列是形如 (类型)+(类型)+… 的字符串，括号内是地质类型编号（例如 4-4、7-2-1）
- 第三至第七列（共五列）是与括号一一对应的厚度参数：有几个括号就有几个厚度值，其余列留空
- 最后一列是总厚度，固定为 30.0（各厚度之和应等于该值）

## data/geo-type.txt
- 该文件是一张表格（table）：每一列对应一种地质类型，每一行是一项地质属性值。


In [1]:
import os
import numpy as np
import pandas as pd
import re
# Tab-separated per-ring geology records; each line is parsed into a GeoRecord below.
ring_geo_info = '../data/ring-geo-info.txt'
# Table of property values per geology type; loaded with pd.read_table below.
geo_type = '../data/geo-type.txt'

In [2]:
def approx_equal(a, b, tol=1e-5):
    """Return True when a and b agree to within a relative tolerance.

    The allowed gap is ``tol`` times the larger of the two magnitudes, so
    the comparison scales with the size of the operands.
    """
    larger_magnitude = max(abs(a), abs(b))
    allowed_gap = larger_magnitude * tol
    return abs(a - b) <= allowed_gap

class GeoRecord:
    """Geology of one ring segment, parsed from a split line of the ring file.

    Attributes:
        no: ring number, kept as the raw text of field 0.
        types: geology type codes found in field 1, in order of appearance.
        thick: layer thicknesses (floats) from the non-blank cells of
            fields 2..6.
    """

    # A type code is two or three groups of 1-2 digits joined by '-',
    # e.g. 4-10 or 7-2-1.  Raw string so that \d is a regex escape, not a
    # (deprecated) string escape.
    re_type = re.compile(r'(\d\d?-\d\d?(?:-\d\d?)?)')

    def __init__(self, txtarray):
        """Build a record from the list of fields of one input line.

        Args:
            txtarray: sequence of strings; field 0 is the ring number,
                field 1 the '(type)+(type)+...' expression and fields 2..6
                the thickness cells (unused cells are blank).

        The record is always constructed.  If the number of thicknesses
        does not match the number of type codes, or the thicknesses do not
        add up to 30.0, the record and its thickness sum are printed as a
        warning instead of raising.
        """
        self.no = txtarray[0]
        self.types = self.re_type.findall(txtarray[1])
        # A real list (not a lazy map/filter object) so len(), sum() and
        # str() behave the same on every Python version; blank or
        # whitespace-only cells are unused thickness slots.
        self.thick = [float(cell) for cell in txtarray[2:7] if cell.strip()]
        # Explicit check rather than assert + bare except: asserts vanish
        # under -O and the bare except hid unrelated errors.
        if (len(self.types) != len(self.thick)
                or not approx_equal(sum(self.thick), 30.0)):
            print('%s %s' % (self, sum(self.thick)))

    def __str__(self):
        """Return 'no<TAB>type:[...]<TAB>thick:[...]' for display."""
        return '%s\ttype:%s\tthick:%s' % (self.no, self.types, self.thick)
    
    
# Read the ring file and turn each tab-separated line into a GeoRecord.
with open(ring_geo_info) as f:
    lines = f.readlines()
records = [GeoRecord(line.split('\t')) for line in lines]

# Load the geology-type property table and show its dimensions and content.
df = pd.read_table(geo_type)
print(df.shape)
df

(7, 21)


Unnamed: 0,地质类别,2-3,2-4,4-2,4-4,4-5,4-8,4-9,4-10,4-11,...,7-2-2,9-1,9-2-1,9-2-2,9-3,12-1,12-2-1,12-2-2,12-3,12-4
0,Y3,17.0,18.5,19.0,18.0,19.0,19.5,20.0,20.5,21.0,...,18.5,19.5,20.5,22.5,24.5,19.5,20.5,22.5,24.5,26.5
1,FI3,4.5,18.0,15.0,8.0,20.0,26.0,28.0,32.0,35.0,...,22.5,25.0,27.5,45.0,55.0,27.0,30.0,45.0,55.0,70.0
2,EM3(压缩模量),4.0,5.5,15.0,4.5,6.5,20.0,22.0,25.0,35.0,...,20.0,40.0,90.0,10000.0,10000.0,40.0,90.0,10000.0,10000.0,10000.0
3,P3(泊松比),0.65,0.5,0.32,0.42,0.32,0.28,0.25,0.22,0.25,...,0.28,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.22,0.18
4,SITA3,0.65,0.5,0.2,0.7,0.48,0.45,0.4,0.35,0.35,...,0.55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,K3,0.003,3.5,0.005,0.005,4.5,6.5,12.0,20.0,30.0,...,0.5,0.8,2.5,15.0,1.5,1.0,2.5,15.0,1.5,0.5
6,FRB3,10.0,20.0,25.0,18.0,22.0,35.0,50.0,55.0,65.0,...,28.0,45.0,60.0,125.0,380.0,45.0,60.0,125.0,380.0,650.0


- 序列化地质数据，用pickle保存。

In [3]:
import pickle

# Persist the property table and the parsed records together as one
# two-element list in a single pickle file.
fname = '../data/geo.pkl'
with open(fname, 'wb') as f:
    pickle.dump([df, records], f)

In [4]:
# Round-trip check: clear both names, then restore them from the pickle
# file that this notebook wrote itself (never unpickle untrusted files).
typeinfo = records = None
with open(fname, 'rb') as f:
    typeinfo, records = pickle.load(f)

# Show how many records came back, then each record, then the table.
print(len(records))
for record in records:
    print(record)
typeinfo

88
085	type:['4-4', '4-5', '7-2-1']	thick:[4.82, 7.5, 17.68]
119	type:['4-4', '4-10', '7-2-2', '12-1']	thick:[7.15, 7.0, 10.98, 4.87]
130	type:['4-4', '4-10', '7-2-2', '12-1', '12-2-1']	thick:[7.74, 5.5, 3.52, 7.02, 6.22]
141	type:['4-4', '4-10', '7-2-2', '12-1']	thick:[8.13, 4.5, 15.46, 1.91]
152	type:['4-2', '7-2-2', '12-1']	thick:[2.07, 21.51, 6.42]
160	type:['4-10', '7-2-2', '12-1']	thick:[1.0, 21.96, 7.04]
173	type:['2-3', '2-4', '7-2-2', '12-1']	thick:[1.4, 6.98, 8.12, 13.5]
185	type:['2-3', '7-2-1', '7-2-2']	thick:[7.58, 7.08, 15.34]
206	type:['2-3', '4-2', '7-2-1']	thick:[3.18, 3.52, 23.3]
215	type:['4-2', '7-2-1', '7-2-2']	thick:[0.27, 26.0, 3.73]
227	type:['4-2', '4-10', '7-2-2']	thick:[0.49, 17.45, 12.06]
238	type:['4-2', '7-2-2']	thick:[2.39, 27.61]
250	type:['4-2', '7-2-2', '12-1', '12-2-1']	thick:[4.04, 8.5, 6.51, 10.95]
260	type:['7-2-2', '12-1', '12-2-1']	thick:[8.08, 9.5, 12.42]
272	type:['7-2-2', '12-1', '12-2-1']	thick:[13.09, 9.0, 7.91]
282	type:['7-2-2', '12-1', '1

Unnamed: 0,地质类别,2-3,2-4,4-2,4-4,4-5,4-8,4-9,4-10,4-11,...,7-2-2,9-1,9-2-1,9-2-2,9-3,12-1,12-2-1,12-2-2,12-3,12-4
0,Y3,17.0,18.5,19.0,18.0,19.0,19.5,20.0,20.5,21.0,...,18.5,19.5,20.5,22.5,24.5,19.5,20.5,22.5,24.5,26.5
1,FI3,4.5,18.0,15.0,8.0,20.0,26.0,28.0,32.0,35.0,...,22.5,25.0,27.5,45.0,55.0,27.0,30.0,45.0,55.0,70.0
2,EM3(压缩模量),4.0,5.5,15.0,4.5,6.5,20.0,22.0,25.0,35.0,...,20.0,40.0,90.0,10000.0,10000.0,40.0,90.0,10000.0,10000.0,10000.0
3,P3(泊松比),0.65,0.5,0.32,0.42,0.32,0.28,0.25,0.22,0.25,...,0.28,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.22,0.18
4,SITA3,0.65,0.5,0.2,0.7,0.48,0.45,0.4,0.35,0.35,...,0.55,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,K3,0.003,3.5,0.005,0.005,4.5,6.5,12.0,20.0,30.0,...,0.5,0.8,2.5,15.0,1.5,1.0,2.5,15.0,1.5,0.5
6,FRB3,10.0,20.0,25.0,18.0,22.0,35.0,50.0,55.0,65.0,...,28.0,45.0,60.0,125.0,380.0,45.0,60.0,125.0,380.0,650.0


In [5]:
# Gather every geology type code that occurs in at least one record.
typesInRec = set()
for record in records:
    typesInRec.update(set(record.types))

# Type codes present in the property table; the first column is skipped
# because it holds the property names rather than a type code.
typesAll = set(df.columns[1:])

print(typesInRec)
print(typesAll)
# Codes used by some record but missing from the table.
print(typesInRec.difference(typesAll))

set(['9-1', '2-3', '4-2', '4-4', '4-5', '2-4', '4-9', '7-2-1', '7-2-2', '4-10', '9-2-1', '12-2-1', '9-3', '12-3', '12-1'])
set(['9-1', '2-3', '4-2', '4-4', '4-5', '2-4', '4-8', '4-9', '9-2-1', '7-2-1', '7-2-2', '9-2-2', '4-10', '4-11', '12-4', '12-2-1', '9-3', '12-2-2', '12-3', '12-1'])
set([])
