In [25]:
import pandas as pd
from src.funcs_importar import import_xyz, gdb
                                                            # ARQUIVO COM DEFINICOES DE FUNCOES ESCRITAS EM
                                                            # PYTHON PARA FACILITAR OS PROCESSOS DE PLOTS E LEITURA DOS DADOS...
                                                            # E NO FUTURO GERAR UM SCRIPT DE ENCADEAMENTO DESTAS FUNÇOES.

In [26]:
# Display the path gdb() returns when called without arguments (the geodatabase root).
gdb()

'/home/ggrl/geodatabase/'

# Levantamento 3022

## Dados Gamaespectrométricos

### Recuperando os dados selecionados do XYZ para serem analisados

In [59]:
# Column names assigned positionally to the 3022 gamma XYZ file
# (presumably the file's own column order — TODO confirm against the header).
gama_3022_cols = 'ALTURA BARO COSMICO CTB CTCOR CTEXP DATA eTh eU FIDUCIAL GPSALT HORA KB KCOR KPERC LATITUDE LIVE_TIME LONGITUDE MDT TEMP THB THCOR THKRAZAO UB UCOR UKRAZAO UTHRAZAO UUP X X_WGS Y Y_WGS'.split(" ")

# Read only the projected coordinates, MDT and the radiometric channels/ratios.
gama_3022 = pd.read_csv(
    gdb('geof/3022_Gama.XYZ'),
    names=gama_3022_cols,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=8,            # presumably the XYZ header lines — TODO confirm
    usecols=['X', 'Y',
             'MDT',
             'KPERC', 'eU', 'eTh', 'CTCOR',
             'UTHRAZAO', 'THKRAZAO', 'UKRAZAO'],
    na_values='*',         # '*' entries are parsed as NaN
    dtype='float',
    encoding='latin-1',
)

In [60]:
# Summary of the loaded gamma table: columns, non-null counts, dtypes and memory usage.
gama_3022.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 676766 entries, 0 to 676765
Data columns (total 10 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   CTCOR     676766 non-null  float64
 1   eTh       676766 non-null  float64
 2   eU        676766 non-null  float64
 3   KPERC     676766 non-null  float64
 4   MDT       676766 non-null  float64
 5   THKRAZAO  676766 non-null  float64
 6   UKRAZAO   676766 non-null  float64
 7   UTHRAZAO  676766 non-null  float64
 8   X         676766 non-null  float64
 9   Y         676766 non-null  float64
dtypes: float64(10)
memory usage: 51.6 MB


### Definindo funções que listam os dados brutos e descrevem suas distribuições estatísticas

In [61]:
def list_columns(geof):
    """Classify the columns of a survey table and print the three groups.

    Column names are matched exactly (case-sensitive):

    * geographic coordinates: ``LATITUDE``, ``LONGITUDE``, ``LONG``, ``LAT``,
      ``LAT_WGS``, ``LONG_WGS``
    * projected coordinates: ``X``, ``Y``, ``UTME``, ``UTMN``, ``X_WGS``, ``Y_WGS``
    * every other column is reported as a geophysical attribute.

    Parameters
    ----------
    geof : pandas.DataFrame
        Table whose ``columns`` are classified.

    Returns
    -------
    tuple of (list, list, list)
        ``(geophysical, geographic, projected)`` column names, each list in
        the order the columns appear in ``geof``.
    """
    # LAT_WGS / LONG_WGS are the geographic columns of the magnetic tables in
    # this notebook; without them they would be described as geophysical data.
    colunas_geograficas = {'LATITUDE', 'LONGITUDE', 'LONG', 'LAT',
                           'LAT_WGS', 'LONG_WGS'}
    colunas_projetadas = {'X', 'Y', 'UTME', 'UTMN', 'X_WGS', 'Y_WGS'}

    print('Listando atributos dos dados geofisicos')
    lista_atributo_geof = []
    lista_atributo_geog = []
    lista_atributo_proj = []

    for atributo in geof.columns:
        if atributo in colunas_geograficas:
            lista_atributo_geog.append(atributo)
        elif atributo in colunas_projetadas:
            lista_atributo_proj.append(atributo)
        else:
            lista_atributo_geof.append(atributo)

    print("# --- # Listagem de dados do aerolevantamento:  ")
    print(f"Lista de atributos geofísicos = {lista_atributo_geof}")
    print(f"lista de atributos geograficos = {lista_atributo_geog}")
    print(f"lista de atributos projetados = {lista_atributo_proj}")
    return lista_atributo_geof, lista_atributo_geog, lista_atributo_proj

# STATISTICAL DESCRIPTION OF THE AIRBORNE GEOPHYSICAL DATA
def descricao(geof):
    """Build a per-column metadata table and descriptive statistics.

    Parameters
    ----------
    geof : pandas.DataFrame
        Survey table; columns are categorised with ``list_columns``.

    Returns
    -------
    tuple
        ``(metadatadict, lista_atributo_geof, lista_atributo_geog,
        lista_atributo_proj, geof_descrito)`` where

        * ``metadatadict`` has one row per column of ``geof`` with its dtype,
          number of missing values, number of unique values, number of
          negative values and number of non-null samples;
        * the three lists are the column categories returned by
          ``list_columns``;
        * ``geof_descrito`` is ``DataFrame.describe`` (with the 0.1, 10, 25,
          50, 75 and 99.5 percentiles) of the columns that are neither
          geographic nor projected coordinates.
    """
    # Reuse the categorisation helper (it also prints the three lists).
    lista_atributo_geof, lista_atributo_geog, lista_atributo_proj = list_columns(geof)

    metadatadict = geof.dtypes.to_frame(name='dType')
    metadatadict["Valores Faltantes"] = geof.isnull().sum()
    metadatadict["Valores Únicos"] = geof.nunique()
    # Vectorized per-column count of negatives (previously a Python loop over
    # every row). The key spelling is kept so existing lookups keep working.
    metadatadict["Valoes Negativos"] = (geof < 0).sum()
    metadatadict["Amostragem"] = geof.count()

    # Coordinates are excluded from the statistical summary.
    geof_df = geof.drop(columns=lista_atributo_geog + lista_atributo_proj)

    geof_descrito = geof_df.describe(percentiles=[0.001, 0.1, 0.25, 0.5, 0.75, 0.995])

    return metadatadict, lista_atributo_geof, lista_atributo_geog, lista_atributo_proj, geof_descrito


In [62]:
# Describe the 3022 gamma table; the result is the 5-tuple returned by descricao().
gama_3022_descricao = descricao(gama_3022)

Listando atributos dos dados geofisicos
# --- # Listagem de dados do aerolevantamento:  
Lista de atributos geofísicos = ['CTCOR', 'eTh', 'eU', 'KPERC', 'MDT', 'THKRAZAO', 'UKRAZAO', 'UTHRAZAO']
lista de atributos geograficos = []
lista de atributos projetados = ['X', 'Y']


#### Observamos valores negativos nas contagens radiométricas tratadas; isso nos leva à necessidade de manipular estes valores algebricamente para que os resultados, tanto da interpolação como das classificações supervisionadas, sejam aprimorados.

In [63]:
# Element 0 is the per-column metadata table (dtype, missing, unique, negatives, sample count).
gama_3022_descricao[0]

Unnamed: 0,dType,Valores Faltantes,Valores Únicos,Valoes Negativos,Amostragem
CTCOR,float64,0,237255,5581,676766
eTh,float64,0,1991,1693,676766
eU,float64,0,575,196639,676766
KPERC,float64,0,6191,48235,676766
MDT,float64,0,72962,0,676766
THKRAZAO,float64,0,234249,0,676766
UKRAZAO,float64,0,34275,0,676766
UTHRAZAO,float64,0,763,0,676766
X,float64,0,612143,0,676766
Y,float64,0,645094,0,676766


In [66]:
# Work on a copy so the derived columns are not written into the raw gama_3022 table.
df = gama_3022.copy()

# Shift each radioelement channel so that its minimum becomes 0.01 (strictly positive values).
df['K_pos'] = df['KPERC'] - df['KPERC'].min() + 0.01
df['eU_pos'] = df['eU'] - df['eU'].min() + 0.01
df['eTh_pos'] = df['eTh'] - df['eTh'].min() + 0.01

# F factor: K * eU / eTh, computed on the shifted channels.
df['fF'] = df['K_pos'] * df['eU_pos'] / df['eTh_pos']

# TODO: band ratios without low-pass filtering (UTH_R, KTH_R) are not implemented yet.

In [67]:
# Describe the table again, now including the derived columns added to df.
df_descricao = descricao(df)

Listando atributos dos dados geofisicos
# --- # Listagem de dados do aerolevantamento:  
Lista de atributos geofísicos = ['CTCOR', 'eTh', 'eU', 'KPERC', 'MDT', 'THKRAZAO', 'UKRAZAO', 'UTHRAZAO', 'K_pos', 'eU_pos', 'eTh_pos', 'fF']
lista de atributos geograficos = []
lista de atributos projetados = ['X', 'Y']


In [69]:
# Element 4 is the describe() table; transposed so each attribute is a row.
df_descricao[4].T

Unnamed: 0,count,mean,std,min,0.1%,10%,25%,50%,75%,99.5%,max
CTCOR,676766.0,1266.22835,1345.323409,-11834.13,-140.74115,482.72,743.43,1079.81,1482.31,7197.69875,450895.02
eTh,676766.0,16.221399,18.263838,-3.8,-0.3,6.0,9.1,13.1,18.7,97.3,6371.4
eU,676766.0,0.436867,2.34944,-129.3,-2.3,-0.4,-0.1,0.2,0.7,7.6,934.8
KPERC,676766.0,0.644188,0.815783,-149.323,-0.21,0.025,0.131,0.367,0.88,4.213,24.286
MDT,676766.0,977.859156,140.620854,432.77,585.08765,818.58,876.03,960.12,1056.83,1416.46,1782.28
THKRAZAO,676766.0,115.978529,424.81199,0.155,0.894,7.695,14.066,32.872,102.50775,1326.76675,205529.094
UKRAZAO,676766.0,3.012281,53.502275,0.002,0.005,0.028,0.109,0.663,2.189,46.192225,30156.09
UTHRAZAO,676766.0,0.034024,0.047602,0.0,0.0,0.001,0.003,0.019,0.049,0.261,2.807
K_pos,676766.0,149.977188,0.815783,0.01,149.123,149.358,149.464,149.7,150.213,153.546,173.619
eU_pos,676766.0,129.746867,2.34944,0.01,127.01,128.91,129.21,129.51,130.01,136.91,1064.11


In [70]:
# Element 0 is the per-column metadata table (dtype, missing, unique, negatives, sample count).
df_descricao[0]

Unnamed: 0,dType,Valores Faltantes,Valores Únicos,Valoes Negativos,Amostragem
CTCOR,float64,0,237255,5581,676766
eTh,float64,0,1991,1693,676766
eU,float64,0,575,196639,676766
KPERC,float64,0,6191,48235,676766
MDT,float64,0,72962,0,676766
THKRAZAO,float64,0,234249,0,676766
UKRAZAO,float64,0,34275,0,676766
UTHRAZAO,float64,0,763,0,676766
X,float64,0,612143,0,676766
Y,float64,0,645094,0,676766


## Magnetometria

In [29]:
# Column names assigned positionally to the 3022 magnetic XYZ file
# (presumably the file's own column order — TODO confirm against the header).
mag_3022_cols = 'ALTURA BARO DATA FID GPSALT HORA IGRF LAT_WGS LONG_WGS MAGBASE MAGBRU MAGCOM MAGCOR MAGIGRF MAGMIC MAGNIV MDT X X_WGS Y Y_WGS'.split(" ")

# Read only the coordinates, ALTURA, MDT and the three magnetic channels.
mag_3022 = pd.read_csv(
    gdb('geof/3022_Mag.XYZ'),
    names=mag_3022_cols,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=8,            # presumably the XYZ header lines — TODO confirm
    usecols=['X', 'Y', 'LAT_WGS', 'LONG_WGS',
             'ALTURA', 'MDT',
             'MAGIGRF', 'MAGCOR', 'MAGNIV'],
    na_values='*',         # '*' entries are parsed as NaN
    dtype='float',
    encoding='latin-1',
)

In [30]:
# Summary of the loaded magnetic table: columns, dtypes and memory usage.
mag_3022.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6760982 entries, 0 to 6760981
Data columns (total 9 columns):
 #   Column    Dtype  
---  ------    -----  
 0   ALTURA    float64
 1   LAT_WGS   float64
 2   LONG_WGS  float64
 3   MAGCOR    float64
 4   MAGIGRF   float64
 5   MAGNIV    float64
 6   MDT       float64
 7   X         float64
 8   Y         float64
dtypes: float64(9)
memory usage: 464.2 MB


# Levantamento 1105

### Recuperando os dados selecionados do XYZ para serem analisados

In [23]:
# Column names assigned positionally to the 1105 gamma flight-line XYZ file
# (presumably the file's own column order — TODO confirm against the header).
gama_line_1105_cols = 'KB DATA BARO UB THB COSMICO CTB UUP ALTURA KPERC eU eTH CTEXP UTHRAZAO X Y UKRAZAO MDT THKRAZAO LIVE_TIME CTCOR KCOR THCOR UCOR HORA GPSALT LATITUDE FIDUCIAL TEMP LONGITUDE'.split(" ")

# Read only the coordinates, the radiometric channels/ratios and MDT.
gama_line_1105 = pd.read_csv(
    gdb('geof/1105_GamaLine.XYZ'),
    names=gama_line_1105_cols,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=10,           # header lines
    usecols=['X', 'Y', 'LATITUDE', 'LONGITUDE',
             'KPERC', 'eU', 'eTH', 'CTCOR',
             'THKRAZAO', 'UTHRAZAO', 'UKRAZAO', 'MDT'],
)

In [12]:
# Summary of the loaded 1105 flight-line table: columns, non-null counts, dtypes and memory usage.
gama_line_1105.info()
#gama_tie_1105.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1169659 entries, 0 to 1173983
Data columns (total 12 columns):
 #   Column     Non-Null Count    Dtype  
---  ------     --------------    -----  
 0   KPERC      1169659 non-null  float64
 1   eU         1169659 non-null  float64
 2   eTH        1169659 non-null  float64
 3   UTHRAZAO   1169659 non-null  float64
 4   X          1169659 non-null  float64
 5   Y          1169659 non-null  float64
 6   UKRAZAO    1169659 non-null  float64
 7   MDT        1169659 non-null  float64
 8   THKRAZAO   1169659 non-null  float64
 9   CTCOR      1169659 non-null  float64
 10  LATITUDE   1169659 non-null  float64
 11  LONGITUDE  1169659 non-null  float64
dtypes: float64(12)
memory usage: 116.0 MB


## Dados Gamaespectrométricos

### Descrição estatística dos dados brutos

In [32]:
# Describe the 1105 flight-line gamma table; the result is the 5-tuple returned by descricao().
describe_gama_line_1105 = descricao(gama_line_1105)

Listando atributos dos dados geofisicos
# --- # Listagem de dados do aerolevantamento:  
Lista de atributos geofísicos = ['KPERC', 'eU', 'eTH', 'UTHRAZAO', 'UKRAZAO', 'MDT', 'THKRAZAO', 'CTCOR']
lista de atributos geograficos = ['LATITUDE', 'LONGITUDE']
lista de atributos projetados = ['X', 'Y']


In [16]:
# Element 0 is the per-column metadata table (dtype, missing, unique, negatives, sample count).
describe_gama_line_1105[0]

Unnamed: 0,dType,Valores Faltantes,Valores Únicos,Valoes Negativos,Amostragem
KPERC,float64,0,6955,47711,1169659
eU,float64,0,8168,55720,1169659
eTH,float64,0,61336,35862,1169659
UTHRAZAO,float64,0,793,0,1169659
X,float64,0,888292,0,1169659
Y,float64,0,1134332,0,1169659
UKRAZAO,float64,0,18813,0,1169659
MDT,float64,0,165213,72,1169659
THKRAZAO,float64,0,115015,0,1169659
CTCOR,float64,0,56613,12033,1169659


In [17]:
# Element 4 is the describe() table; transposed so each attribute is a row.
describe_gama_line_1105[4].T

Unnamed: 0,count,mean,std,min,0.1%,10%,25%,50%,75%,99.5%,max
KPERC,1169659.0,0.95979,0.854709,-0.5,-0.355,0.085,0.382,0.757,1.306,4.53,14.577
eU,1169659.0,1.736779,1.092923,-0.5,-0.381,0.325,1.008,1.673,2.373,5.383,23.934
eTH,1169659.0,19.289047,11.065808,-4.048,-0.722,6.86,12.29,18.382,25.207,59.156,241.474
UTHRAZAO,1169659.0,0.097126,0.042352,0.0,0.0,0.056,0.073,0.09,0.111,0.279,2.669
UKRAZAO,1169659.0,2.716171,2.416952,0.042,0.181,0.819,1.3,1.911,3.302,14.902,59.007
MDT,1169659.0,872.675007,408.562445,-4.1,0.0,395.28,641.52,887.68,1113.55,1933.6113,2751.59
THKRAZAO,1169659.0,29.505581,25.788561,0.798,3.123,10.004,14.793,20.741,34.791,162.71671,525.859
CTCOR,1169659.0,14.176621,10.73809,-11.575,-1.641,3.814,6.608,11.263,19.699,56.542,246.6


### Manipulação algébrica dos dados brutos

In [13]:
# gdb is a function, so the output path is built by calling it
# (gdb + '...' raises TypeError).
gama_line_1105.to_csv(gdb('geof/gama_line_1105_corrigido'), index=False)

## Dados Magnetométricos

### Descrição estatística dos dados brutos

#### Flight Lines

In [None]:
# Negative values are visible here and must be treated, either by setting
# them to 0 or by normalising the channel to the 0-1 range.
# The flight-line description is stored as describe_gama_line_1105.
describe_gama_line_1105[4].T

#### Tie Lines

In [None]:
# descricao is defined in this notebook; no module alias `f` is imported.
# NOTE(review): gama_tie_1105 is not created in any visible cell — load it before running.
gama_tie_1105_describe = descricao(gama_tie_1105)

In [None]:
# Element 0 is the per-column metadata table of the tie-line data.
gama_tie_1105_describe[0]

In [None]:
# There is a considerable difference between the tie-line and flight-line data for this survey.
gama_tie_1105_describe[4].T

#### Tie Flight Lines Concatenados

In [None]:
# descricao is defined in this notebook; no module alias `f` is imported.
# NOTE(review): gama_1105 is not created in any visible cell — build it before running.
gama_1105_describe = descricao(gama_1105)

In [None]:
# Element 0 is the per-column metadata table of the concatenated data.
gama_1105_describe[0]

In [None]:
# Element 4 is the describe() table; transposed so each attribute is a row.
gama_1105_describe[4].T

In [None]:
# Number of samples with negative KPERC in the tie, line and combined tables.
print(gama_tie_1105.loc[gama_tie_1105['KPERC'] < 0, 'KPERC'].count())
print(gama_line_1105.loc[gama_line_1105['KPERC'] < 0, 'KPERC'].count())
print(gama_1105.loc[gama_1105['KPERC'] < 0, 'KPERC'].count())

### Exportando os arquivos tratados para um CSV

In [None]:
# gdb is a function, so each output path is built by calling it
# (gdb + '...' raises TypeError).
gama_1105.to_csv(gdb('geof/gama_1105'), index=False)
gama_tie_1105.to_csv(gdb('geof/gama_tie_1105'), index=False)
gama_line_1105.to_csv(gdb('geof/gama_line_1105'), index=False)

In [None]:
# NOTE(review): mag_cols is not defined in any visible cell — define the column list before running.
# Paths are built by calling gdb (gdb + '...' raises TypeError); rows with
# missing values are dropped right after loading.
mag_line_1105 = pd.read_csv(
    gdb('xyz/1105_XYZ/1105_MagLine.XYZ'),
    names=mag_cols,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=11,
    usecols=['X', 'Y', 'LATITUDE', 'LONGITUDE',
             'MAGIGRF', 'MDT', 'ALTURA', 'ALTURA_1'],
).dropna()

mag_tie_1105 = pd.read_csv(
    gdb('xyz/1105_XYZ/1105_MagTie.XYZ'),
    names=mag_cols,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=11,
    usecols=['X', 'Y', 'LATITUDE', 'LONGITUDE',
             'MAGIGRF', 'MDT', 'ALTURA', 'ALTURA_1'],
).dropna()

# Flight lines followed by tie lines, re-indexed 0..n-1.
mag_1105 = pd.concat([mag_line_1105, mag_tie_1105], ignore_index=True)

In [None]:
# descricao is defined in this notebook; no module alias `f` is imported.
descricao(mag_1105)

In [None]:
# gdb is a function, so each output path is built by calling it
# (gdb + '...' raises TypeError).
mag_line_1105.to_csv(gdb('geof/mag_line_1105'), index=False)
mag_tie_1105.to_csv(gdb('geof/mag_tie_1105'), index=False)
mag_1105.to_csv(gdb('geof/mag_1105'), index=False)

In [None]:
# Column names assigned positionally to the 1039 XYZ file
# (presumably the file's own column order — TODO confirm against the header).
cols_1039 = 'UTME UTMN LONG LAT MAGR THC UC KC CTC MAGB MAGC MAGD THB UB KB CTB FIDU TEMP ALTE ALTB'.split(" ")

# Subset of columns kept from the file.
usecols = ["UTME", "UTMN", "LAT", "LONG", "KC", "UC", "THC", "CTC", "MAGR"]

geof_1039 = pd.read_csv(
    gdb('xyz/1039_XYZ/spaulo_rjaneiro_sp.xyz'),   # gdb is a function: gdb + '...' raises TypeError
    names=cols_1039,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=6,
    usecols=usecols,
    encoding='latin-1',    # an encoding other than UTF-8 was needed for this file
)

In [None]:
# Keep only the rows that have no missing values.
geof_1039 = geof_1039.dropna()

In [None]:
# gdb is a function, so the output path is built by calling it
# (gdb + '...' raises TypeError).
geof_1039.to_csv(gdb('geof/geof_1039'), index=False)

### Exportando os arquivos tratados para um CSV

# Levantamento 1089

In [None]:
# NOTE(review): mag_cols is not defined in any visible cell — define the column list before running.
mLine_1089 = pd.read_csv(
    gdb('xyz/1089_XYZ/XYZ/1089_MagLine.XYZ'),   # gdb is a function: gdb + '...' raises TypeError
    names=mag_cols,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=11,
    usecols=['X', 'Y', 'LATITUDE', 'LONGITUDE',
             'MAGIGRF', 'ALTURA', 'MDT'],
    na_values='*',         # '*' entries are parsed as NaN
)

In [None]:
# NOTE(review): mag_cols is not defined in any visible cell — define the column list before running.
mTie_1089 = pd.read_csv(
    gdb('xyz/1089_XYZ/XYZ/1089_MagTie.XYZ'),    # gdb is a function: gdb + '...' raises TypeError
    names=mag_cols,
    sep=r'\s+',            # equivalent to the deprecated delim_whitespace=True
    skiprows=11,
    usecols=['X', 'Y', 'LATITUDE', 'LONGITUDE',
             'MAGIGRF', 'ALTURA', 'MDT'],
    na_values='*',         # '*' entries are parsed as NaN
)

In [None]:
# Stack the tie-line and flight-line tables into one frame with a fresh 0..n-1 index.
m_1089 = pd.concat([mTie_1089,mLine_1089], ignore_index=True)

In [None]:
# New frame without the rows that contain any missing value; m_1089 itself is left unchanged.
mag_1089 = m_1089.dropna()

In [None]:
# gdb is a function, so the output path is built by calling it
# (gdb + '...' raises TypeError).
mag_1089.to_csv(gdb('geof/mag_1089'), index=False)

In [None]:
# descricao is defined in this notebook; no module alias `f` is imported.
a = descricao(mag_1089)

In [None]:
# Element 0 is the per-column metadata table of the 1089 magnetic data.
a[0]

In [None]:
# Element 4 is the describe() table; transposed so each attribute is a row.
a[4].T