# Neste notebook vamos utilizar as funçoes read_csv() e dropna() da biblioteca Pandas.

## Com um visualizador de logs podemos abrir os arquivos GamaLine.XYZ e GamaTie.XYZ e indentificamos:
    - os nomes de cada coluna do arquivo e;
    - o numero de linhas de cabeçalho.

# Levantamento 1105

## Dados Gamaespectrometricos

### Colunas descritas nos dados brutos

In [2]:
gama_line_cols = 'KB DATA BARO UB THB COSMICO CTB UUP ALTURA KPERC eU eTH CTEXP UTHRAZAO X Y UKRAZAO MDT THKRAZAO LIVE_TIME CTCOR KCOR THCOR UCOR HORA GPSALT LATITUDE FIDUCIAL TEMP LONGITUDE'.split(" ")

gama_tie_cols = 'KB DATA BARO UB THB COSMICO CTB UUP ALTURA KPERC eU eTH CTEXP UTHRAZAO X Y UKRAZAO MDT THKRAZAO LIVE_TIME CTCOR KCOR THCOR UCOR HORA GPSALT LATITUDE FIDUCIAL TEMP LONGITUDE __X __Y'.split(" ")

### Importando dados brutos

In [5]:
gama_line_1105 = pd.read_csv(gdb+'xyz/1105_XYZ/1105_GamaLine.XYZ',
                         names=gama_line_cols,
                         delim_whitespace=True,
                         skiprows=11,                                     # Linhas de cabeçalho
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "KPERC","eU","eTH","CTCOR",
                                  "THKRAZAO","UTHRAZAO","UKRAZAO","MDT"])

gama_tie_1105 = pd.read_csv(gdb+'xyz/1105_XYZ/1105_GamaTie.XYZ',
                         names=gama_tie_cols,
                         delim_whitespace=True,
                         skiprows=11,
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "KPERC","eU","eTH","CTCOR",
                                  "THKRAZAO","UTHRAZAO","UKRAZAO","MDT"])

gama_1105     = pd.concat([gama_line_1105,gama_tie_1105], ignore_index=True) # Unindo a line e tie em um unico arquivo

### Removendo valores nao numericos

In [6]:
gama_line_1105.dropna(inplace=True) 
gama_tie_1105.dropna(inplace=True)
gama_1105.dropna(inplace=True)

### Exportando os arquivos tratados para um CSV

In [7]:
gama_1105.to_csv(gdb+'geof/gama_1105',index=False)
gama_tie_1105.to_csv(gdb+'geof/gama_tie_1105',index=False)
gama_line_1105.to_csv(gdb+'geof/gama_line_1105',index=False)

## Dados Magnetometricos

### Colunas descritas nos dados brutos

In [2]:
mag_cols = 'DATA BARO ALTURA X Y MDT HORA GPSALT LATITUDE FIDUCIAL LONGITUDE ALTURA_1 IGRF MAGBASE MAGBRU MAGCOM MAGCOR MAGIGRF MAGMIC MAGNIV'.split(" ")

### Importando dados brutos

In [15]:
mag_line_1105 = pd.read_csv(gdb+'xyz/1105_XYZ/1105_MagLine.XYZ',
                         names=mag_cols,
                         delim_whitespace=True,
                         skiprows=11,                                     # Linhas de cabeçalho
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "MAGIGRF","ALTURA","ALTURA_1","MDT"])

mag_tie_1105 = pd.read_csv(gdb+'xyz/1105_XYZ/1105_MagTie.XYZ',
                         names=mag_cols,
                         delim_whitespace=True,
                         skiprows=11,
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "MAGIGRF","ALTURA","ALTURA_1","MDT"])

mag_1105 = pd.concat([mag_line_1105,mag_tie_1105], ignore_index=True)

### Removendo valores nao numericos

In [None]:
print(mag_1105.shape)        # numero de linhas e colunas;
print(mag_1105.isna().sum()) # soma dos valores nao numericos para cada coluna;

(12344761, 8)
ALTURA       4103
X            4103
Y            4103
MDT          4103
LATITUDE     4103
LONGITUDE    4103
ALTURA_1     4103
MAGIGRF      4103
dtype: int64


In [None]:
mag_line_1105.dropna(inplace=True) 
mag_tie_1105.dropna(inplace=True)
mag_1105.dropna(inplace=True)

### Exportando os arquivos tratados para um CSV

In [5]:
mag_line_1105.to_csv(gdb+'geof/mag_line_1105', index=False)
mag_tie_1105.to_csv(gdb+'geof/mag_tie_1105', index=False)
mag_1105.to_csv(gdb+'geof/mag_1105', index=False)

In [23]:
cols_1039 = 'UTME UTMN LONG LAT MAGR THC UC KC CTC MAGB MAGC MAGD THB UB KB CTB FIDU TEMP ALTE ALTB'.split(" ")

usecols=["UTME","UTMN","LAT","LONG","KC","UC","THC","CTC","MAGR"]


geof_1039 = pd.read_csv(gdb+'xyz/1039_XYZ/spaulo_rjaneiro_sp.xyz',
                         names=cols_1039,
                         delim_whitespace=True,
                         skiprows=6,
                         usecols=usecols,
                         encoding='latin-1') # foi adicionado um novo tipo de econding diferente de UTF-8

In [24]:
geof_1039.dropna(inplace=True)

In [25]:
geof_1039.to_csv(gdb+'geof/geof_1039',index=False)

# Levantamento 3022 

## Dados Gamaespectrometricos

### Colunas descritas nos dados brutos

In [40]:
from src.funcs_importar import gdb
from src.funcs_importar import geof_gdb

In [41]:
gama_cols = 'ALTURA BARO COSMICO CTB CTCOR CTEXP DATA eTh eU FIDUCIAL GPSALT HORA KB KCOR KPERC LATITUDE LIVE_TIME LONGITUDE MDT TEMP THB THCOR THKRAZAO UB UCOR UKRAZAO UTHRAZAO UUP X X_WGS Y Y_WGS'.split(" ")

mag_cols = 'ALTURA BARO DATA FID GPSALT HORA IGRF LAT_WGS LONG_WGS MAGBASE MAGBRU MAGCOM MAGCOR MAGIGRF MAGMIC MAGNIV MDT X X_WGS Y Y_WGS'.split(" ")


In [43]:
g_3022_raw = pd.read_csv(gdb('/home/ggrl/geodatabase/xyz/3022_XYZ/Area_14_gama.XYZ'),
                         names=mag_cols,
                         delim_whitespace=True,
                         skiprows=8,
                         usecols=['X',"Y","LATITUDE","LONGITUDE",'X_WGS','Y_WGS',
                                  "BARO", 'KB', 'UB', 'THB','COSMICO',''
                                  "KPERC", 'eU', 'eTH', 'CTEXP', 'UTHRAZAO',"MDT"],
                         na_values=('*'),                     
                         dtype=('float'),
                         encoding='latin-1')

ValueError: Number of passed names did not match number of header fields in the file

### Importando dados brutos
 - Este arquivo:
    - XYZ obtiddo atravéz do Professor Vinicius Abud Louro;
    - provém de um projeto diferente dos disponibilizados no portal SGB;
    - não possui os mesmo padrões que os arquivos disponibilizados no portal SGB;
    - possui valores expúrios entre os valores gama e mag;
        - valor identificado como '*' atravéz dos seguintes passos:
            - 
 

In [18]:
m_3022_raw = pd.read_csv(gdb('/home/ggrl/geodatabase/xyz/3022_XYZ/Area_14_mag.XYZ'),
                         names=mag_cols,
                         delim_whitespace=True,
                         skiprows=8,
                         usecols=["X","Y","LAT_WGS","LONG_WGS",
                                  "MAGIGRF","MDT"],
                         na_values=('*'),                     
                         dtype=('float'),
                         encoding='latin-1')

### Removendo valores nao numericos

In [None]:
gama_line_1105.dropna(inplace=True) 
gama_tie_1105.dropna(inplace=True)
gama_1105.dropna(inplace=True)

### Exportando os arquivos tratados para um CSV

In [None]:
gama_1105.to_csv(gdb+'geof/gama_1105',index=False)
gama_tie_1105.to_csv(gdb+'geof/gama_tie_1105',index=False)
gama_line_1105.to_csv(gdb+'geof/gama_line_1105',index=False)

# Levantamento 1089

### Colunas descritas nos dados brutos

In [8]:
gama_cols = 'X Y FIDUCIAL GPSALT BARO ALTURA MDT CTB KB UB THB UPU LIVE_TIME COS TEMP KPERC eU eth CTEXP CTCOR UCOR THCOR KCOR THKRAZAO UKRAZAO UTHRAZAO LONGITUDE LATITUDE DATA HORA'.split(" ")

mag_cols = 'X Y FIDUCIAL GPSALT BARO ALTURA MDT MAGBASE MAGBRU MAGCOM MAGCOR MAGNIV MAGMIC MAGIGRF IGRF LONGITUDE LATITUDE DATA HORA'.split(" ")

In [3]:
gama_line_1089 = pd.read_csv(gdb+'xyz/1089_XYZ/XYZ/1089_GamaLine.XYZ',
                         names=gama_cols,
                         delim_whitespace=True,
                         skiprows=11,
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "KPERC","eU","eth","CTCOR",
                                  "THKRAZAO","UKRAZAO","UTHRAZAO",
                                  "MDT"],
                         na_values=('*'))

gama_tie_1089 = pd.read_csv(gdb+'xyz/1089_XYZ/XYZ/1089_GamaTie.XYZ',
                         names=gama_cols,
                         delim_whitespace=True,
                         skiprows=11,
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "KPERC","eU","eth","CTCOR",
                                  "THKRAZAO","UKRAZAO","UTHRAZAO",
                                  "MDT"],
                         na_values=('*'))

gama_1089 = pd.concat([gama_tie_1089,gama_line_1089], ignore_index=True)

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
gama_1089.dropna(inplace=True)

In [5]:
gama_1089

Unnamed: 0,X,Y,MDT,KPERC,eU,eth,CTCOR,THKRAZAO,UKRAZAO,UTHRAZAO,LONGITUDE,LATITUDE
8,528115.65,8756161.26,522.35,0.38,1.46,18.83,1371.89,49.7742,3.8425,0.0772,-44.742414,-11.251846
9,528035.37,8756163.26,522.77,0.43,1.06,15.94,1170.76,37.2496,2.4835,0.0667,-44.743149,-11.251829
10,527955.11,8756164.95,522.11,0.44,1.08,13.13,1016.51,29.9028,2.4507,0.0820,-44.743884,-11.251814
11,527874.82,8756166.56,522.05,0.42,1.47,11.12,946.89,26.7573,3.5533,0.1328,-44.744620,-11.251800
12,527794.50,8756168.02,522.57,0.43,1.78,10.41,948.79,24.2187,4.1092,0.1697,-44.745356,-11.251787
...,...,...,...,...,...,...,...,...,...,...,...,...
668816,665126.58,8782417.52,422.06,0.29,0.59,7.02,562.15,23.8827,1.9898,0.0833,-43.488568,-11.010735
668817,665130.89,8782340.24,422.94,0.33,0.61,7.26,588.93,22.3755,1.9209,0.0858,-43.488525,-11.011434
668818,665135.62,8782262.93,423.37,0.36,0.66,7.69,630.33,21.1524,1.7839,0.0843,-43.488478,-11.012132
668819,665140.29,8782185.57,422.44,0.40,0.48,7.39,599.77,18.7542,1.2390,0.0661,-43.488431,-11.012832


In [10]:
gama_line_1089.dropna(inplace=True)

In [11]:
f.descricao(gama_line_1089)

Listando atributos dos dados geofisicos
# --- # Listagem de dados do aerolevantamento:  
Lista de atributos geofísicos = ['MDT', 'KPERC', 'eU', 'eth', 'CTCOR', 'THKRAZAO', 'UKRAZAO', 'UTHRAZAO']
lista de atributos geograficos = ['LONGITUDE', 'LATITUDE']
lista de atributos projetados = ['X', 'Y']


(             dType  Valores Faltantes  Valores Únicos  Amostragem
 X           object                  0          511916      635173
 Y           object                  0          625447      635173
 MDT        float64                  0           43029      635173
 KPERC      float64                  0             534      635173
 eU         float64                  0             872      635173
 eth        float64                  0            4141      635173
 CTCOR      float64                  0          171198      635173
 THKRAZAO   float64                  0          431058      635173
 UKRAZAO    float64                  0          173144      635173
 UTHRAZAO   float64                  0            6152      635173
 LONGITUDE  float64                  0          628804      635173
 LATITUDE   float64                  0          632826      635173,
 ['MDT', 'KPERC', 'eU', 'eth', 'CTCOR', 'THKRAZAO', 'UKRAZAO', 'UTHRAZAO'],
 ['LONGITUDE', 'LATITUDE'],
 ['X', 'Y'],
           

In [7]:
gama_1089.head()

Unnamed: 0,X,Y,MDT,KPERC,eU,eth,CTCOR,THKRAZAO,UKRAZAO,UTHRAZAO,LONGITUDE,LATITUDE
8,528115.65,8756161.26,522.35,0.38,1.46,18.83,1371.89,49.7742,3.8425,0.0772,-44.742414,-11.251846
9,528035.37,8756163.26,522.77,0.43,1.06,15.94,1170.76,37.2496,2.4835,0.0667,-44.743149,-11.251829
10,527955.11,8756164.95,522.11,0.44,1.08,13.13,1016.51,29.9028,2.4507,0.082,-44.743884,-11.251814
11,527874.82,8756166.56,522.05,0.42,1.47,11.12,946.89,26.7573,3.5533,0.1328,-44.74462,-11.2518
12,527794.5,8756168.02,522.57,0.43,1.78,10.41,948.79,24.2187,4.1092,0.1697,-44.745356,-11.251787


In [13]:
#gama_1089.to_csv(gdb+'geof/gama_1089',index=False)
gama_line_1089.to_csv(gdb+'geof/gama_line_1089',index=False)

In [10]:
mLine_1089 = pd.read_csv(gdb+'xyz/1089_XYZ/XYZ/1089_MagLine.XYZ',
                         names=mag_cols,
                         delim_whitespace=True,
                         skiprows=11,
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "MAGIGRF","ALTURA","MDT"],
                         na_values=('*'))

  exec(code_obj, self.user_global_ns, self.user_ns)


In [11]:
mTie_1089 = pd.read_csv(gdb+'xyz/1089_XYZ/XYZ/1089_MagTie.XYZ',
                         names=mag_cols,
                         delim_whitespace=True,
                         skiprows=11,
                         usecols=["X","Y","LATITUDE","LONGITUDE",
                                  "MAGIGRF","ALTURA","MDT"],
                         na_values=('*'))

  exec(code_obj, self.user_global_ns, self.user_ns)


In [12]:
m_1089 = pd.concat([mTie_1089,mLine_1089], ignore_index=True)

In [13]:
mag_1089 = m_1089.dropna()

In [14]:
mag_1089.to_csv(gdb+'geof/mag_1089',index=False)

In [16]:
a = f.descricao(mag_1089)

# --- # Listagem de dados do aerolevantamento:  
Lista de atributos geofísicos = ['ALTURA', 'MDT', 'MAGIGRF']
lista de atributos geograficos = ['LONGITUDE', 'LATITUDE']
lista de atributos projetados = ['X', 'Y']


In [19]:
a[0]

Unnamed: 0,dType,Valores Faltantes,Valores Únicos,Amostragem
X,object,0,2147452,6676787
Y,object,0,5686864,6676787
ALTURA,float64,0,15192,6676787
MDT,float64,0,49045,6676787
MAGIGRF,float64,0,402447,6676787
LONGITUDE,float64,0,6280335,6676787
LATITUDE,float64,0,6499843,6676787


In [18]:
a[4].T

Unnamed: 0,count,mean,std,min,0.1%,10%,25%,50%,75%,99.5%,max
ALTURA,6676787.0,108.161005,23.916865,-52.43,60.4,86.57,94.82,103.99,115.24,235.3,400.19
MDT,6676787.0,508.932328,85.382206,290.64,312.21,400.97,450.46,501.29,564.38,722.2,795.55
MAGIGRF,6676787.0,12.122186,56.913451,-1004.673,-363.92307,-47.375,-3.245,25.07,42.505,139.15107,504.515
