<a href="https://colab.research.google.com/github/Rogerio-mack/VLF/blob/main/VLF_Database.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import pandas as pd
import numpy as np
import os

[Data here](https://drive.google.com/drive/folders/1mUPDRlzSqxuFMl6L96tPBQKprPY223yT?usp=sharing)

In [24]:
# Mount Google Drive at /content/drive (Colab-only); the paths used in the
# cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Routines

In [25]:
def get_file_properties(path_file, verbose=False):
  """Describe a filesystem entry (directory or regular file) as a dictionary.

  Parameters
  ----------
  path_file : str
      Path of the entry to inspect.
  verbose : bool, optional
      When True, also print the result as YAML. PyYAML is imported only in
      that case, so the non-verbose path has no third-party dependency.

  Returns
  -------
  dict
      Always contains 'path_file', 'type', 'extension' and 'size'.
      * 'type' is 'Dir', 'File', or 'Undentified' when the path is neither
        (for example, it does not exist).
      * 'size' is the file size in bytes; for a directory it is the sum of
        the stat sizes of its direct children (not recursive); otherwise 0.
      * 'extension' is the text after the last dot of the file name, or '-'
        for directories and for files that have no extension.
      * 'nr_files' (directories only) is the number of direct children.
  """
  import os

  # 'Undentified' (sic) is returned data, so the spelling is kept verbatim.
  kind = 'Undentified'
  extension = '-'
  size = 0
  nr_files = 0

  if os.path.isdir(path_file):
    kind = 'Dir'
    # Single pass over the directory: accumulate size and count together.
    # The context manager releases the scandir handle even if stat() fails.
    with os.scandir(path_file) as entries:
      for entry in entries:
        size += os.stat(entry).st_size
        nr_files += 1
  elif os.path.isfile(path_file):
    kind = 'File'
    # splitext only reports a real suffix, so a name without a dot no longer
    # yields the whole file name as its "extension".
    suffix = os.path.splitext(os.path.basename(path_file))[1]
    extension = suffix[1:] or '-'
    size = os.path.getsize(path_file)

  # Key insertion order is part of the observable result (e.g. when printed).
  file_properties = {}
  file_properties['path_file'] = path_file
  file_properties['type'] = kind
  if kind == 'Dir':
    file_properties['nr_files'] = nr_files
  file_properties['extension'] = extension
  file_properties['size'] = size

  if verbose:
    import yaml  # lazy: only the printing path needs PyYAML
    print(yaml.dump(file_properties, default_flow_style=False))

  return file_properties

# Smoke test of get_file_properties on three kinds of entry: the data root,
# one date folder, and a single .mat file. The return value is discarded
# (bound to `_`); only the verbose YAML print-out is of interest here.
_ = get_file_properties('/content/drive/MyDrive/VLF/data',verbose=True)
_ = get_file_properties('/content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10',verbose=True)
_ = get_file_properties('/content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006A.mat',verbose=True)


extension: '-'
nr_files: 2
path_file: /content/drive/MyDrive/VLF/data
size: 8192
type: Dir

extension: '-'
nr_files: 2
path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10
size: 412575
type: Dir

extension: mat
path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006A.mat
size: 206292
type: File



In [26]:
def get_file_tree1(path,verbose=False):
  """Build a one-level listing of a directory.

  Parameters
  ----------
  path : str
      Directory to list.
  verbose : bool, optional
      When True, also print the result as YAML (PyYAML is imported only then).

  Returns
  -------
  dict or None
      None (after printing a message) when `path` is not a directory.
      Otherwise a dictionary with the keys
      * 'path'       - the `path` argument,
      * 'properties' - get_file_properties(path),
      * 'files'      - {entry name: get_file_properties(path + '/' + name)}
                       for every direct child, in os.listdir order.
  """
  import os

  # Inspect the directory once and reuse the result; each call scans the
  # whole directory, so calling it twice doubles the I/O.
  properties = get_file_properties(path)
  if properties['type'] != 'Dir':
    print(path, ' is not a path')
    return None

  file_tree1 = {}
  file_tree1['path'] = path
  file_tree1['properties'] = properties
  # Children are joined with a literal '/' so 'path_file' strings keep the
  # exact form callers already store and compare.
  file_tree1['files'] = {
    name: get_file_properties(path + '/' + name)
    for name in os.listdir(path)
  }

  if verbose:
    import yaml  # lazy: only the printing path needs PyYAML
    print(yaml.dump(file_tree1, default_flow_style=False))

  return file_tree1

# List one date folder and keep the resulting tree in `all_files`;
# verbose=True also prints it as YAML.
all_files = get_file_tree1('/content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10',verbose=True)

files:
  FE070710000500NAA_006A.mat:
    extension: mat
    path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006A.mat
    size: 206292
    type: File
  FE070710000500NAA_006B.mat:
    extension: mat
    path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006B.mat
    size: 206283
    type: File
path: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10
properties:
  extension: '-'
  nr_files: 2
  path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10
  size: 412575
  type: Dir



In [27]:
# Filename: XXYYMMDDHHMMSSZZZ_ACCT.mat

# XX – Station ID
# YY – Year
# MM — Month
# DD — Day
# HH — Hour
# MM — Minute
# SS — Second
# ZZZ — Transmitter Callsign
# A — Usually not used
# CC — 00 for N/S channel, 01 for E/W channel
# T — Type of data
# A is low resolution (1 Hz sampling rate) amplitude
# B is low resolution (1 Hz sampling rate) phase
# C is high resolution (50 Hz sampling rate) amplitude
# D is high resolution (50 Hz sampling rate) phase
# F is high resolution (50 Hz sampling rate) effective group delay

# XXYYMMDDHHMMSSZZZ_ACCT.mat
# 01234567890123456789012345

def get_narrowbroad_file_info(pathfile,type='N',verbose=False):
  """Decode the metadata encoded in a narrowband file name.

  The file name is expected to follow the fixed-width pattern
  XXYYMMDDHHMMSSZZZ_ACCT.mat and is cut into fields by character position.

  Parameters
  ----------
  pathfile : str
      '/'-separated path of the file. The station name is taken from the
      third component from the end (.../<station>/<date dir>/<file>).
  type : str, optional
      Only 'N' is handled; any other value yields an empty dictionary.
      (The name shadows the builtin `type`; it is kept because it is part of
      the public signature.)
  verbose : bool, optional
      When True, also print the result as YAML (PyYAML is imported only then).

  Returns
  -------
  dict
      For type 'N': 'filename', 'location', 'size', one entry per file-name
      field ('Station_ID', 'Year', 'Month', 'Day', 'Hour', 'Minute',
      'Second', 'Transmitter', '_', 'Usually not used', 'CC', 'Type_ABCDF',
      '.mat'), then 'resolution', 'amplitude_phase' and 'Station_name'.
      All field values are strings sliced from the file name.
      'resolution' / 'amplitude_phase' are 'unknown' when the type letter is
      not one of A, B, C, D, F; 'Station_name' is 'unknown' when the path has
      fewer than three components.
  """
  # Inclusive [first, last] character positions of each field in the name.
  narrowband_format_dict = {
  'Station_ID':[0,1],
  'Year':[2,3],
  'Month':[4,5],
  'Day':[6,7],
  'Hour':[8,9],
  'Minute':[10,11],
  'Second':[12,13],
  'Transmitter':[14,16],
  '_':[17,17],
  'Usually not used':[18,18],
  # CC: 00 for N/S channel, 01 for E/W channel. It is deliberately left
  # unmapped because other values (e.g. 06 in AntarticaFerraz) occur in the data.
  'CC':[19,20],
  'Type_ABCDF':[21,21],
  '.mat':[22,25]
  }

  # Type letter -> (sampling resolution, measured quantity):
  # A/B are low resolution (1 Hz), C/D/F are high resolution (50 Hz).
  Type_ABCDF_map = {'A': ['low','amplitude'],
  'B': ['low','phase'],
  'C': ['high','amplitude'],
  'D': ['high','phase'],
  'F': ['high','group delay']
  }

  info = {}

  if type == 'N':
    parts = pathfile.split('/')
    filename = parts[-1]

    info['filename'] = filename
    info['location'] = pathfile
    info['size'] = get_file_properties(pathfile)['size']

    for key, (first, last) in narrowband_format_dict.items():
      info[key] = filename[first:last + 1]

    # A stray or malformed file name must not abort a whole database build
    # with a bare KeyError, so unknown type letters degrade to 'unknown'.
    resolution, quantity = Type_ABCDF_map.get(info['Type_ABCDF'], ['unknown', 'unknown'])
    info['resolution'] = resolution
    info['amplitude_phase'] = quantity

    # Guard against short paths (e.g. a bare file name) instead of IndexError.
    info['Station_name'] = parts[-3] if len(parts) >= 3 else 'unknown'

  if verbose:
    import yaml  # lazy: only the printing path needs PyYAML
    print(yaml.dump(info, default_flow_style=False))

  return info

# Decode one sample file name; the result is discarded (bound to `_`) and
# only the verbose YAML print-out is shown.
_ = get_narrowbroad_file_info('/content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006A.mat',type='N',verbose=True)

.mat: .mat
CC: '06'
Day: '10'
Hour: '00'
Minute: '05'
Month: '07'
Second: '00'
Station_ID: FE
Station_name: AntarticaFerraz
Transmitter: NAA
Type_ABCDF: A
Usually not used: '0'
Year: '07'
_: _
amplitude_phase: amplitude
filename: FE070710000500NAA_006A.mat
location: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006A.mat
resolution: low
size: 206292



In [28]:
def build_narrowband(pathname, verbose=False, trace=False):
  """Index a narrowband data folder laid out as <root>/<station>/<dir>/<file>.

  Parameters
  ----------
  pathname : str
      Root directory; each sub-directory is treated as a station.
  verbose : bool, optional
      When True, print the final index as YAML.
  trace : bool, optional
      When True, print the intermediate index after each of the first two
      stages as YAML.

  Returns
  -------
  dict
      {station: {'location': station directory,
                 'files': {file name: {'path_file': full path}},
                 'dates': {YYMMDD string: number of files},
                 'transmitters': {callsign: number of files}}}
      Only regular files found exactly two levels below the root are indexed.
      Files with the same name in different sub-directories of one station
      overwrite each other in 'files'.
  """
  if verbose or trace:
    import yaml  # lazy: only the printing paths need PyYAML

  # Stage 1: every sub-directory of the root is a station.
  narrowband_data = {}
  for station, desc in get_file_tree1(pathname)['files'].items():
    if desc['type'] == 'Dir':
      narrowband_data[station] = {'location': desc['path_file']}

  if trace:
    print(yaml.dump(narrowband_data , default_flow_style=False))

  # Stage 2: collect the regular files inside each sub-directory of a station.
  for station, desc_narrow in narrowband_data.items():
    station_files = {}
    for yy_mm_dd, desc_tree in get_file_tree1(desc_narrow['location'])['files'].items():
      if desc_tree['type'] != 'Dir':
        continue
      for f, desc_tree2 in get_file_tree1(desc_tree['path_file'])['files'].items():
        if desc_tree2['type'] == 'File':
          station_files[f] = {'path_file': desc_tree2['path_file']}
    desc_narrow['files'] = station_files

  if trace:
    print(yaml.dump(narrowband_data , default_flow_style=False))

  # Stage 3: per-station tallies of dates and transmitters.
  for station, desc_narrow in narrowband_data.items():
    # Plain dicts (not collections.Counter) so the YAML dump stays unchanged.
    dates = {}
    transmitters = {}
    for f, desc_file in desc_narrow['files'].items():
      # Decode each file name once; every decode also stats the file.
      file_info = get_narrowbroad_file_info(desc_file['path_file'],type='N')

      yymmdd = file_info['Year'] + file_info['Month'] + file_info['Day']
      dates[yymmdd] = dates.get(yymmdd, 0) + 1

      transm = file_info['Transmitter']
      transmitters[transm] = transmitters.get(transm, 0) + 1

    desc_narrow['dates'] = dates
    desc_narrow['transmitters'] = transmitters

  if verbose:
    print(yaml.dump(narrowband_data , default_flow_style=False))

  return narrowband_data

# Build the index of the whole Narrowband folder and keep it in `nb`;
# verbose=True also prints it as YAML.
nb = build_narrowband('/content/drive/MyDrive/VLF/data/Narrowband', verbose=True)

AntarticaFerraz:
  dates:
    '070710': 2
  files:
    FE070710000500NAA_006A.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006A.mat
    FE070710000500NAA_006B.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006B.mat
  location: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz
  transmitters:
    NAA: 2
PalmerStation:
  dates:
    '110105': 11
    '110526': 11
  files:
    PA110105000000DHO_100A.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/PalmerStation/2011_01_05/PA110105000000DHO_100A.mat
    PA110105000000DHO_100B.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/PalmerStation/2011_01_05/PA110105000000DHO_100B.mat
    PA110105000000DHO_100C.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/PalmerStation/2011_01_05/PA110105000000DHO_100C.mat
    PA110105000000DHO_100D.mat:
      path_file: /content/dr

# Build Database

In [29]:
# Build the database: index the whole Narrowband folder into `nb`.
# NOTE(review): this repeats the call made right after build_narrowband is
# defined, so the Drive folder is scanned a second time.
nb = build_narrowband('/content/drive/MyDrive/VLF/data/Narrowband', verbose=True)

AntarticaFerraz:
  dates:
    '070710': 2
  files:
    FE070710000500NAA_006A.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006A.mat
    FE070710000500NAA_006B.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz/2007_07_10/FE070710000500NAA_006B.mat
  location: /content/drive/MyDrive/VLF/data/Narrowband/AntarticaFerraz
  transmitters:
    NAA: 2
PalmerStation:
  dates:
    '110105': 11
    '110526': 11
  files:
    PA110105000000DHO_100A.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/PalmerStation/2011_01_05/PA110105000000DHO_100A.mat
    PA110105000000DHO_100B.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/PalmerStation/2011_01_05/PA110105000000DHO_100B.mat
    PA110105000000DHO_100C.mat:
      path_file: /content/drive/MyDrive/VLF/data/Narrowband/PalmerStation/2011_01_05/PA110105000000DHO_100C.mat
    PA110105000000DHO_100D.mat:
      path_file: /content/dr

# Class and Functions for Database Object

In [30]:
class nb_class:
  """Read-only accessor over a narrowband index.

  The wrapped dictionary is expected to have the shape returned by
  build_narrowband:
  {station: {'dates': {...}, 'files': {name: {'path_file': ...}},
             'transmitters': {...}, ...}}.

  Every method reads the instance's own data (self.nb), so several objects
  built from different indexes can coexist and no global `nb` is required.
  """

  def __init__(self, nb):
    """Store the index dictionary `nb` on the instance (no copy is made)."""
    self.nb = nb

  def stations(self):
    """Return the station names (top-level keys) as a list."""
    return list(self.nb.keys())

  def dates_available(self,station):
    """Return the date keys recorded for `station`; KeyError if unknown."""
    return list(self.nb[station]['dates'])

  def files_available(self,station):
    """Return the file names recorded for `station`; KeyError if unknown."""
    return list(self.nb[station]['files'])

  def transmitters_available(self,station):
    """Return the transmitter callsigns recorded for `station`; KeyError if unknown."""
    return list(self.nb[station]['transmitters'])

  def get_path_file(self,station,file):
    """Return the stored full path of `file` under `station`; KeyError if either is unknown."""
    return self.nb[station]['files'][file]['path_file']

# Wrap the index built above and walk through each accessor.
# The printed labels are in Portuguese; English glosses are given in comments.
nb_obj = nb_class(nb)

# "Available stations"
print('\nEstações disponíveis: ')
print( nb_obj.stations() )

# "Dates available for PalmerStation"
print('\nDatas disponíveis para PalmerStation: ')
print( nb_obj.dates_available('PalmerStation') )

# "Files available for PalmerStation"
print('\nArquivos disponíveis para PalmerStation: ')
print( nb_obj.files_available('PalmerStation') )

# "Transmitters available for PalmerStation"
print('\nTransmitters disponíveis para PalmerStation: ')
print( nb_obj.transmitters_available('PalmerStation') )

# "Path to access a given file of the database"
print('\nPath para acessar um dado arquivo da base: ')
print( nb_obj.get_path_file('PalmerStation','PA110526000000DHO_100B.mat') )

# "Info about a given file of the database"
print('\nInfo de um dado arquivo da base: ')
pathfile = nb_obj.get_path_file('PalmerStation','PA110526000000DHO_100B.mat')
# Bare last expression: the notebook displays the returned dict as the cell output.
get_narrowbroad_file_info(pathfile,type='N',verbose=False)




Estações disponíveis: 
['SouthPole', 'PalmerStation', 'AntarticaFerraz']

Datas disponíveis para PalmerStation: 
['110526', '110105']

Arquivos disponíveis para PalmerStation: 
['PA110526000000DHO_101B.mat', 'PA110526000000DHO_101A.mat', 'PA110526000000DHO_100B.mat', 'PA110526000000DHO_100A.mat', 'PA110526000000DHO_101F.mat', 'PA110526000000DHO_100F.mat', 'PA110526000000DHO_100C.mat', 'PA110526000000DHO_101C.mat', 'PA110526000000DHO_100D.mat', 'PA110526000000DHO_101D.mat', 'PA110526000000GBZ_100A.mat', 'PA110105000000DHO_100A.mat', 'PA110105000000DHO_101A.mat', 'PA110105000000DHO_100B.mat', 'PA110105000000DHO_101B.mat', 'PA110105000000DHO_101C.mat', 'PA110105000000DHO_100C.mat', 'PA110105000000GBZ_100A.mat', 'PA110105000000DHO_100D.mat', 'PA110105000000DHO_101D.mat', 'PA110105000000GBZ_100B.mat', 'PA110105000000GBZ_100C.mat']

Transmitters disponíveis para PalmerStation: 
['DHO', 'GBZ']

Path para acessar um dado arquivo da base: 
/content/drive/MyDrive/VLF/data/Narrowband/PalmerStati

{'.mat': '.mat',
 'CC': '00',
 'Day': '26',
 'Hour': '00',
 'Minute': '00',
 'Month': '05',
 'Second': '00',
 'Station_ID': 'PA',
 'Station_name': 'PalmerStation',
 'Transmitter': 'DHO',
 'Type_ABCDF': 'B',
 'Usually not used': '1',
 'Year': '11',
 '_': '_',
 'amplitude_phase': 'phase',
 'filename': 'PA110526000000DHO_100B.mat',
 'location': '/content/drive/MyDrive/VLF/data/Narrowband/PalmerStation/2011_05_26/PA110526000000DHO_100B.mat',
 'resolution': 'low',
 'size': 369270}