# Terremotos en México

Proceso ETL:

+ Extracción de datos desde el Servicio Sismológico Nacional (SSN) de la UNAM: http://www.ssn.unam.mx
+ Transformación necesaria de los datos.
+ Carga de datos en base de datos y mapa.

In [1]:
import warnings
warnings.simplefilter('ignore')

import requests as req
import xmltodict
import re

import pandas as pd

**Extracción**

In [2]:
# Address of the SSN "ultimos sismos" RSS feed.
url='http://www.ssn.unam.mx/rss/ultimos-sismos.xml'

# Raw body of the HTTP response; the following cells parse it with xmltodict.
data=req.get(url).content

In [12]:
# Inspect the first <item> of the parsed feed.
xmltodict.parse(data)['rss']['channel']['item'][0]

OrderedDict([('title', '4.0, 89 km al SUROESTE de  PIJIJIAPAN, CHIS'),
             ('description',
              '<p>Fecha:2021-03-29 02:05:19 (Hora de M&eacute;xico)<br/>Lat/Lon: 15.1/-93.78<br/>Profundidad: 37.0 km </p>'),
             ('link',
              'http://www2.ssn.unam.mx:8080/jsp/localizacion-de-sismo.jsp?latitud=15.1&longitud=-93.78&prf=37.0 km&ma=4.0&fecha=2021-03-29&hora=02:05:19&loc=89 km al SUROESTE de  PIJIJIAPAN, CHIS  &evento=1'),
             ('geo:lat', '15.1'),
             ('geo:long', '-93.78')])

In [13]:
# Number of <item> entries present in the downloaded feed.
len(xmltodict.parse(data)['rss']['channel']['item'])

15

In [15]:
# Title text of the first <item>.
xmltodict.parse(data)['rss']['channel']['item'][0]['title']

'4.0, 89 km al SUROESTE de  PIJIJIAPAN, CHIS'

In [16]:
def get_data(url='http://www.ssn.unam.mx/rss/ultimos-sismos.xml', timeout=30):
    """Download the SSN RSS feed and parse it (extraction step).

    Args:
        url: Address of the RSS feed. Defaults to the SSN
            "ultimos sismos" feed.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        The parsed XML document as nested mappings, as produced by
        ``xmltodict.parse``.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
    """
    response = req.get(url, timeout=timeout)

    # Fail here on 4xx/5xx instead of handing an error page to the XML parser.
    response.raise_for_status()

    return xmltodict.parse(response.content)

**Transformación**

In [36]:
# Patterns that pull each field out of the query string of an item's link.
# Raw strings with an escaped dot; the decimal part is optional so that a
# value such as "ma=4" is accepted as well as "ma=4.0".
_NUMERO = r'\d+(?:\.\d+)?'
_CAMPOS_LINK = {
    'Fecha': re.compile(r'fecha=(\d+-\d+-\d+)'),
    'Hora': re.compile(r'hora=(\d+:\d+:\d+)'),
    'Latitud': re.compile(r'latitud=(-?' + _NUMERO + r')'),
    'Longitud': re.compile(r'longitud=(-?' + _NUMERO + r')'),
    'Profundidad': re.compile(r'prf=(' + _NUMERO + r')'),
    'Intensidad': re.compile(r'ma=(' + _NUMERO + r')'),
}


def get_dictio():
    """Turn the feed items into a list of flat records (transformation step).

    Each record is built from the query string of the item's ``link`` and has
    the keys ``Fecha``, ``Hora``, ``Latitud``, ``Longitud``, ``Profundidad``
    and ``Intensidad``. ``Intensidad`` is converted to ``float``; every other
    value is kept as the string found in the link (``Profundidad`` without
    its trailing unit).

    Returns:
        A list of dicts, one per feed item, in feed order. Empty when the
        feed has no items.

    Raises:
        IndexError: If a link lacks one of the expected fields.
    """
    d = get_data()

    lst = d['rss']['channel'].get('item', [])

    # xmltodict yields a bare mapping (not a list) when there is one <item>.
    if isinstance(lst, dict):
        lst = [lst]

    res = []

    for e in lst:
        link = e['link']

        # findall with one capture group returns the captured values;
        # [0] keeps the first occurrence.
        data = {campo: patron.findall(link)[0]
                for campo, patron in _CAMPOS_LINK.items()}
        data['Intensidad'] = float(data['Intensidad'])

        res.append(data)

    return res

In [38]:
# First transformed record; calling get_dictio() downloads the feed again.
get_dictio()[0]

{'Fecha': '2021-03-29',
 'Hora': '02:05:19',
 'Latitud': '15.1',
 'Longitud': '-93.78',
 'Profundidad': '37.0',
 'Intensidad': 4.0}

In [39]:
# Alternative to the regexes: take the text after '?' in the first item's
# link and split it into 'name=value' pieces.
lst=xmltodict.parse(data)['rss']['channel']['item'][0]['link'].split('?')[1].split('&')

# Show each piece as a [name, value] pair.
for e in lst:
    print(e.split('='))

['latitud', '15.1']
['longitud', '-93.78']
['prf', '37.0 km']
['ma', '4.0']
['fecha', '2021-03-29']
['hora', '02:05:19']
['loc', '89 km al SUROESTE de  PIJIJIAPAN, CHIS  ']
['evento', '1']


In [42]:
# Same split as above, collected into a {parameter: value} mapping.
dict(
    par.split('=')
    for par in xmltodict.parse(data)['rss']['channel']['item'][0]['link'].split('?')[1].split('&')
)

{'latitud': '15.1',
 'longitud': '-93.78',
 'prf': '37.0 km',
 'ma': '4.0',
 'fecha': '2021-03-29',
 'hora': '02:05:19',
 'loc': '89 km al SUROESTE de  PIJIJIAPAN, CHIS  ',
 'evento': '1'}

In [43]:
from urllib.parse import urlparse

# Let the standard library split the link instead of cutting at '?' by hand.
u=urlparse(xmltodict.parse(data)['rss']['channel']['item'][0]['link'])
u.query   # query-string part of the link

'latitud=15.1&longitud=-93.78&prf=37.0 km&ma=4.0&fecha=2021-03-29&hora=02:05:19&loc=89 km al SUROESTE de  PIJIJIAPAN, CHIS  &evento=1'

In [44]:
# Individual 'name=value' pieces of the query string.
u.query.split('&')

['latitud=15.1',
 'longitud=-93.78',
 'prf=37.0 km',
 'ma=4.0',
 'fecha=2021-03-29',
 'hora=02:05:19',
 'loc=89 km al SUROESTE de  PIJIJIAPAN, CHIS  ',
 'evento=1']

In [45]:
# Query string of the parsed link as a {parameter: value} mapping.
dict(par.split('=') for par in u.query.split('&'))

{'latitud': '15.1',
 'longitud': '-93.78',
 'prf': '37.0 km',
 'ma': '4.0',
 'fecha': '2021-03-29',
 'hora': '02:05:19',
 'loc': '89 km al SUROESTE de  PIJIJIAPAN, CHIS  ',
 'evento': '1'}

**Carga en base de datos PostgreSQL**

In [51]:
from sqlalchemy import DDL, create_engine, Column, Integer, Text, Float
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

from sqlalchemy.schema import Sequence

In [48]:
# Shell command for creating the database (presumably run once beforehand):
#sudo -u iudh createdb terremotos

# Used both as the database name in the connection URL and as the schema of
# the mapped table.
SCHEMA='terremotos'

In [49]:
# Connection URL. The dialect name must be 'postgresql': the 'postgres' alias
# is rejected by SQLAlchemy 1.4 and later.
# NOTE(review): credentials are hard-coded here; consider reading them from
# the environment.
str_conn=f'postgresql://iudh:password@localhost:5432/{SCHEMA}'

In [50]:
# database connection

class Conector:
    """Open a SQLAlchemy engine and one session bound to it.

    Attributes:
        motor: Engine created from the connection string.
        sesion: Session instance bound to ``motor``.
    """

    def __init__(self, str_conn):
        print('Conectando...')
        motor = create_engine(str_conn)
        fabrica_sesiones = sessionmaker(bind=motor)
        self.motor = motor
        self.sesion = fabrica_sesiones()

In [54]:
# database table

# Declarative base class that the mapped class below derives from.
Base=declarative_base()

class Mexico(Base):
    """ORM mapping of the ``mexico`` table: one row per earthquake record."""
    
    __tablename__='mexico'
    # The table lives in the schema named by SCHEMA.
    __table_args__={'schema': SCHEMA}
    
    # Primary key, filled from the 'mexico_id_seq' sequence.
    id=Column(Integer(), 
              Sequence('mexico_id_seq', start=1, increment=1),
              primary_key=True)
    
    # Date and time are stored as plain text.
    fecha=Column(Text())
    hora=Column(Text())
    lat=Column(Float())
    lng=Column(Float())
    inten=Column(Float())
    # Text column: the outputs in this notebook show values both with and
    # without a unit ('16.0 km' and '37.0').
    prof=Column(Text())

In [63]:
class Prueba:
    """Create, drop, fill and read the ``mexico`` table.

    Attributes:
        motor: SQLAlchemy engine taken from a ``Conector``.
        sesion: SQLAlchemy session taken from the same ``Conector``.
    """

    def __init__(self, str_conn):
        """Connect to the database described by ``str_conn``."""
        conn = Conector(str_conn)

        self.motor = conn.motor
        self.sesion = conn.sesion

        print('Conectado')

    def _existe_tabla(self):
        """Return whether the ``mexico`` table exists in ``SCHEMA``."""
        # The dialect-level check is given a real Connection rather than the
        # Engine, so it does not depend on Engine.execute().
        with self.motor.connect() as conexion:
            return self.motor.dialect.has_table(conexion,
                                                Mexico.__tablename__,
                                                schema=SCHEMA)

    def crea_tabla(self):
        """Create the table unless it already exists."""
        if not self._existe_tabla():
            print('Creando tabla...')
            Mexico.__table__.create(self.motor)
        else:
            print('Ya existe tabla.')

    def borra_tabla(self):
        """Drop the table if it exists."""
        if self._existe_tabla():
            print('Borrando tabla...')
            Mexico.__table__.drop(self.motor)
        else:
            print('No existe tabla.')

    def rellena_tabla(self, datos):
        """Insert every record of ``datos`` in a single transaction.

        Args:
            datos: Iterable of mappings with the keys ``Fecha``, ``Hora``,
                ``Latitud``, ``Longitud``, ``Profundidad`` and ``Intensidad``.

        Raises:
            KeyError: If a record lacks one of those keys (nothing is added).
            Exception: Whatever the session raises while adding or
                committing; the session is rolled back before re-raising.
        """
        # Build all rows first so a malformed record leaves the session clean.
        items = [Mexico(fecha=e['Fecha'],
                        hora=e['Hora'],
                        lat=e['Latitud'],
                        lng=e['Longitud'],
                        prof=e['Profundidad'],
                        inten=e['Intensidad'])
                 for e in datos]

        try:
            self.sesion.add_all(items)
            self.sesion.commit()
        except Exception:
            # Leave the session usable for the next call, then report.
            self.sesion.rollback()
            raise

        print('Comiteado.')

    def show_df(self):
        """Return the whole ``mexico`` table as a pandas DataFrame.

        Column names come from the result set itself, so they always match
        the order of the returned values.
        """
        with self.motor.connect() as conexion:
            resultado = conexion.execute(Mexico.__table__.select())
            columnas = list(resultado.keys())
            # Plain tuples so pandas does not depend on the row class.
            filas = [tuple(fila) for fila in resultado]

        return pd.DataFrame(filas, columns=columnas)
        

In [64]:
# Open the connection; the object is reused by all the cells below.
sql=Prueba(str_conn)

Conectando...
Conectado


In [65]:
# Start from scratch: drop the table if it is there.
sql.borra_tabla()

Borrando tabla...


In [66]:
# Create the (now missing) table.
sql.crea_tabla()

Creando tabla...


In [67]:
# Read the saved CSV and convert it to a list of row dicts.
df=pd.read_csv('../datasets/ssnUnam.csv').to_dict('records')

sql.rellena_tabla(df)   # despite its name, df is no longer a DataFrame

Comiteado.


In [69]:
# First rows of the table after loading the CSV.
sql.show_df().head()

Unnamed: 0,id,fecha,hora,lat,lng,inten,prof
0,1,2020-11-12,05:58:17,15.82,-95.04,4.3,16.0 km
1,2,2020-11-12,04:48:05,16.21,-96.46,3.7,65.0 km
2,3,2020-11-12,04:21:59,16.68,-97.47,3.1,13.0 km
3,4,2020-11-12,04:14:19,15.37,-94.77,3.8,16.0 km
4,5,2020-11-12,04:13:53,17.36,-101.65,3.4,5.0 km


In [71]:
# Append the records currently published in the live feed.
sql.rellena_tabla(get_dictio())

Comiteado.


In [75]:
# Last rows of the table, i.e. the records just added from the feed.
sql.show_df().tail(15)

Unnamed: 0,id,fecha,hora,lat,lng,inten,prof
60,61,2021-03-29,02:05:19,15.1,-93.78,4.0,37.0
61,62,2021-03-29,01:06:01,17.54,-94.85,4.1,144.0
62,63,2021-03-29,00:18:50,15.52,-93.21,4.0,94.0
63,64,2021-03-29,00:04:36,15.4,-94.8,4.2,24.0
64,65,2021-03-28,22:18:45,19.72,-105.52,4.2,16.0
65,66,2021-03-28,16:20:34,14.86,-93.61,3.7,11.0
66,67,2021-03-28,15:34:12,16.29,-98.11,3.3,12.0
67,68,2021-03-28,15:28:09,15.82,-93.2,3.3,67.0
68,69,2021-03-28,15:05:02,16.23,-94.41,3.5,63.0
69,70,2021-03-28,14:22:00,28.29,-105.07,4.1,5.0


In [77]:
# Row count and column dtypes of the table as read back into pandas.
sql.show_df().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   id      75 non-null     int64  
 1   fecha   75 non-null     object 
 2   hora    75 non-null     object 
 3   lat     75 non-null     float64
 4   lng     75 non-null     float64
 5   inten   75 non-null     float64
 6   prof    75 non-null     object 
dtypes: float64(3), int64(1), object(3)
memory usage: 4.2+ KB


**Mapa (heatmap)**

In [78]:
import folium
from folium import plugins

In [85]:
def load_map(intensidad=0, ruta='../images/quakes_map_mex.html'):
    """Draw a heatmap of the stored quakes, save it as HTML and display it.

    Args:
        intensidad: Only rows whose ``inten`` is strictly greater than this
            value are plotted.
        ruta: Path of the HTML file the map is saved to.
    """
    # Query once and reuse the frame for both the mask and the selection.
    df = sql.show_df()
    data = df[df.inten > intensidad]

    value_data = data[['lat', 'lng']].values

    # Fixed map centre and zoom level.
    mapa = folium.Map([19.42, -99.12], zoom_start=4)

    mapa.add_child(plugins.HeatMap(value_data, radius=15))

    mapa.save(ruta)

    display(mapa)

In [86]:
# Heatmap of every row with inten above the default threshold of 0.
load_map()

In [83]:
# Heatmap restricted to rows with inten strictly above 3.
load_map(3)

Unnamed: 0,id,fecha,hora,lat,lng,inten,prof
0,1,2020-11-12,05:58:17,15.82,-95.04,4.3,16.0 km
1,2,2020-11-12,04:48:05,16.21,-96.46,3.7,65.0 km
2,3,2020-11-12,04:21:59,16.68,-97.47,3.1,13.0 km
3,4,2020-11-12,04:14:19,15.37,-94.77,3.8,16.0 km
4,5,2020-11-12,04:13:53,17.36,-101.65,3.4,5.0 km


In [84]:
# Heatmap restricted to rows with inten strictly above 4.
load_map(4)

Unnamed: 0,id,fecha,hora,lat,lng,inten,prof
0,1,2020-11-12,05:58:17,15.82,-95.04,4.3,16.0 km
15,16,2020-11-20,02:43:53,16.18,-94.54,4.9,87.0 km
16,17,2020-11-20,00:09:52,15.67,-96.29,4.2,19.0 km
23,24,2020-11-19,14:58:25,14.68,-95.25,4.1,15.0 km
30,31,2020-11-20,18:05:49,24.11,-108.96,4.2,16.0 km


**Heatmap animado**

In [87]:
from folium.plugins import HeatMapWithTime as HMWT

In [105]:
def animated_map(intensidad=0, ruta='../images/animated_quakes_map_mex.html'):
    """Draw an animated heatmap with one frame per hour of the day.

    Rows are grouped by the hour part of ``hora``; within each hour the
    number of rows per (lat, lng) pair is used as the point weight. Hours
    with no rows produce no frame, so each frame is labelled with its hour.

    Args:
        intensidad: Only rows whose ``inten`` is strictly greater than this
            value are plotted.
        ruta: Path of the HTML file the map is saved to.
    """
    # Query once; .copy() so the helper columns below are added to an
    # independent frame rather than to a slice of the query result.
    df = sql.show_df()
    data = df[df.inten > intensidad].copy()

    data['count'] = 1

    data['hour'] = data.hora.apply(lambda x: int(x.split(':')[0]))

    horas = data.hour.sort_values().unique()

    value_data = [data.loc[data.hour == h, ['lat', 'lng', 'count']]
                      .groupby(['lat', 'lng']).sum()
                      .reset_index().values.tolist()
                  for h in horas]

    # One label per frame, in the same order as value_data.
    etiquetas = [f'{int(h):02d}:00' for h in horas]

    mapa = folium.Map([19.42, -99.12], zoom_start=4)

    HMWT(value_data,
         index=etiquetas,
         radius=15,
         gradient={.2:'blue', .4:'lime', .6:'orange', 1:'red'},
         min_opacity=.5,
         max_opacity=.5,
         use_local_extrema=True).add_to(mapa)

    mapa.save(ruta)

    display(mapa)

In [106]:
# Animated heatmap of every row with inten above the default threshold of 0.
animated_map()

In [107]:
# Animated heatmap restricted to rows with inten strictly above 4.
animated_map(4)