In [1]:
import pandas as pd

In [2]:
from xml.sax.saxutils import escape

from lxml import etree
import requests

INFRAERO_URL = "http://voos.infraero.gov.br/wsvoosmobile/ConsultaVoos.svc"
USER_AGENT = "Infraero%20Voos%20Online/2.6 CFNetwork/758.1.6 Darwin/15.0.0"


class Infraero(object):
    """Small client for the ``ConsultarVoosSentido`` SOAP operation.

    Requests are POSTed to ``url`` through a shared ``requests.Session`` that
    carries the configured ``User-Agent`` header.
    """

    def __init__(self, url=INFRAERO_URL, user_agent=USER_AGENT, timeout=30):
        """Create the client.

        Args:
            url: Endpoint the SOAP envelope is POSTed to.
            user_agent: Value sent in the ``User-Agent`` header.
            timeout: Seconds passed to ``requests`` as the request timeout.
                ``None`` disables the timeout (wait indefinitely).
        """
        self.url = url
        self.timeout = timeout

        self.session = requests.Session()
        self.session.headers['User-Agent'] = user_agent

    def _get_flights_envelope(self, icao, language, departure, finalized, per_page, page_num):
        """Build the SOAP request body for ``ConsultarVoosSentido``.

        Args:
            icao: Value for the ``<icao>`` element.
            language: Value for the ``<idioma>`` element.
            departure: Truthy -> ``<partida>true</partida>``, else ``false``.
            finalized: Truthy -> ``<exibirFinalizados>true</...>``, else ``false``.
            per_page: Integer for ``<registrosPagina>`` (formatted with ``:d``).
            page_num: Integer for ``<pagina>`` (formatted with ``:d``).

        Returns:
            The envelope as a single string.
        """
        template = ('<?xml version="1.0" encoding="utf-8"?>'
                    '<soap:Envelope'
                    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
                    ' xmlns:xsd="http://www.w3.org/2001/XMLSchema"'
                    ' xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"'
                    ' xmlns="http://tempuri.org/">'
                    '<soap:Body>'
                    '<ConsultarVoosSentido>'
                    '<icao>{icao}</icao>'
                    '<idioma>{language}</idioma>'
                    '<partida>{departure}</partida>'
                    '<exibirFinalizados>{finalized}</exibirFinalizados>'
                    '<registrosPagina>{per_page:d}</registrosPagina>'
                    '<pagina>{page_num:d}</pagina>'
                    '</ConsultarVoosSentido>'
                    '</soap:Body>'
                    '</soap:Envelope>')

        context = {
            # Free-text values are XML-escaped so characters such as '&' or
            # '<' cannot break (or inject markup into) the envelope. They are
            # stringified first so non-string values keep working as before.
            "icao": escape("{0}".format(icao)),
            "language": escape("{0}".format(language)),
            "departure": "true" if departure else "false",
            "finalized": "true" if finalized else "false",
            "per_page": per_page,
            "page_num": page_num,
        }

        return template.format(**context)

    def get_flights(self,
                    icao,
                    departure=True,
                    finalized=False,
                    language="bra",
                    per_page=20,
                    page_num=1):
        """Request one page of flights and return them as a list of dicts.

        Args:
            icao: Airport code sent in ``<icao>``.
            departure: Sent as ``<partida>`` (``true``/``false``).
            finalized: Sent as ``<exibirFinalizados>`` (``true``/``false``).
            language: Sent as ``<idioma>``.
            per_page: Sent as ``<registrosPagina>``; must be an integer.
            page_num: Sent as ``<pagina>``; must be an integer.

        Returns:
            The list produced by ``parse_flights_response``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If no response arrives within ``self.timeout``.
        """
        envelope = self._get_flights_envelope(icao, language, departure, finalized, per_page, page_num)

        request_headers = {
            "SOAPAction": "http://tempuri.org/ConsultarVoosSentido",
            "Content-Type": "text/xml",
        }

        http_response = self.session.post(
            self.url,
            headers=request_headers,
            data=envelope,
            timeout=self.timeout,
        )
        # Fail loudly on HTTP errors instead of feeding an error page to the
        # XML parser below.
        http_response.raise_for_status()

        return self.parse_flights_response(http_response.text)

    def parse_flights_response(self, response):
        """Parse the SOAP response text into a list of flight dicts.

        The result element of the SOAP response carries a second XML document
        as its text; that inner document holds one child element per flight.

        Args:
            response: Response body as text.

        Returns:
            A list with one dict per flight, mapping each child element's tag
            to its text. Empty list when the result element has no content.
        """
        # Outer document: first child of each level, i.e.
        # envelope -> body -> operation response -> operation result.
        envelope = etree.XML(response)
        body = envelope[0]
        operation_response = body[0]
        operation_result = operation_response[0]

        inner_xml = operation_result.text
        if inner_xml is None or not inner_xml.strip():
            # Nothing embedded in the result element: no flights to report.
            return []

        # NOTE(review): the text is re-encoded to Latin-1 bytes before parsing;
        # presumably the embedded document declares that encoding (lxml will
        # not parse unicode text carrying an encoding declaration) -- confirm.
        flights = etree.XML(inner_xml.encode('latin1'))

        # Iterating an element yields its children (replacement for the
        # deprecated ``getchildren()``).
        return [
            {prop.tag: prop.text for prop in flight}
            for flight in flights
        ]

In [3]:
# Client with the default endpoint (INFRAERO_URL) and User-Agent (USER_AGENT).
infraero = Infraero()

In [4]:
# First page (20 records by default) of departures for ICAO code SBGL;
# `departure` defaults to True in get_flights.
flights = infraero.get_flights("SBGL")

In [5]:
# Departures table: keep the columns of interest (in display order) and
# relabel them with short headers.
df_flights = pd.DataFrame(flights)[
    ['NOM_CIA', 'NUM_VOO', 'HOR_PREV', 'HOR_CONF', 'NUM_GATE', 'NOM_AEROPORTO', 'NOM_PAIS', 'TXT_OBS']
].rename(columns={
    'NOM_CIA': 'CIA',
    'NUM_VOO': 'VOO',
    'HOR_PREV': 'STD',
    'HOR_CONF': 'ETD',
    'NUM_GATE': 'Gate',
    'NOM_AEROPORTO': 'Destino',
    'NOM_PAIS': 'Pais',
    'TXT_OBS': 'Status',
})
df_flights

Unnamed: 0,CIA,VOO,STD,ETD,Gate,Destino,Pais,Status
0,PASSAREDO,2259,16:04,16:06,,Leite Lopes,Brasil,Aeronave Decolando
1,TAM,2259,16:04,16:06,,Leite Lopes,Brasil,Aeronave Decolando
2,GOL,1611,16:44,16:44,4.0,Congonhas,Brasil,Última Chamada
3,ALITALIA,673,17:00,17:00,10.0,Roma /Int.,Itália,Última Chamada
4,GOL,1778,17:02,17:02,8.0,Vitoria,Brasil,Embarque Próximo
5,TAM,3307,17:07,17:07,25.0,Internacional / Sao Paulo,Brasil,Embarque Imediato
6,AMERICAN AIRLINES,7784,17:07,17:07,25.0,Pinto Martins /Int.,Brasil,Embarque Imediato
7,US AIRWAYS,7819,17:07,17:07,25.0,Pinto Martins /Int.,Brasil,Embarque Imediato
8,LAN-CHILE,6505,17:07,17:07,25.0,Pinto Martins /Int.,Brasil,Embarque Imediato
9,LAN-CHILE,7784,17:07,17:07,25.0,Pinto Martins /Int.,Brasil,Embarque Imediato


In [6]:
# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print(df_flights.to_json(orient="records"))

[{"CIA":"PASSAREDO","VOO":"02259","STD":"16:04","ETD":"16:06","Gate":null,"Destino":"Leite Lopes","Pais":"Brasil","Status":"Aeronave Decolando"},{"CIA":"TAM","VOO":"02259","STD":"16:04","ETD":"16:06","Gate":null,"Destino":"Leite Lopes","Pais":"Brasil","Status":"Aeronave Decolando"},{"CIA":"GOL","VOO":"01611","STD":"16:44","ETD":"16:44","Gate":"04","Destino":"Congonhas","Pais":"Brasil","Status":"\u00daltima Chamada"},{"CIA":"ALITALIA","VOO":"00673","STD":"17:00","ETD":"17:00","Gate":"10","Destino":"Roma \/Int.","Pais":"It\u00e1lia","Status":"\u00daltima Chamada"},{"CIA":"GOL","VOO":"01778","STD":"17:02","ETD":"17:02","Gate":"08","Destino":"Vitoria","Pais":"Brasil","Status":"Embarque Pr\u00f3ximo"},{"CIA":"TAM","VOO":"03307","STD":"17:07","ETD":"17:07","Gate":"25","Destino":"Internacional \/ Sao Paulo","Pais":"Brasil","Status":"Embarque Imediato"},{"CIA":"AMERICAN AIRLINES","VOO":"07784","STD":"17:07","ETD":"17:07","Gate":"25","Destino":"Pinto Martins \/Int.","Pais":"Brasil","Status":"Em

In [7]:
# Same airport, but with departure=False (sent as <partida>false</partida>).
# Note: this rebinds `flights`, replacing the list fetched earlier.
flights = infraero.get_flights("SBGL", departure=False)

In [8]:
# Table for the departure=False query: same source columns as before,
# relabelled with arrival-style headers (STA / ETA / Origem).
df_flights = pd.DataFrame(flights)[
    ['NOM_CIA', 'NUM_VOO', 'HOR_PREV', 'HOR_CONF', 'NUM_GATE', 'NOM_AEROPORTO', 'NOM_PAIS', 'TXT_OBS']
].rename(columns={
    'NOM_CIA': 'CIA',
    'NUM_VOO': 'VOO',
    'HOR_PREV': 'STA',
    'HOR_CONF': 'ETA',
    'NUM_GATE': 'Gate',
    'NOM_AEROPORTO': 'Origem',
    'NOM_PAIS': 'Pais',
    'TXT_OBS': 'Status',
})
df_flights

Unnamed: 0,CIA,VOO,STA,ETA,Gate,Origem,Pais,Status
0,TAM,3306,16:12,16:20,25,Internacional / Sao Paulo,Brasil,Aeronave no Pátio
1,TAM,9188,16:30,16:16,22,Salgado Filho,Brasil,Aeronave no Pátio
2,GOL,1610,16:50,16:24,05,Congonhas,Brasil,Aeronave no Pátio
3,TAM,8021,17:10,17:30,30,Santiago Internacional,Chile,Confirmado
4,GOL,1391,17:19,16:52,03,Zumbi dos Palmares,Brasil,Confirmado
5,AVIANCA - ONE,6227,17:25,17:25,07,Deputado Luiz Eduardo Magalhaes,Brasil,Previsto
6,TAM,3638,17:29,17:29,25,Zumbi dos Palmares,Brasil,Confirmado
7,GOL,1555,17:32,17:31,06A,Salgado Filho,Brasil,Confirmado
8,GOL,1659,17:34,17:00,06B,Deputado Luiz Eduardo Magalhaes,Brasil,Confirmado
9,GOL,1431,17:38,17:38,03,Vitoria,Brasil,Previsto


# Protótipo de JSON

In [9]:
# SBGL departures, including the flight date column (DAT_VOO -> Dia).
flights_sbgl = pd.DataFrame(infraero.get_flights("SBGL"))[
    ['NOM_CIA', 'NUM_VOO', 'DAT_VOO', 'HOR_PREV', 'HOR_CONF', 'NUM_GATE', 'NOM_AEROPORTO', 'NOM_PAIS', 'TXT_OBS']
].rename(columns={
    'NOM_CIA': 'CIA',
    'NUM_VOO': 'VOO',
    'DAT_VOO': 'Dia',
    'HOR_PREV': 'STD',
    'HOR_CONF': 'ETD',
    'NUM_GATE': 'Gate',
    'NOM_AEROPORTO': 'Destino',
    'NOM_PAIS': 'Pais',
    'TXT_OBS': 'Status',
})

In [10]:
# SBRJ departures, including the flight date column (DAT_VOO -> Dia).
flights_sbrj = pd.DataFrame(infraero.get_flights("SBRJ"))[
    ['NOM_CIA', 'NUM_VOO', 'DAT_VOO', 'HOR_PREV', 'HOR_CONF', 'NUM_GATE', 'NOM_AEROPORTO', 'NOM_PAIS', 'TXT_OBS']
].rename(columns={
    'NOM_CIA': 'CIA',
    'NUM_VOO': 'VOO',
    'DAT_VOO': 'Dia',
    'HOR_PREV': 'STD',
    'HOR_CONF': 'ETD',
    'NUM_GATE': 'Gate',
    'NOM_AEROPORTO': 'Destino',
    'NOM_PAIS': 'Pais',
    'TXT_OBS': 'Status',
})

In [11]:
# Prototype payload: one entry per airport, keyed by ICAO code. Each entry
# carries hard-coded "name" / "traffic_mins" literals plus the first five
# departures serialized as a list of per-flight dicts.
results = {"airports": {}}

results["airports"]["SBGL"] = {
    "name": "Galeão - Aeroporto Internacional Antônio Carlos Jobim",
    "traffic_mins": "25",
    "departures": flights_sbgl.head(5).to_dict(orient="records"),
}

results["airports"]["SBRJ"] = {
    "name": "Aeroporto Santos Dumont",
    "traffic_mins": "43",
    "departures": flights_sbrj.head(5).to_dict(orient="records"),
}

In [12]:
import json

In [13]:
# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print(json.dumps(results, indent=4))

{
    "airports": {
        "SBGL": {
            "traffic_mins": "25", 
            "name": "Gale\u00e3o - Aeroporto Internacional Ant\u00f4nio Carlos Jobim", 
            "departures": [
                {
                    "STD": "16:04", 
                    "Status": "Aeronave Decolando", 
                    "CIA": "PASSAREDO", 
                    "VOO": "02259", 
                    "Dia": "26/11", 
                    "Destino": "Leite Lopes", 
                    "ETD": "16:06", 
                    "Pais": "Brasil", 
                    "Gate": null
                }, 
                {
                    "STD": "16:04", 
                    "Status": "Aeronave Decolando", 
                    "CIA": "TAM", 
                    "VOO": "02259", 
                    "Dia": "26/11", 
                    "Destino": "Leite Lopes", 
                    "ETD": "16:06", 
                    "Pais": "Brasil", 
                    "Gate": null
                }, 
                {
 