# Write a Web Service

•  Wrap the output of the second exercise in a web service that returns the data in JSON format (instead of printing to the standard output).

• The web service should accept a parameter n > 0 giving the number of airports to return: for the top 10 airports, n is 10; for the top X airports, n is X.

## Step 1: Let's start with a sample

In [25]:
%%writefile top_arrival_airports_2013.py
import streamlit as st
import pandas as pd

# Streamlit page: ranks the arrival airports of 2013 by total passengers,
# using the local bookings sample, and returns the top n as a table and JSON.
st.title('Top arrival airports in 2013')
st.subheader('This web service will allow you to get the top arrival airports in terms of passengers in 2013 on a JSON format')
st.markdown('Please insert the number of TOP airports you want to get. For instance, for the TOP 10 airports you will have to specify 10.')


def _parse_top_n(raw):
    """Return *raw* as a positive int, or None when it is not one.

    st.text_input returns a string (empty until the user types something),
    so calling int() on it directly would raise ValueError on first render.
    """
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return None
    return value if value > 0 else None


n = _parse_top_n(st.text_input("Insert a number:"))

if n is None:
    st.warning('Please enter a whole number greater than 0.')
else:
    bookings_sample = pd.read_csv('bookings.sample.csv.bz2', compression='bz2', sep='^', usecols=['year','arr_port','pax'])

    bookings_sample_2013 = bookings_sample[bookings_sample['year'] == 2013]
    # Honour the requested n instead of a hard-coded top 10.
    top_airports = bookings_sample_2013.groupby('arr_port')['pax'].sum().sort_values(ascending=False).head(n)
    st.table(top_airports)

    result_json = top_airports.to_json()
    st.json(result_json)

Overwriting top_arrival_airports_2013.py


## Step 2: Let's do it now with the whole dataset using chunks

In [24]:
%%writefile top_arrival_airports_2013.py
import streamlit as st
import pandas as pd

# Streamlit page: ranks the arrival airports of 2013 by total passengers,
# streaming the full bookings file in chunks, and returns the top n as a
# table and JSON.
st.title('Top arrival airports in 2013')
st.subheader('This web service will allow you to get the top arrival airports in terms of passengers in 2013 on a JSON format')
st.markdown('Please insert the number of TOP airports you want to get. For instance, for the TOP 10 airports you will have to specify 10.')


def _parse_top_n(raw):
    """Return *raw* as a positive int, or None when it is not one.

    st.text_input returns a string (empty until the user types something),
    so calling int() on it directly would raise ValueError on first render.
    """
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return None
    return value if value > 0 else None


n = _parse_top_n(st.text_input("Insert a number:"))

if n is None:
    st.warning('Please enter a whole number greater than 0.')
else:
    chksize = 100000
    reader = pd.read_csv('/home/dsc/Data/challenge/bookings_without_duplicates.csv' , sep='^', usecols=['year','arr_port','pax'], iterator=True, chunksize=chksize)

    # Collect one per-airport partial sum (a Series indexed by arr_port) per
    # chunk. Appending these Series to a DataFrame would add them as rows
    # with airports as columns, leaving no 'arr_port' key to group on.
    partial_sums = []
    for df in reader:
        df = df[df['year'] == 2013]
        partial_sums.append(df.groupby('arr_port')['pax'].sum())

    if partial_sums:
        # The same airport can appear in several chunks: merge the partial
        # sums by grouping on the index (arr_port) and summing again.
        pax_per_airport = pd.concat(partial_sums).groupby(level=0).sum()
    else:
        # pd.concat raises on an empty list (file yielded no chunks).
        pax_per_airport = pd.Series(dtype='float64', name='pax')

    top_airports = pax_per_airport.sort_values(ascending=False).head(n)
    st.table(top_airports)

    result_json = top_airports.to_json()
    st.json(result_json)

Overwriting top_arrival_airports_2013.py


## Step 3: Let's do it again with a sample uploaded to the Internet

In [21]:
%%writefile top_arrival_airports_2013.py
import streamlit as st
import pandas as pd

# Streamlit page: ranks the arrival airports of 2013 by total passengers,
# using the bookings sample hosted on GitHub, and returns the top n as a
# table and JSON.
st.title('Top arrival airports in 2013')
st.subheader('This web service will allow you to get the top arrival airports in terms of passengers in 2013 on a JSON format')
st.markdown('Please insert the number of TOP airports you want to get. For instance, for the TOP 10 airports you will have to specify 10.')


def _parse_top_n(raw):
    """Return *raw* as a positive int, or None when it is not one.

    st.text_input returns a string (empty until the user types something),
    so calling int() on it directly would raise ValueError on first render.
    """
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return None
    return value if value > 0 else None


n = _parse_top_n(st.text_input("Insert a number:"))

if n is None:
    st.warning('Please enter a whole number greater than 0.')
else:
    # Use the raw-content URL: the github.com/.../blob/... address serves an
    # HTML page, which cannot be decompressed as bz2.
    bookings_sample = pd.read_csv('https://raw.githubusercontent.com/Laurajmoreno/DS_Challenge/main/bookings.sample.csv.bz2', compression='bz2', sep='^', usecols=['year','arr_port','pax'])

    bookings_sample_2013 = bookings_sample[bookings_sample['year'] == 2013]
    # Honour the requested n instead of a hard-coded top 10.
    top_airports = bookings_sample_2013.groupby('arr_port')['pax'].sum().sort_values(ascending=False).head(n)
    # Show every requested row; a bare .head() would cut the table to 5.
    st.table(top_airports)

    result_json = top_airports.to_json()
    st.json(result_json)

Overwriting top_arrival_airports_2013.py


In [None]:
%%writefile top_arrival_airports_2013.py
import streamlit as st
import pandas as pd

# Streamlit page (exploratory step): loads the bookings sample hosted on
# GitHub through a cached loader and previews the first rows for 2013.
st.title('Top arrival airports in 2013')
st.subheader('This web service will allow you to get the top arrival airports in terms of passengers in 2013 on a JSON format')
st.markdown('Please insert the number of TOP airports you want to get. For instance, for the TOP 10 airports you will have to specify 10.')

# The value is collected but not used yet in this step; only the input
# widget is rendered.
n = st.text_input("Insert a number:")

@st.cache
def get_csv():
    """Download the bookings sample and return its year/arr_port/pax columns.

    Decorated with st.cache so the download is not repeated on every rerun.
    """
    # Use the raw-content URL: the github.com/.../blob/... address serves an
    # HTML page, which cannot be decompressed as bz2.
    return pd.read_csv('https://raw.githubusercontent.com/Laurajmoreno/DS_Challenge/main/bookings.sample.csv.bz2', compression='bz2', sep='^', usecols=['year','arr_port','pax'])
bookings_sample = get_csv()

bookings_sample_2013 = bookings_sample[bookings_sample['year'] == 2013]
st.table(bookings_sample_2013.head())

In [8]:
%%writefile top_arrival_airports_2013.py
import streamlit as st
import pandas as pd

# Streamlit page: ranks the arrival airports of 2013 by total passengers,
# streaming the bookings sample hosted on GitHub in chunks, and returns the
# top n as JSON.
st.title('Top arrival airports in 2013')
st.subheader('This web service will allow you to get the top arrival airports in terms of passengers in 2013 on a JSON format')
st.markdown('Please insert the number of TOP airports you want to get. For instance, for the TOP 10 airports you will have to specify 10.')


def _parse_top_n(raw):
    """Return *raw* as a positive int, or None when it is not one.

    st.text_input returns a string (empty until the user types something);
    Series.head needs an integer, so the text must be converted first.
    """
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return None
    return value if value > 0 else None


n = _parse_top_n(st.text_input("Insert a number:"))

chksize=100


def csv_():
    """Return a fresh chunked reader over the remote bookings sample.

    Deliberately not cached: the reader is a one-shot iterator, so a cached
    instance would already be exhausted on the next script rerun.
    """
    # Use the raw-content URL: the github.com/.../blob/... address serves an
    # HTML page, which cannot be decompressed as bz2.
    return pd.read_csv('https://raw.githubusercontent.com/Laurajmoreno/DS_Challenge/main/bookings.sample.csv.bz2', compression='bz2', sep='^', usecols=['year','arr_port','pax'], iterator=True, chunksize=chksize)


@st.cache
def _load_pax_per_airport_2013():
    """Return total 2013 passengers per arrival airport (Series by arr_port).

    The aggregated result, not the reader, is what gets cached, so reruns
    triggered by a new n do not download the file again.
    """
    # One per-airport partial sum per chunk. Appending these Series to a
    # DataFrame would add them as rows with airports as columns, leaving no
    # 'arr_port' key to group on.
    partial_sums = []
    for df in csv_():
        df = df[df['year'] == 2013]
        partial_sums.append(df.groupby('arr_port')['pax'].sum())

    if not partial_sums:
        # pd.concat raises on an empty list (source yielded no chunks).
        return pd.Series(dtype='float64', name='pax')
    # The same airport can appear in several chunks: merge the partial sums
    # by grouping on the index (arr_port) and summing again.
    return pd.concat(partial_sums).groupby(level=0).sum()


if n is None:
    st.warning('Please enter a whole number greater than 0.')
else:
    pax_per_airport_2013 = _load_pax_per_airport_2013()

    result = pax_per_airport_2013.sort_values(ascending=False).head(n)
    result_json = result.to_json()

    st.json(result_json)


Overwriting top_arrival_airports_2013.py


In [7]:
# List the files in the current working directory.
!ls

 bookings.sample.csv.bz2
'Exercise 1 - Counting the number of lines in a big file.ipynb'
'Exercise 2 - Top 10 arrival airports in 2013 .ipynb'
'Exercise 3 - Number of searches for Madrid, Barcelona and Malaga.ipynb'
'Exercise 4 - Searches with bookings match.ipynb'
'Exercise 5 - Write a Web Service.ipynb'
 README.md
 searches.sample.csv.bz2
 top_arrival_airports_2013.py
