In [1]:
#Load all the libraries
import io # in-memory text buffers (used to hand HTML to pandas)
import re # regular expressions

import pandas as pd # library for data analysis
import requests # library to handle requests
import wikipedia #Wikipedia API
from bs4 import BeautifulSoup # library to parse HTML documents

In [2]:
#Function that uses the API to get the url, then scrapes the information and returns a dataframe
def search_mountains(x):
    """Fetch the Wikipedia page titled ``x`` and return its first "wikitable" as a DataFrame.

    Parameters
    ----------
    x : str
        Page title passed to ``wikipedia.page`` to resolve the article URL.

    Returns
    -------
    pandas.DataFrame
        The first ``<table>`` on the page carrying the ``wikitable`` CSS class.
        When the table has a multi-level header, the levels of each column are
        joined with a single space into one flat column name.

    Raises
    ------
    requests.HTTPError
        If the page request comes back with an error status.
    ValueError
        If the page has no table with the ``wikitable`` class.
    """
    wikiurl = wikipedia.page(x).url
    # A timeout keeps one unresponsive request from hanging the whole scrape.
    response = requests.get(wikiurl, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    mountains = soup.find('table', {'class': "wikitable"})
    if mountains is None:
        # Without this guard str(None) == "None" would be handed to read_html.
        raise ValueError("No wikitable found on page: " + str(wikiurl))
    # read_html expects a file-like object; passing raw HTML text is deprecated
    # in recent pandas releases.
    df = pd.read_html(io.StringIO(str(mountains)))[0]
    if isinstance(df.columns, pd.MultiIndex):
        # str() guards against header levels that are not strings.
        df.columns = [" ".join(str(level) for level in levels) for levels in df.columns]
    return df

In [3]:
#Find the column of the dataframe that contains the name of the mountain
def col_name(cols):
    """Pick the column label(s) most likely to hold the mountain's name.

    The keywords "peak", "mountain", "name" and "summit" are tried in that
    order. The first keyword found (case-insensitively) in at least one label
    decides the result; later keywords are not consulted.

    Parameters
    ----------
    cols : iterable
        Column labels, e.g. ``df.columns``. Labels that are not strings are
        compared through ``str()`` instead of raising.

    Returns
    -------
    list
        Every label containing the deciding keyword, in input order, or an
        empty list when no keyword matches any label.
    """
    labels = list(cols)
    for keyword in ("peak", "mountain", "name", "summit"):
        matches = [label for label in labels if keyword in str(label).lower()]
        if matches:
            return matches
    return []

In [4]:
#Find the column of the dataframe that contains the elevation of the mountain
def col_height(cols):
    """Pick the column label(s) most likely to hold the mountain's elevation.

    The keywords "height", "elevation", "meters", "metres" and "feet" are tried
    in that order; the first keyword found (case-insensitively) in at least one
    label selects the candidates. If more than one candidate remains and any of
    them contains the letter "m", only those are kept (presumably to prefer a
    metres column over a feet column).

    Parameters
    ----------
    cols : iterable
        Column labels, e.g. ``df.columns``. Labels that are not strings are
        compared through ``str()`` instead of raising.

    Returns
    -------
    list
        The selected labels in input order, or an empty list when no keyword
        matches any label.
    """
    labels = list(cols)
    matches = []
    for keyword in ("height", "elevation", "meters", "metres", "feet"):
        matches = [label for label in labels if keyword in str(label).lower()]
        if matches:
            break
    if len(matches) > 1:
        with_m = [label for label in matches if "m" in str(label).lower()]
        if with_m:
            matches = with_m
    return matches

In [5]:
#By using the above functions, it returns the Top 10 Mountains for the selected country.
def clean(country):
    """Return the ten highest mountains listed on Wikipedia for ``country``.

    The table is fetched with ``search_mountains`` from the page
    "List of mountains of <country>" ("List of mountain peaks of the United
    States" when ``country`` is "USA"). The name and elevation columns are
    located with ``col_name`` and ``col_height``, the first run of digits in
    each elevation cell (thousands commas removed) is taken as the value, and
    values judged to be in feet are converted to metres and rounded.

    Parameters
    ----------
    country : str
        Country name used to build the page title; also written to the
        ``Country`` column of the result.

    Returns
    -------
    pandas.DataFrame or None
        Up to ten rows with columns ``Name``, ``Elevation`` and ``Country``,
        sorted by elevation descending. ``None`` is returned when anything
        fails along the way (page or table not found, no usable columns, empty
        table) or when the top row is "Mount Everest" for a country other than
        Nepal or China.
    """
    try:
        if country == "USA":
            page_title = "List of mountain peaks of the United States"
        else:
            page_title = "List of mountains of " + str(country)
        table = search_mountains(page_title)

        name_col = col_name(table.columns)[0]
        height_col = col_height(table.columns)[0]

        # Unit detection: the first letter found in the first elevation cell
        # decides ("f..." means feet). When that cell is not text or holds no
        # letters, fall back to looking for an "f" in the column label.
        first_cell = table[height_col].iloc[0]
        if isinstance(first_cell, str):
            unit_letters = re.sub(r'[^a-zA-Z]', '', first_cell)
        else:
            unit_letters = ""
        if unit_letters:
            in_feet = unit_letters[0] == "f"
        else:
            in_feet = "f" in str(height_col).lower()

        # First group of digits per cell; cells without digits are dropped here
        # and reappear as NaN once aligned with the name column below.
        elevation = (
            table[height_col]
            .astype('str')
            .str.replace(',', '', regex=False)
            .str.extract(r'(\d+)', expand=False)
            .dropna()
            .astype('int')
        )
        if in_feet:
            elevation = (elevation * 0.3048).round()  # feet -> metres

        top = pd.concat([table[name_col], elevation], axis=1)
        top.columns = ['Name', 'Elevation']
        top["Country"] = country
        top = top.sort_values(by=['Elevation'], ascending=False).head(10)

        if top.empty:
            return None
        # Everest leading the list of any other country is treated as a sign
        # that the wrong page was resolved, so the result is discarded.
        if top.iloc[0, 0] == "Mount Everest" and country not in ("Nepal", "China"):
            return None
        return top
    except Exception:
        # Best effort by design: any scraping or parsing failure yields None so
        # a batch over many countries can continue. Exception (rather than a
        # bare except) lets KeyboardInterrupt and SystemExit through.
        return None

In [None]:
#Mongo DB
from pymongo import MongoClient
myclient = MongoClient("mongodb://localhost:27017/") #Connect to the local MongoDB server
db = myclient["apan5400"] #Database handle
mycol = db["Final"] #Collection handle
countries=["Albania","Algeria","Armenia","Austria","Bangladesh","Belize","Bosnia","Bulgaria","Cabo Verde","Canada","Central African Republic","Channel Islands","China","Croatia","Cuba","Denmark","Dominica","Dominican Republic","Ecuador","El Salvador","Estonia","Eswatini","Faroe Islands","Finland","France","Georgia","Germany","Greece","Greenland","Grenada","Guadeloupe","Guatemala","Guinea","Haiti","Honduras","Hungary","Iceland","India","Iran","Ireland","Italy","Japan","Kenya","Lesotho","Liechtenstein","Malaysia","Martinique","Mexico","Nepal","Nicaragua","Norway","Pakistan","Peru","Philippines","Poland","Romania","Saint Lucia","Samoa","Saudi Arabia","Senegal","Serbia","Slovenia","South Africa","Sweden","Trinidad","Ukraine","USA"]
# clean() returns None when a country could not be scraped, and insert_many
# rejects an empty document list, so such countries are skipped and reported
# instead of aborting the whole load.
# NOTE: every run of this cell appends documents; running it twice stores each
# country twice.
skipped = []
for country in countries:
    top_mountains = clean(country)
    if top_mountains is None or top_mountains.empty:
        skipped.append(country)
        continue
    mycol.insert_many(top_mountains.to_dict('records')) #Insert the country's rows in the collection
if skipped:
    print("No data inserted for:", ", ".join(skipped))

In [None]:
#Execute Flask in order to run our application
from flask import Flask, request, render_template
app = Flask(__name__)
@app.route('/', methods=['GET'])
def my_form():
    """Serve the search form (``search.html``) for GET requests to the site root."""
    return render_template("search.html")
@app.route('/', methods=['POST'])
def my_form_post():
    """Handle the search form and show the ten highest mountains of the chosen countries.

    Reads the form fields ``userinput1`` to ``userinput5``, looks up the
    documents in ``mycol`` whose ``Country`` equals any of the non-blank
    values, orders them by ``Elevation`` descending and renders the first ten
    through the ``output.html`` template.
    """
    field_names = ["userinput%d" % number for number in range(1, 6)]
    # .get() treats a missing field like a blank one instead of aborting the
    # request with HTTP 400.
    entered = [request.form.get(field, "").strip() for field in field_names]
    selected = [country for country in entered if country]
    # $in matches the same documents as an $or of equality tests and, unlike
    # $or, also accepts an empty list (it then matches nothing).
    conditions = {"Country": {"$in": selected}}
    # Mongo's _id is excluded by projection rather than by column position.
    cursor = mycol.find(conditions, {"_id": 0}).sort("Elevation", -1).limit(10)
    output = pd.DataFrame(list(cursor))
    return render_template('output.html',tables=[output.to_html(classes='data')], titles=output.columns.values)
    
# Start the development server a single time; app.run() blocks until the server
# is stopped. Debug mode is left off: it enables the auto-reloader and the
# interactive debugger, which are meant for local development only and the
# reloader does not work from inside a notebook kernel.
if __name__ == "__main__":
    app.run()