In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json



In [11]:
# Download the SkiMap index of ski areas and parse it as XML.
SKI_AREAS_URL = 'https://skimap.org/SkiAreas/index.xml'

# A timeout keeps the kernel from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(SKI_AREAS_URL, timeout=30)
response.raise_for_status()
source = response.content

soup = BeautifulSoup(source, 'xml')
areas = soup.find_all('skiArea')



In [12]:
def get_data(area_result):
    """Pull the name and coordinates out of one ski-area element.

    Parameters
    ----------
    area_result
        A parsed element offering ``find``; the children it returns must offer
        ``get_text`` (for ``name``) and ``attrs`` (for ``georeferencing``).

    Returns
    -------
    tuple
        ``(name, lat, lng)``. ``lat`` and ``lng`` are the raw attribute values
        of the ``georeferencing`` child. A missing attribute is reported as
        ``"CHECK"``; a missing ``georeferencing`` child yields ``"NoGEO"`` for
        both coordinates.
    """
    name = area_result.find('name').get_text()

    # Search for the element once and reuse it instead of walking the tree
    # again for every attribute.
    geo = area_result.find('georeferencing')
    if not geo:
        return name, "NoGEO", "NoGEO"

    return name, geo.attrs.get('lat', "CHECK"), geo.attrs.get('lng', "CHECK")


# One record per ski area; get_data returns its values in (name, lat, lng)
# order, so they can be zipped straight onto the column names.
for_df = [
    dict(zip(("name", "lat", "lng"), get_data(area)))
    for area in areas
]

df = pd.DataFrame.from_records(for_df)
df.head()

Unnamed: 0,lat,lng,name
0,52.977947,-66.92094,Smokey Mountain Ski Club
1,53.52358,-64.020094,Northern Lights Ski Club
2,48.161777,-54.046468,White Hills
3,53.409742,-60.425457,Snow Goose Mountain (Mont Shana)
4,48.936666666667,-57.827222222222,Marble Mountain


In [13]:
# Put the columns in a fixed, readable order: name first, then coordinates.
column_order = ['name', 'lat', 'lng']
df = df[column_order]

In [14]:
# Preview the first five rows after reordering the columns.
df.head(5)

Unnamed: 0,name,lat,lng
0,Smokey Mountain Ski Club,52.977947,-66.92094
1,Northern Lights Ski Club,53.52358,-64.020094
2,White Hills,48.161777,-54.046468
3,Snow Goose Mountain (Mont Shana),53.409742,-60.425457
4,Marble Mountain,48.936666666667,-57.827222222222


In [15]:
# Drop rows whose latitude is one of the placeholder strings from get_data.
lat_placeholders = ['NoGEO', 'CHECK']
df = df[~df['lat'].isin(lat_placeholders)]

In [16]:
# Same clean-up for longitude: keep only rows that are neither placeholder.
has_real_lng = (df['lng'] != 'NoGEO') & (df['lng'] != 'CHECK')
df = df[has_real_lng]

In [17]:
# Number of ski areas left after removing rows without usable coordinates.
df.shape[0]

3720

In [37]:
# The coordinates were read as text; convert both columns to floats.
df = df.astype({'lat': float, 'lng': float})

In [42]:
# Save the cleaned table; index=False keeps the row index out of the file.
csv_path = 'coordinates.csv'
df.to_csv(csv_path, index=False)

In [19]:
def df_to_geojson(df, properties, lat='lat', lon='lng'):
    """Convert a DataFrame of points into a GeoJSON-style FeatureCollection dict.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per point.
    properties : iterable of str
        Column names copied into each feature's ``properties`` mapping.
    lat, lon : str
        Names of the latitude and longitude columns.

    Returns
    -------
    dict
        ``{'type': 'FeatureCollection', 'features': [...]}``, one ``Point``
        feature per row, with coordinates ordered ``[lon, lat]``.

    Raises
    ------
    KeyError
        If ``df`` has rows but lacks one of the requested columns.
    """
    geojson = {'type': 'FeatureCollection', 'features': []}

    # A frame without rows produces no features, whatever columns it has.
    if len(df) == 0:
        return geojson

    # Materialise once so a one-shot iterable can be reused for every row.
    properties = list(properties)

    # Read whole columns rather than using iterrows(): iterrows() packs each
    # row into a single-dtype Series (integers can turn into floats), and
    # tolist() hands back native Python values that json.dumps accepts.
    lon_values = df[lon].tolist()
    lat_values = df[lat].tolist()
    prop_values = [df[prop].tolist() for prop in properties]

    for position, (x, y) in enumerate(zip(lon_values, lat_values)):
        geojson['features'].append({
            'type': 'Feature',
            'properties': {
                prop: values[position]
                for prop, values in zip(properties, prop_values)
            },
            'geometry': {'type': 'Point', 'coordinates': [x, y]},
        })

    return geojson

In [20]:
# Only the resort name travels with each point as a GeoJSON property.
useful_columns = ['name']

geojson_dict = df_to_geojson(df, useful_columns)

# indent=2 pretty-prints the text that is later embedded in a .js file.
geojson_str = json.dumps(geojson_dict, indent=2)

In [21]:
# Wrap the GeoJSON text in a JavaScript assignment and write it to disk.
output_filename = 'dataset.js'
feature_count = len(geojson_dict['features'])
js_source = 'var dataset = {};'.format(geojson_str)

with open(output_filename, 'w') as output_file:
    output_file.write(js_source)

print('{} geotagged features saved to file'.format(feature_count))

3720 geotagged features saved to file


In [1]:
from sqlalchemy import create_engine
from sqlalchemy.orm import Session

import sqlite3 
from sqlalchemy.ext.declarative import declarative_base

# Declarative base class created via SQLAlchemy's declarative_base();
# none of the visible cells reference it afterwards.
Base = declarative_base()

In [26]:
# SQLite database file, addressed relative to the working directory.
database_url = "sqlite:///ski_resorts.sqlite"

engine = create_engine(database_url)
conn = engine.connect()

In [36]:
# `df` always holds the complete scrape, so rewrite the table on each run.
# With if_exists='append', re-running this cell (or the whole notebook)
# inserted every ski area a second time.
df.to_sql('ski_area', conn, if_exists='replace')