In [26]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
    
df = pd.read_json("./openbeta-usa-routes-aug-2020.zip", lines=True)
# %matplotlib inline

In [27]:
df.sample(5)
# Get the number of rows and columns 
rows = len(df.axes[0]) 
cols = len(df.axes[1]) 
  
# Print the number of rows and columns 
print("Number of Rows: " + str(rows)) 
print("Number of Columns: " + str(cols)) 

df.head()

Number of Rows: 183253
Number of Columns: 9


Unnamed: 0,route_name,grade,safety,type,fa,description,location,protection,metadata
0,Wheres Waldo?,"{'YDS': 'V2', 'Font': '5+'}",,{'boulder': True},unknown,[Sit Start on the crack. Pull a big move to a ...,,[Pads],"{'left_right_seq': '999999', 'parent_lnglat': ..."
1,Unknown,{},,"{'tr': True, 'ice': True}",Unkown,[Just a general entry for the routes. Usually ...,[Can't miss the silo with a giant sheet of ice...,[No gear needed. All supplied and is Top Rope],"{'left_right_seq': '0', 'parent_lnglat': [-92...."
2,Vanished Edens,"{'YDS': 'V4', 'Font': '6B'}",,{'boulder': True},"Joe Feldman, 2019",[Start right hand in a sidepull slot and left ...,[Hot Stuff Camp Roof],[pad - good landing],"{'left_right_seq': '1', 'parent_lnglat': [-91...."
3,Stairway to Heaven,"{'YDS': '5.7', 'French': '5a', 'Ewbanks': '15'...",,"{'trad': True, 'tr': True}",unknown,[Climb the large flake right of Slot Machine t...,,"[SR, tricams are handy.]","{'left_right_seq': '5', 'parent_lnglat': [-91...."
4,Shagadelic Humper Bumper,"{'YDS': '5.8', 'French': '5b', 'Ewbanks': '16'...",,{'tr': True},unknown,[Climb the buttress left of Cake Walk.],,[Build a TR anchor on off of trees above.],"{'left_right_seq': '999999', 'parent_lnglat': ..."


In [28]:
print(df.iloc[0].to_dict())

{'route_name': 'Wheres Waldo?', 'grade': {'YDS': 'V2', 'Font': '5+'}, 'safety': '', 'type': {'boulder': True}, 'fa': 'unknown', 'description': ['Sit Start on the crack. Pull a big move to a crimp and work across the face to the arete on the left side of the boulder. Top out from there. Super cool movement for an easy problem.'], 'location': '', 'protection': ['Pads'], 'metadata': {'left_right_seq': '999999', 'parent_lnglat': [-91.54207, 42.02717], 'parent_sector': "Waldo's Rock Park", 'mp_route_id': '118170758', 'mp_sector_id': '118170033'}}


In [29]:
# remove {}
df = df[df['grade'] != {}]

# remove all rows without YDS 

df = df[df['grade'].apply(lambda x: 'YDS' in x)]

def extract_yds(row):
    return row.get('YDS')


df['YDS'] = df['grade'].apply(extract_yds)

df.drop(columns=['grade'], inplace=True)

df.head()

Unnamed: 0,route_name,safety,type,fa,description,location,protection,metadata,YDS
0,Wheres Waldo?,,{'boulder': True},unknown,[Sit Start on the crack. Pull a big move to a ...,,[Pads],"{'left_right_seq': '999999', 'parent_lnglat': ...",V2
2,Vanished Edens,,{'boulder': True},"Joe Feldman, 2019",[Start right hand in a sidepull slot and left ...,[Hot Stuff Camp Roof],[pad - good landing],"{'left_right_seq': '1', 'parent_lnglat': [-91....",V4
3,Stairway to Heaven,,"{'trad': True, 'tr': True}",unknown,[Climb the large flake right of Slot Machine t...,,"[SR, tricams are handy.]","{'left_right_seq': '5', 'parent_lnglat': [-91....",5.7
4,Shagadelic Humper Bumper,,{'tr': True},unknown,[Climb the buttress left of Cake Walk.],,[Build a TR anchor on off of trees above.],"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8
5,The Razor,,{'tr': True},unknown,"[On the North facing wall, start at the lowest...",,[Build TR anchor on trees above.],"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8


In [30]:
all_keys = set().union(*(d.keys() for d in df['type']))

# Create separate columns for each key and fill them with boolean values
for key in all_keys:
    df[key] = df['type'].apply(lambda x: x.get(key, False))

# Clean the type column
df.drop(columns=['type'], inplace=True)

df.head()

Unnamed: 0,route_name,safety,fa,description,location,protection,metadata,YDS,mixed,sport,snow,aid,tr,trad,boulder,alpine,ice
0,Wheres Waldo?,,unknown,[Sit Start on the crack. Pull a big move to a ...,,[Pads],"{'left_right_seq': '999999', 'parent_lnglat': ...",V2,False,False,False,False,False,False,True,False,False
2,Vanished Edens,,"Joe Feldman, 2019",[Start right hand in a sidepull slot and left ...,[Hot Stuff Camp Roof],[pad - good landing],"{'left_right_seq': '1', 'parent_lnglat': [-91....",V4,False,False,False,False,False,False,True,False,False
3,Stairway to Heaven,,unknown,[Climb the large flake right of Slot Machine t...,,"[SR, tricams are handy.]","{'left_right_seq': '5', 'parent_lnglat': [-91....",5.7,False,False,False,False,True,True,False,False,False
4,Shagadelic Humper Bumper,,unknown,[Climb the buttress left of Cake Walk.],,[Build a TR anchor on off of trees above.],"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8,False,False,False,False,True,False,False,False,False
5,The Razor,,unknown,"[On the North facing wall, start at the lowest...",,[Build TR anchor on trees above.],"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8,False,False,False,False,True,False,False,False,False


In [31]:
# Clean Text
def extract_text(value):
    return value[0] if isinstance(value, list) and len(value) == 1 else None

df['description'] = df['description'].apply(extract_text)
df['location'] = df['location'].apply(extract_text)
df['protection'] = df['protection'].apply(extract_text)
df.drop(columns=['safety'], inplace=True)
df.head()

Unnamed: 0,route_name,fa,description,location,protection,metadata,YDS,mixed,sport,snow,aid,tr,trad,boulder,alpine,ice
0,Wheres Waldo?,unknown,Sit Start on the crack. Pull a big move to a c...,,Pads,"{'left_right_seq': '999999', 'parent_lnglat': ...",V2,False,False,False,False,False,False,True,False,False
2,Vanished Edens,"Joe Feldman, 2019",Start right hand in a sidepull slot and left h...,Hot Stuff Camp Roof,pad - good landing,"{'left_right_seq': '1', 'parent_lnglat': [-91....",V4,False,False,False,False,False,False,True,False,False
3,Stairway to Heaven,unknown,Climb the large flake right of Slot Machine to...,,"SR, tricams are handy.","{'left_right_seq': '5', 'parent_lnglat': [-91....",5.7,False,False,False,False,True,True,False,False,False
4,Shagadelic Humper Bumper,unknown,Climb the buttress left of Cake Walk.,,Build a TR anchor on off of trees above.,"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8,False,False,False,False,True,False,False,False,False
5,The Razor,unknown,"On the North facing wall, start at the lowest ...",,Build TR anchor on trees above.,"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8,False,False,False,False,True,False,False,False,False


In [32]:
df = df.rename(columns={'fa': 'first_ascent'})
df.head()

Unnamed: 0,route_name,first_ascent,description,location,protection,metadata,YDS,mixed,sport,snow,aid,tr,trad,boulder,alpine,ice
0,Wheres Waldo?,unknown,Sit Start on the crack. Pull a big move to a c...,,Pads,"{'left_right_seq': '999999', 'parent_lnglat': ...",V2,False,False,False,False,False,False,True,False,False
2,Vanished Edens,"Joe Feldman, 2019",Start right hand in a sidepull slot and left h...,Hot Stuff Camp Roof,pad - good landing,"{'left_right_seq': '1', 'parent_lnglat': [-91....",V4,False,False,False,False,False,False,True,False,False
3,Stairway to Heaven,unknown,Climb the large flake right of Slot Machine to...,,"SR, tricams are handy.","{'left_right_seq': '5', 'parent_lnglat': [-91....",5.7,False,False,False,False,True,True,False,False,False
4,Shagadelic Humper Bumper,unknown,Climb the buttress left of Cake Walk.,,Build a TR anchor on off of trees above.,"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8,False,False,False,False,True,False,False,False,False
5,The Razor,unknown,"On the North facing wall, start at the lowest ...",,Build TR anchor on trees above.,"{'left_right_seq': '999999', 'parent_lnglat': ...",5.8,False,False,False,False,True,False,False,False,False


In [33]:
all_keys = set().union(*(d.keys() for d in df['metadata']))

# Create separate columns for each key and fill them with boolean values
for key in all_keys:
    df[key] = df['metadata'].apply(lambda x: x.get(key, False))

# Clean the type column
df.drop(columns=['metadata'], inplace=True)

df.head()

Unnamed: 0,route_name,first_ascent,description,location,protection,YDS,mixed,sport,snow,aid,tr,trad,boulder,alpine,ice,parent_sector,left_right_seq,mp_route_id,parent_lnglat,mp_sector_id
0,Wheres Waldo?,unknown,Sit Start on the crack. Pull a big move to a c...,,Pads,V2,False,False,False,False,False,False,True,False,False,Waldo's Rock Park,999999,118170758,"[-91.54207, 42.02717]",118170033
2,Vanished Edens,"Joe Feldman, 2019",Start right hand in a sidepull slot and left h...,Hot Stuff Camp Roof,pad - good landing,V4,False,False,False,False,False,False,True,False,False,Hot Stuff Camp Roof,1,117944541,"[-91.5615, 42.6154]",117944427
3,Stairway to Heaven,unknown,Climb the large flake right of Slot Machine to...,,"SR, tricams are handy.",5.7,False,False,False,False,True,True,False,False,False,Drive In Wall,5,106956280,"[-91.5625, 42.614]",106947227
4,Shagadelic Humper Bumper,unknown,Climb the buttress left of Cake Walk.,,Build a TR anchor on off of trees above.,5.8,False,False,False,False,True,False,False,False,False,Cake Walk and Razor Wall,999999,106956324,"[-91.5625, 42.614]",106947239
5,The Razor,unknown,"On the North facing wall, start at the lowest ...",,Build TR anchor on trees above.,5.8,False,False,False,False,True,False,False,False,False,Cake Walk and Razor Wall,999999,106956330,"[-91.5625, 42.614]",106947239


In [34]:
df.drop(columns=['mixed'], inplace=True)
df.drop(columns=['sport'], inplace=True)
df.drop(columns=['snow'], inplace=True)
df.drop(columns=['aid'], inplace=True)
df.drop(columns=['tr'], inplace=True)
df.drop(columns=['trad'], inplace=True)
df.drop(columns=['boulder'], inplace=True)
df.drop(columns=['alpine'], inplace=True)
df.drop(columns=['ice'], inplace=True)

In [35]:
# column_headings = df.columns.tolist()
# print(column_headings)

# Function to extract longitude and latitude
def extract_longitude(row):
    return row[0] if isinstance(row, list) and len(row) == 2 else None

def extract_latitude(row):
    return row[1] if isinstance(row, list) and len(row) == 2 else None

# Create new columns 'longitude' and 'latitude' using the extracted values
df['longitude'] = df['parent_lnglat'].apply(extract_longitude)
df['latitude'] = df['parent_lnglat'].apply(extract_latitude)

# Drop the original 'parent_lnglat' column
df.drop(columns=['parent_lnglat'], inplace=True)

df.head()

Unnamed: 0,route_name,first_ascent,description,location,protection,YDS,parent_sector,left_right_seq,mp_route_id,mp_sector_id,longitude,latitude
0,Wheres Waldo?,unknown,Sit Start on the crack. Pull a big move to a c...,,Pads,V2,Waldo's Rock Park,999999,118170758,118170033,-91.54207,42.02717
2,Vanished Edens,"Joe Feldman, 2019",Start right hand in a sidepull slot and left h...,Hot Stuff Camp Roof,pad - good landing,V4,Hot Stuff Camp Roof,1,117944541,117944427,-91.5615,42.6154
3,Stairway to Heaven,unknown,Climb the large flake right of Slot Machine to...,,"SR, tricams are handy.",5.7,Drive In Wall,5,106956280,106947227,-91.5625,42.614
4,Shagadelic Humper Bumper,unknown,Climb the buttress left of Cake Walk.,,Build a TR anchor on off of trees above.,5.8,Cake Walk and Razor Wall,999999,106956324,106947239,-91.5625,42.614
5,The Razor,unknown,"On the North facing wall, start at the lowest ...",,Build TR anchor on trees above.,5.8,Cake Walk and Razor Wall,999999,106956330,106947239,-91.5625,42.614


In [36]:
df = df.head(10000)
len(df)


10000

In [38]:
# Get the last column name
last_column = df.columns[-1]

# Move the last column to the first position
df = df[[last_column] + [col for col in df.columns if col != last_column]]

df.head()

Unnamed: 0,id,route_name,first_ascent,description,location,protection,YDS,parent_sector,left_right_seq,mp_route_id,mp_sector_id,longitude,latitude
0,14a43978-b7fa-46f8-9ff1-64bee82f8ff1,Wheres Waldo?,unknown,Sit Start on the crack. Pull a big move to a c...,,Pads,V2,Waldo's Rock Park,999999,118170758,118170033,-91.54207,42.02717
2,cb62acfc-d864-438f-b6d6-23106b349bf0,Vanished Edens,"Joe Feldman, 2019",Start right hand in a sidepull slot and left h...,Hot Stuff Camp Roof,pad - good landing,V4,Hot Stuff Camp Roof,1,117944541,117944427,-91.5615,42.6154
3,bb8e2f19-f991-43e4-a087-88cb7f1ca554,Stairway to Heaven,unknown,Climb the large flake right of Slot Machine to...,,"SR, tricams are handy.",5.7,Drive In Wall,5,106956280,106947227,-91.5625,42.614
4,7d4cfd88-198d-4a58-8169-cf88beefddfb,Shagadelic Humper Bumper,unknown,Climb the buttress left of Cake Walk.,,Build a TR anchor on off of trees above.,5.8,Cake Walk and Razor Wall,999999,106956324,106947239,-91.5625,42.614
5,484311cf-d9de-41eb-b4c2-8bf54afeefb0,The Razor,unknown,"On the North facing wall, start at the lowest ...",,Build TR anchor on trees above.,5.8,Cake Walk and Razor Wall,999999,106956330,106947239,-91.5625,42.614


In [21]:
value_counts = df['mixed'].dtype
print(value_counts)

has_empty_values = df['mixed'].isnull().any()
print('hasempty', has_empty_values)

bool
hasempty False


In [37]:
import uuid
df['id'] = [str(uuid.uuid4()) for _ in range(len(df))]
df.head()

Unnamed: 0,route_name,first_ascent,description,location,protection,YDS,parent_sector,left_right_seq,mp_route_id,mp_sector_id,longitude,latitude,id
0,Wheres Waldo?,unknown,Sit Start on the crack. Pull a big move to a c...,,Pads,V2,Waldo's Rock Park,999999,118170758,118170033,-91.54207,42.02717,14a43978-b7fa-46f8-9ff1-64bee82f8ff1
2,Vanished Edens,"Joe Feldman, 2019",Start right hand in a sidepull slot and left h...,Hot Stuff Camp Roof,pad - good landing,V4,Hot Stuff Camp Roof,1,117944541,117944427,-91.5615,42.6154,cb62acfc-d864-438f-b6d6-23106b349bf0
3,Stairway to Heaven,unknown,Climb the large flake right of Slot Machine to...,,"SR, tricams are handy.",5.7,Drive In Wall,5,106956280,106947227,-91.5625,42.614,bb8e2f19-f991-43e4-a087-88cb7f1ca554
4,Shagadelic Humper Bumper,unknown,Climb the buttress left of Cake Walk.,,Build a TR anchor on off of trees above.,5.8,Cake Walk and Razor Wall,999999,106956324,106947239,-91.5625,42.614,7d4cfd88-198d-4a58-8169-cf88beefddfb
5,The Razor,unknown,"On the North facing wall, start at the lowest ...",,Build TR anchor on trees above.,5.8,Cake Walk and Razor Wall,999999,106956330,106947239,-91.5625,42.614,484311cf-d9de-41eb-b4c2-8bf54afeefb0


In [39]:
df.to_csv('climbing_routes_10k.csv', index=False)

In [35]:
# Run this only once

from sqlalchemy import create_engine 
import os

uri = os.environ['DB_URL']
# print(uri)
if uri.startswith("postgres://"):
    uri = uri.replace("postgres://", "postgresql://", 1)
# print('After: ' + uri)
engine = create_engine(uri)
df.to_sql('climbs', engine)


ValueError: Table 'climbs' already exists.

In [37]:
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session = Session()
from resources import ClimbModel
# Read data from CSV and insert into the existing table
with open('./climbing_routes.csv', 'r') as file:
    next(file)  # Skip the header if exists
    for line in file:
        data = line.strip().split(',')  # Assuming CSV has comma-separated values
        climb = ClimbModel(
            route_name=data[0],
            first_ascent=data[1],
            description=data[2],
            location=data[3],
            protection=data[4],
            YDS=data[5],
            mixed=data[6].lower() == 'true',
            ice=data[7].lower() == 'true',
            trad=data[8].lower() == 'true',
            aid=data[9].lower() == 'true',
            alpine=data[10].lower() == 'true',
            sport=data[11].lower() == 'true',
            tr=data[12].lower() == 'true',
            boulder=data[13].lower() == 'true',
            snow=data[14].lower() == 'true',
            mp_sector_id=data[15],
            mp_route_id=data[16],
            parent_sector=data[17],
            left_right_seq=data[18],
            longitude=float(data[19]),
            latitude=float(data[20])
        )
        session.add(climb)

    session.commit()
# Close the session
session.close()

ImportError: cannot import name 'ClimbModel' from 'resources' (unknown location)

In [18]:
from sqlalchemy import text


sql = text("SELECT column_name FROM information_schema.columns WHERE table_name = 'climbing_data'")

with engine.connect() as conn:
    result = conn.execute(sql)
    column_names = [row[0] for row in result.fetchall()]
    print("Column Names:")
    for column_name in column_names:
        print(column_name)

Column Names:
index
route_name
first_ascent
description
location
protection
YDS
alpine
trad
snow
mixed
ice
boulder
aid
sport
tr
mp_sector_id
mp_route_id
parent_sector
left_right_seq
longitude
latitude


In [19]:
# Delete Table

from sqlalchemy import text
sql = text('DROP TABLE IF EXISTS climbing_data;')
with engine.connect() as conn:
    result = conn.execute(sql)
    print(result)
    conn.execute(text('COMMIT;'))

<sqlalchemy.engine.cursor.CursorResult object at 0x000002B1A57D0220>


In [34]:
query = f"SELECT * FROM climbs LIMIT 5"
df = pd.read_sql(query, engine)

# Display the first 5 rows of the retrieved data
print(df)

Empty DataFrame
Columns: [index, route_name, first_ascent, description, location, protection, YDS, alpine, trad, snow, mixed, ice, boulder, aid, sport, tr, mp_sector_id, mp_route_id, parent_sector, left_right_seq, longitude, latitude]
Index: []

[0 rows x 22 columns]
