In [None]:
Getting the country for each route from its area coordinates

In [None]:
#Big query 
!pip install --upgrade google-cloud-bigquery
from google.colab import auth
auth.authenticate_user()
from google.cloud import bigquery

In [None]:
# Google Cloud project that hosts the BigQuery data queried below
project_id = "rock-finder-project"
# BigQuery client bound to that project; later cells run their queries through it
client = bigquery.Client(project=project_id)

In [None]:
# Pull the complete mp_routes table from BigQuery into a pandas DataFrame
query = """
SELECT * FROM `rock-finder-project.routes.mp_routes`
"""
query_job = client.query(query)
df = query_job.to_dataframe()

In [None]:
# Spatial join: tag every row with the Natural Earth country polygon that
# contains its area coordinates.
#
# Download zipfile on your computer and upload it to colab folder
# https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_0_countries.zip

#!unzip ne_50m_admin_0_countries.zip -d ne_countries  #UNCOMMENT FOR UNZIP ONLY

import geopandas as gpd  # was missing: `gpd` is used below but was never imported
from shapely.geometry import Point

# One point per row, built as (x, y) = (longitude, latitude), declared as EPSG:4326
geometry = [Point(xy) for xy in zip(df['Area Longitude'], df['Area Latitude'])]
gdf_points = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")

# Country polygons from the unzipped shapefile
world = gpd.read_file("ne_countries/ne_50m_admin_0_countries.shp")

# Left join keeps every point; points that fall within no polygon get ADMIN = NaN
joined = gpd.sjoin(gdf_points, world[['ADMIN', 'geometry']], how='left', predicate='within')

In [None]:
# Show the rows for which the spatial join found no country (ADMIN is NaN)
admin_is_missing = joined["ADMIN"].isna()
joined[admin_is_missing]

In [None]:
# Implementing reverse geocoding method
import requests
def reverse_geocode(lat, lon, timeout=10):
  """Look up the country of a coordinate pair with Nominatim's reverse endpoint.

  Args:
    lat: Latitude, sent as the `lat` query parameter.
    lon: Longitude, sent as the `lon` query parameter.
    timeout: Seconds to wait for the server before `requests` gives up.
      Without a timeout a stalled connection would block the caller forever.

  Returns:
    The `address.country` string of the JSON response. When the payload has
    no such field (for example an error body), the decoded payload itself is
    returned so the caller can see what the service answered.

  Raises:
    requests.RequestException: On connection problems or when the timeout expires.
    ValueError: If the response body cannot be decoded as JSON.
  """
  params = {
    'lat': lat,
    'lon': lon,
    'format': 'json',
  }

  # Identifying User-Agent sent with every request
  headers = {
    'User-Agent': 'GeoChecker/1.0 (josedaro186@gmail.com)'
  }

  response = requests.get(
    "https://nominatim.openstreetmap.org/reverse",
    params=params,
    headers=headers,
    timeout=timeout,
  )
  data = response.json()
  try:
    return data["address"]["country"]
  except (KeyError, TypeError):
    # Only a missing or unexpectedly shaped payload is treated as "no country";
    # anything else (e.g. KeyboardInterrupt) is no longer swallowed.
    return data

In [None]:
# Fill the countries the spatial join missed by reverse geocoding those rows,
# then save the result as CSV.
import time

import numpy as np
import pandas as pd  # needed for pd.isna below; it was only imported in a later cell
from tqdm.notebook import tqdm
tqdm.pandas()

# Nominatim's usage policy allows at most one request per second
NOMINATIM_MIN_INTERVAL_S = 1.0


def country_for_row(row):
  """Return the row's joined ADMIN name, or reverse geocode it when ADMIN is missing."""
  if pd.isna(row['ADMIN']):
    # Pause before each request so the geocoder is not called faster than allowed
    time.sleep(NOMINATIM_MIN_INTERVAL_S)
    return reverse_geocode(row['Area Latitude'], row['Area Longitude'])
  return row['ADMIN']


df['country'] = joined[['ADMIN', 'Area Longitude', 'Area Latitude']].progress_apply(country_for_row, axis=1)
df.to_csv("df_with_country.csv")

In [None]:
Clean up the new dataset that includes the country column

In [None]:
import pandas as pd

In [None]:
# Lift the row limit so DataFrames are displayed in full instead of truncated
pd.options.display.max_rows = None

In [None]:
# Restore pandas' default row limit for displayed DataFrames
# (undoes a previous pd.set_option('display.max_rows', None))
pd.reset_option('display.max_rows')

In [None]:
# Load the saved CSV and count how often each country value occurs
df_w_country = pd.read_csv("/content/df_with_country.csv")
country_counts = df_w_country["country"].value_counts()
country_counts

In [None]:
# Rows whose geocoding answer was the stringified 500 error payload get the label "Unknown"
df_w_country["country"] = df_w_country["country"].replace(
    "{'title': '500 Internal Server Error'}", "Unknown"
)

In [None]:
# Use one spelling for the USA: "United States" becomes "United States of America"
is_short_us_name = df_w_country["country"].eq("United States")
df_w_country["country"] = df_w_country["country"].mask(is_short_us_name, "United States of America")

In [None]:
# Step 1 of building the key: break each URL into its "/"-separated pieces
url_parts = df_w_country["URL"].str.split("/")
df_w_country["URL_split"] = url_parts

In [None]:
# Step 2 of building the key: take the piece at index 4 of the split URL
# (presumably the route ID - confirm against the URL format)
df_w_country["key"] = df_w_country["URL_split"].str.get(4)

In [None]:
# Final table: only the key and its country
df_country_final = df_w_country.loc[:, ["key", "country"]]

In [None]:
# Preview the key/country table; as the last expression of the cell it is rendered by the notebook
df_country_final

In [None]:
# Names that identify the BigQuery destination: project, dataset (project-qualified) and table
project_id = "rock-finder-project"
dataset_id = f"{project_id}.routes"
table_id = "country_final"
full_table_id = ".".join((dataset_id, table_id))

In [None]:
# Write the key/country table to BigQuery, replacing the destination table if it already exists
from pandas_gbq import to_gbq
to_gbq(
    df_country_final,
    destination_table=full_table_id,
    project_id=project_id,
    if_exists="replace",
)