In [1]:
# import libraries
import os
from dotenv import load_dotenv
import datetime
import requests
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
from geopy.distance import geodesic
import googlemaps
import time
from pathlib import Path



In [2]:
# Show the kernel's working directory; the relative '../data/...' paths used
# further down resolve against it.
Path.cwd()

PosixPath('/mnt/g/Side Projects/the-narrow-gate/narrow-gate/narrow_gate/notebooks')

In [3]:
# get secrets
# load_dotenv() reads a .env file into the process environment so the API keys
# do not have to be written into the notebook.
load_dotenv()

# os.getenv returns None when a variable is not set.
# NOTE(review): GEMINI_API_KEY is only referenced by a commented-out line in
# the visible cells - confirm whether it is still needed.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
GOOGLE_MAPS_API_KEY = os.getenv('GOOGLE_MAPS_API_KEY')

In [4]:
#set up
# Google Maps client; used below for geocoding and for driving-time lookups.
gmaps = googlemaps.Client(key=GOOGLE_MAPS_API_KEY)
# Gemini configuration is currently disabled.
# genai.configure(api_key=GEMINI_API_KEY)
# Base URL of the scraped site; page paths/hrefs are appended to it.
URL = 'https://www.mass-schedules.com'
# Browser-like User-Agent header sent by get_page() with every request.
header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"}

In [5]:
#getting the data from the website

def get_page(url, timeout=30):
  """Download ``url`` and return it as a parsed BeautifulSoup document.

  Args:
    url: Absolute URL of the page to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout a single stalled connection would hang the whole scrape.

  Returns:
    A BeautifulSoup tree built from the prettified markup of the response.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.Timeout: If the server does not answer within ``timeout``.
  """
  page = requests.get(url, headers=header, timeout=timeout)
  # Fail here with a clear HTTP error instead of parsing an error page and
  # crashing later on a missing table or list.
  page.raise_for_status()
  s1 = BeautifulSoup(page.content, 'html.parser')
  # The prettified markup is parsed a second time, exactly as before: callers
  # read `.contents[0]` of the resulting nodes, so the tree shape produced by
  # this second pass is part of what they depend on.
  return BeautifulSoup(s1.prettify(), 'html.parser')

def get_map(s2):
  """Build a mapping from city name to the churches listed for that city.

  Every link in the first table of ``s2`` is treated as a city. Each city page
  is downloaded with get_page() and every link in its first table that carries
  a ``list_church_name`` span is recorded as a church.

  Args:
    s2: Parsed locations page (as returned by get_page()).

  Returns:
    dict mapping the city link text to a list of ``(church_name, href)``
    tuples, where ``href`` is the link target as found on the city page.
  """
  loc_map = {}
  city_tables = s2.find_all('table')
  for city_link in city_tables[0].find_all('a', href=True):
    city = city_link.contents[0].strip()
    # Separate names for the city page and its tables: the original reused
    # `s2`/`tables`, overwriting the function's own argument mid-loop.
    city_page = get_page(URL + city_link['href'])  # churches for this city
    church_tables = city_page.find_all('table')

    churches_in_city = []
    for church_link in church_tables[0].find_all('a', href=True):
      name_span = church_link.find('span', {'class': 'list_church_name'})
      # Links without a church-name span are not church entries; skipping
      # them avoids an AttributeError that would abort the whole crawl.
      if name_span is None or not name_span.contents:
        continue
      churches_in_city.append((name_span.contents[0].strip(), church_link['href']))
    loc_map[city] = churches_in_city
  return loc_map

def cell_parse(cell, tag, filter):
  """Return the stripped first child of the first matching element, or ''.

  Args:
    cell: Object exposing ``find(tag, filter)`` (a parsed table cell).
    tag: Tag name to look for inside ``cell``.
    filter: Attribute filter passed through to ``find`` (the name shadows the
      builtin but is kept so existing keyword callers keep working).

  Returns:
    ``contents[0].strip()`` of the match, or an empty string when nothing
    matches or the match has no children.
  """
  # Search once and reuse the result instead of running the same find twice.
  found = cell.find(tag, filter)
  if found is None or not found.contents:
    return ''
  return found.contents[0].strip()

def get_church_info(x):
  """Scrape the label/value pairs from a church page's ``church_info1`` list.

  Args:
    x: Absolute URL of the church page.

  Returns:
    dict mapping each entry's label text, with its last character removed
    (presumably a trailing colon - confirm against the site), to the stripped
    text of the entry's first ``<p>``.

  Raises:
    ValueError: If the page has no ``<ul id="church_info1">`` element.
  """
  info_list = get_page(x).find('ul', id='church_info1')
  if info_list is None:
    # Name the page that is missing the block rather than failing with an
    # opaque "'NoneType' object has no attribute 'find_all'".
    raise ValueError(f"No 'church_info1' list found on {x}")

  church_info = {}
  for li in info_list.find_all('li'):
    try:
      label = li.find('label').contents[0].strip()[:-1]
      church_info[label] = li.find('p').contents[0].strip()
    except (AttributeError, IndexError, TypeError):
      # Entries without a label or value, or whose first child is not plain
      # text, are skipped as before. Only these parsing errors are caught:
      # the previous bare `except` also swallowed KeyboardInterrupt.
      continue

  return church_info

def get_mass_schedule(x):
  """Collect every schedule entry found in the table cells of page ``x``.

  Each ``<td>`` of each ``<tr>`` is inspected; a cell contributes one record
  when it yields a non-empty 'schedule' text. ``day_of_week`` is the cell's
  position within its row.

  Returns:
    list of dicts with keys 'day_of_week', 'schedule', 'language', 'comment'.
  """
  text_fields = ('schedule', 'language', 'comment')
  mass_schedule = []
  for table_row in get_page(x).find_all("tr"):
    for column, td in enumerate(table_row.find_all("td")):
      entry = {'day_of_week': column}
      for field in text_fields:
        entry[field] = cell_parse(td, 'p', {'class': field})
      # Cells without a schedule text carry no mass and are dropped.
      if entry['schedule'] != '':
        mass_schedule.append(entry)

  return mass_schedule

def compile_churches(city):
  """Return one DataFrame row per church in ``city``.

  ``city`` is an iterable of ``(church_name, href)`` entries. Each entry is
  printed as it is processed, then its page is scraped with get_church_info()
  and the scraped fields are merged on top of the 'church_name' field.
  """
  records = []
  for entry in city:
    print(entry)
    record = {'church_name': entry[0]}
    record.update(get_church_info(URL + entry[1]))
    records.append(record)

  return pd.DataFrame(records)


def compile_schedules(city):
  """Return the mass schedules of every church in ``city`` as one DataFrame.

  Args:
    city: Iterable of ``(church_name, href)`` entries.

  Returns:
    DataFrame with the columns produced by get_mass_schedule() plus
    'church_name' and 'church_address'. An empty frame with those columns is
    returned when no church contributes a schedule row.
  """
  schedules = []
  for x in city:
    page_url = URL + x[1]
    temp = pd.DataFrame(get_mass_schedule(page_url))
    if temp.empty:
      # No schedule rows: nothing to label, so skip the extra page download
      # that looking up the address would cost.
      continue
    temp['church_name'] = x[0]
    # .get(): a page without an 'address' entry yields a missing value rather
    # than a KeyError that would abort the whole city.
    temp['church_address'] = get_church_info(page_url).get('address')
    schedules.append(temp)

  if not schedules:
    # pd.concat raises on an empty list; hand back an empty, well-formed frame.
    return pd.DataFrame(columns=['day_of_week', 'schedule', 'language', 'comment', 'church_name', 'church_address'])
  return pd.concat(schedules)

def build_church_dataset(city_map):
  """Scrape every church in ``city_map`` and attach geocoded coordinates.

  Args:
    city_map: dict mapping a city name to its list of ``(church_name, href)``
      entries, as produced by get_map().

  Returns:
    DataFrame of churches with the extra columns 'coords' (the location dict
    of the first geocoding result, or None), 'long' and 'lat' (NaN when the
    address could not be geocoded).
  """
  def _geocode(address):
    # Missing addresses (NaN after concatenation) and addresses Google cannot
    # resolve produce no coordinates instead of aborting the whole build with
    # an IndexError on an empty result list.
    if not isinstance(address, str) or not address.strip():
      return None
    results = gmaps.geocode(address)
    return results[0]['geometry']['location'] if results else None

  churches = pd.concat([compile_churches(city) for city in city_map.values()])
  churches['coords'] = churches.address.apply(_geocode)
  churches['long'] = churches.coords.apply(lambda loc: loc['lng'] if loc else float('nan'))
  churches['lat'] = churches.coords.apply(lambda loc: loc['lat'] if loc else float('nan'))
  print(churches.shape)
  return churches

def calculate_distance(x, geocode_in):
    """Return the geodesic distance between ``x`` and a geocoding result.

    ``x`` is a ``(lat, lng)`` pair; ``geocode_in`` is a geocoding response
    whose first entry provides ``['geometry']['location']`` with 'lat' and
    'lng' keys. The geopy distance object is returned unchanged.
    """
    location = geocode_in[0]['geometry']['location']
    target = (location['lat'], location['lng'])
    return geodesic(x, target)

def time_diff(start_time, end_time):
    """Return ``end_time - start_time`` for two 12-hour clock strings.

    Both arguments must match the ``%I:%M %p`` format, e.g. ``'6:00 PM'``.

    Args:
        start_time: The start time string.
        end_time: The end time string.

    Returns:
        A datetime.timedelta; it is negative when ``end_time`` is earlier on
        the clock than ``start_time``.
    """
    clock_format = '%I:%M %p'
    # Both strings are parsed onto the same default date, so subtracting the
    # results compares the clock times only.
    parsed_start, parsed_end = (
        datetime.datetime.strptime(stamp, clock_format)
        for stamp in (start_time, end_time)
    )
    return parsed_end - parsed_start

def _driving_minutes(origin, destination):
  """Return the driving time from ``origin`` to ``destination`` in minutes.

  NaN is returned when the Distance Matrix response holds no duration for the
  pair (e.g. no route found), so one bad address does not abort the search.
  """
  response = gmaps.distance_matrix(origin, destination, mode="driving", departure_time=datetime.datetime.now())
  try:
    # The duration value is divided by 60 to express it in minutes.
    return response['rows'][0]['elements'][0]['duration']['value'] / 60
  except (KeyError, IndexError):
    return float('nan')


def find_mass(sample_address, search_range, target_sched, current_datetime):
  """Find nearby churches with a mass starting at ``target_sched`` today.

  Reads the module-level ``churches`` and ``masses`` frames without modifying
  them.

  Args:
    sample_address: Free-text address of the starting point.
    search_range: Maximum distance from the starting point, in kilometres.
    target_sched: Start time to match exactly against the part of each
      schedule string before ' - ', e.g. '6:00 PM'.
    current_datetime: datetime whose weekday() selects the day of week and
      which is the base for the computed arrival times.

  Returns:
    Tuple ``(church_results, mass_results)``:
      church_results: churches within range that host a matching mass, with
        'relative_dist' (km) and 'Travel Time(mins)' columns added.
      mass_results: at most five matching masses, one per church address,
        ordered by travel time, with the columns 'church_name', 'Address',
        'schedule', 'language', 'comment', 'Travel Time(Mins)', 'Arrival Time'.

  Raises:
    ValueError: If ``sample_address`` cannot be geocoded.
  """
  sample_geocode = gmaps.geocode(sample_address)
  if not sample_geocode:
    raise ValueError(f"Could not geocode address: {sample_address!r}")

  # Work on a copy so the shared `churches` frame is not changed between runs;
  # rows without coordinates cannot be measured and are left out.
  nearby = churches.dropna(subset=['lat', 'long']).copy()
  nearby['relative_dist'] = [
    calculate_distance((lat, lng), sample_geocode).km
    for lat, lng in zip(nearby['lat'], nearby['long'])
  ]
  # Honour the caller's radius (it used to be ignored in favour of a fixed 5).
  church_results = nearby[nearby.relative_dist <= search_range].sort_values(by='relative_dist')

  # Split "start - end"; schedules without an end time get a missing
  # 'mass_end' instead of raising an IndexError.
  schedule_parts = masses['schedule'].str.split(' - ')
  timed_masses = masses.assign(
    mass_start=schedule_parts.str[0],
    mass_end=schedule_parts.str[1],
  )
  # The churches frame stores the address under 'address' (there is no
  # 'church_address' column on it); masses store it as 'church_address'.
  is_match = (
    (timed_masses.mass_start == target_sched)
    & timed_masses.church_address.isin(church_results.address)
    & (timed_masses.day_of_week == current_datetime.weekday())
  )
  mass_results = timed_masses[is_match].copy()

  # Churches are matched by address rather than by name because different
  # churches can share a name.
  church_results = church_results[church_results.address.isin(mass_results.church_address)].copy()
  church_results['Travel Time(mins)'] = church_results.address.apply(
    lambda destination: _driving_minutes(sample_address, destination)
  )

  travel_times = church_results.set_index('address')['Travel Time(mins)'].to_dict()
  mass_results['Travel Time(Mins)'] = mass_results.church_address.map(travel_times)
  # 'Address' is part of the returned columns and was previously never set.
  mass_results['Address'] = mass_results.church_address
  mass_results = (
    mass_results
    .dropna(subset=['Travel Time(Mins)'])  # no route -> no arrival time
    .sort_values(by='Travel Time(Mins)')
    .drop_duplicates(subset=['church_address'])
    .head()
    .copy()
  )
  mass_results['Arrival Time'] = mass_results['Travel Time(Mins)'].apply(
    lambda minutes: (current_datetime + datetime.timedelta(minutes=minutes)).strftime("%I:%M %p")
  )

  return church_results, mass_results[['church_name', 'Address', 'schedule', 'language', 'comment', 'Travel Time(Mins)', 'Arrival Time']]

def refresh_dataset():
    """Re-scrape all churches and mass schedules and rewrite the CSV datasets.

    Downloads the locations index, scrapes and geocodes every church, scrapes
    every mass schedule, and writes ``../data/churches.csv`` and
    ``../data/masses.csv`` (relative to the working directory). Returns None.
    """
    # Create the output directory before the slow scraping/geocoding work so a
    # missing folder cannot make the final write fail after everything else ran.
    data_dir = Path('../data')
    data_dir.mkdir(parents=True, exist_ok=True)

    s2 = get_page(URL+"/philippine-locations.html")
    city_map = get_map(s2)

    # note: figure out how to handle confessions
    churches = build_church_dataset(city_map)
    print(churches.shape)
    masses = pd.concat([compile_schedules(city) for city in city_map.values()])
    # Column position p becomes 6 - p so it can be compared with
    # datetime.weekday() in find_mass.
    # NOTE(review): this reverses the column order (0 <-> 6, 1 <-> 5, ...);
    # confirm it matches the day order of the site's schedule table.
    masses['day_of_week'] = abs(masses['day_of_week'] - 6)
    print(masses.shape)
    churches.to_csv(data_dir / 'churches.csv')
    masses.to_csv(data_dir / 'masses.csv')

In [6]:
# Fetch the locations index page and build the city -> churches map.
# NOTE(review): refresh_dataset() performs the same fetch and get_map() call
# internally, and `tables` is not read again in the visible cells - this cell
# may be redundant.
s2 = get_page(URL+"/philippine-locations.html")
tables = s2.find_all('table') # get all cities
city_map = get_map(s2)

In [None]:
# Re-scrape the site, geocode every church, and rewrite ../data/churches.csv
# and ../data/masses.csv. Each church is printed as it is processed.
refresh_dataset()

('Archdiocesan Shrine of Christ our Lord of the Holy Sepulcher (Apung Mamacalulu)', '/catholic-church/1498/archdiocesan-shrine-of-christ-our-lord-of-the-holy-sepulcher-apung-mamacalulu.html')
('Christ the King Parish', '/catholic-church/1499/christ-the-king-parish.html')
('Holy Cross Parish', '/catholic-church/2065/holy-cross-parish.html')
('Holy Rosary Parish (Pisamban Maragul)', '/catholic-church/40/holy-rosary-parish-pisamban-maragul.html')
('Holy Spirit Parish', '/catholic-church/1500/holy-spirit-parish.html')
('Immaculate Conception Parish', '/catholic-church/1501/immaculate-conception-parish.html')
('Jesus The Eternal Word Parish', '/catholic-church/1502/jesus-the-eternal-word-parish.html')
('Our Lady Of Fatima Parish', '/catholic-church/1503/our-lady-of-fatima-parish.html')
('Our Lady of Lourdes Parish', '/catholic-church/1504/our-lady-of-lourdes-parish.html')
('Our Lady of Perpetual Help Chapel', '/catholic-church/1355/our-lady-of-perpetual-help-chapel.html')
('Our Lady of Reme

In [None]:
# Load the datasets written by refresh_dataset(); find_mass() reads these two
# module-level frames by name.
# NOTE(review): the files are written with the default index, so each frame
# also gets an extra unnamed index column on load.
churches = pd.read_csv('../data/churches.csv')
masses = pd.read_csv('../data/masses.csv')

In [None]:
# Query parameters passed to find_mass() in the next cell.
sample_address = 'Skyway Twin Towers Condominium Capt. Javier st. Brgy Oranbo Pasig city'  # starting point, geocoded via Google Maps
search_range = 10  # search-radius argument; find_mass measures distances in km
target_sched = '6:00 PM'  # compared with the start time parsed from each schedule string
current_datetime = datetime.datetime.now()  # supplies the weekday filter and the arrival-time base

In [None]:
# Run the search, then map the churches that appear in the mass results
# together with the starting point.
church_results, mass_results = find_mass(sample_address, search_range, target_sched, current_datetime)

matched_churches = church_results[church_results.church_name.isin(mass_results.church_name)]
fig = px.scatter_map(
    matched_churches,
    lat="lat",
    lon="long",
    hover_name="church_name",  # display church name on hover
    text="church_name",
)

# Add the starting point as its own trace.
sample_geocode = gmaps.geocode(sample_address)
origin = sample_geocode[0]['geometry']['location']
fig.add_scattermap(lon=[origin['lng']], lat=[origin['lat']], name='my location')

fig.show()
mass_results