## Notebook to produce a kepler.gl-compatible CSV for visualizing Azan times in select cities on a particular date. The `cities_csv` used is downloadable [here](https://public.opendatasoft.com/explore/dataset/geonames-all-cities-with-a-population-1000/table/?disjunctive.cou_name_en&sort=name) and the `population_csv` is downloadable [here](https://www.pewforum.org/2015/04/02/religious-projection-table/2020/percent/all/).

In [None]:
import collections
import datetime
import numpy as np
import os
import pandas as pd
import prayer_times
import pytz

In [None]:
# Prayer time calculation method (passed to prayer_times.PrayTimes below).
METHOD = 'Karachi'

# Path to the GeoNames CSV containing a list of cities with >1k population.
cities_csv = ''
# Path to the PEW CSV containing population stats for different countries.
# (The variable name was missing here, which made the cell a syntax error
# and left `population_csv` undefined for the loading cell.)
population_csv = ''
# Date we're visualizing (midnight UTC); prayer times are stamped onto it.
calc_date = datetime.datetime(year=2021, month=1, day=1, hour=0, minute=0, tzinfo=pytz.UTC)

# Directory that receives the two generated CSV files.
output_dir = ''
# Every city / prayer event, before any filtering.
unfiltered_csv_name = os.path.join(output_dir, 'populated_cities_karachi.csv')
# The filtered subset that is loaded into kepler.gl.
filtered_csv_name = os.path.join(output_dir, 'populated_cities_karachi_filtered.csv')

In [None]:
# Load the two input tables: per-country religion stats and the city list.
population_df = pd.read_csv(population_csv)
cities_df = pd.read_csv(cities_csv, encoding='ISO-8859-1')

# The 'Coordinates' column holds "lat,lon" in one string; break it into two
# separate (still string-typed) columns.
coordinate_parts = cities_df['Coordinates'].str.split(',', expand=True)
cities_df[['lat', 'lon']] = coordinate_parts

# Country name -> value of the 'Muslims' column. Looking up a country that is
# not in the population table yields -1 instead of raising KeyError.
countries_to_population = collections.defaultdict(lambda: -1)
countries_to_population.update(zip(population_df['Country'], population_df['Muslims']))

In [None]:
# Events read from the prayer-time calculator's result, one output row each.
prayer_names = ['fajr', 'sunrise', 'dhuhr', 'asr', 'maghrib', 'sunset', 'isha']
# Number of cities whose processing raised a ValueError / completed cleanly.
num_errors = 0
num_correct = 0

PT = prayer_times.PrayTimes(METHOD)

# The date is identical for every city, so build the tuple once up front.
calc_date_tuple = (calc_date.year, calc_date.month, calc_date.day)

# One dict per (city, prayer event): the city's original columns plus the
# event time and the Muslim population estimate.
output_rows = []
for index, input_row in cities_df.iterrows():
    lat, lon = float(input_row['lat']), float(input_row['lon'])
    try:
        # NOTE(review): the trailing 0 is presumably the UTC offset, which
        # would match calc_date's UTC tzinfo -- confirm against prayer_times.
        p_times = PT.getTimes(calc_date_tuple, (lat, lon), 0)
        for prayer_name in prayer_names:
            time_str = p_times[prayer_name]
            # Presumably the library's placeholder for a time it could not
            # compute; such events are simply not exported.
            if time_str == '-----':
                continue

            prayer_time = datetime.datetime.strptime(time_str, '%H:%M')
            # Bug fix: this previously referenced the undefined name
            # `calculation_date`, raising a NameError that the ValueError
            # handler below does not catch.
            prayer_date_time = calc_date.replace(hour=prayer_time.hour,
                                                 minute=prayer_time.minute)

            output_row = input_row.to_dict()
            # countries_to_population yields -1 for countries missing from the
            # population table, so muslim_population is negative for them.
            muslim_pop_percent = countries_to_population[input_row["Country name EN"]]
            output_row.update({
                "time": prayer_date_time,
                "str_time": str(prayer_date_time),
                "event": prayer_name,
                "muslim_percent": muslim_pop_percent,
                "muslim_population": int(muslim_pop_percent * input_row['Population'] / 100),
            })
            output_rows.append(output_row)
    except ValueError:
        # Raised e.g. by strptime on an unparsable time or by int() on NaN.
        # Rows appended for this city before the failure are kept.
        num_errors += 1
    else:
        num_correct += 1

print("Errors: ", num_errors)
print("Success: ", num_correct)

In [None]:
# Turn the collected per-event dicts into a table and persist it without the
# pandas row index.
output_df = pd.DataFrame(data=output_rows)
output_df.to_csv(path_or_buf=unfiltered_csv_name, index=False)

## The CSV produced above covers *all* cities; let's filter it based on our criteria.

In [None]:
# Reload the unfiltered CSV from disk so filtering can start from the saved file.
stats_df = pd.read_csv(filepath_or_buffer=unfiltered_csv_name)

In [None]:
# A row is kept when either of the following holds:
#   1. The estimated Muslim population of the city is at least 1000 people.
#   2. The city's population is unknown (stored as 0) *but* at least 50% of
#      the country's population is Muslim.
has_1k_muslims = stats_df['muslim_population'] >= 1000
unknown_pop_majority_muslim = (stats_df['Population'] == 0) & (stats_df['muslim_percent'] >= 50)

df_1k = stats_df[has_1k_muslims]
df_mus_pop = stats_df[unknown_pop_majority_muslim]

# The two selections cannot overlap (muslim_population is 0 wherever
# Population is 0), so stacking them directly introduces no duplicate rows.
filtered_df = pd.concat([df_1k, df_mus_pop], ignore_index=True, sort=False)

# Where the Muslim share of a country is <= 1%, only cities with more than
# 1M residents survive.
above_one_percent = filtered_df['muslim_percent'] > 1
over_one_million = filtered_df['Population'] > 1000000
filtered_df = filtered_df[above_one_percent | over_one_million]

In [None]:
# Persist the filtered table (without the pandas row index); this is the file
# that gets loaded into kepler.gl.
filtered_df.to_csv(path_or_buf=filtered_csv_name, index=False)