# Data source
https://pci.gov.ng/conversion-centers

In [3]:
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

### Data Scraping

In [4]:
# Page that lists the CNG conversion centres.
url = 'https://pci.gov.ng/conversion-centers'

# Fail fast on network problems: a timeout avoids hanging forever and
# raise_for_status() stops us from parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# The tag that holds all the information we want to extract.
results = soup.find(class_="table table-striped")
if results is None:
    raise ValueError("Could not find the 'table table-striped' element on the page")

# Select all the table rows.
table = results.find_all('tr')

# Column headers for the text cells of each row.
cols_header = ['State', 'LGA', 'Name', 'Address']

# Build one record per row. The map link is read from the SAME row as the
# text cells, so a link can never be paired with another station's data
# (collecting the links in a separate pass breaks as soon as a row is dropped).
data = []
for row in table[1:]:  # skip the first row, it yields no cell data
    cols = row.find_all('td')
    if len(cols) != len(cols_header):
        # Rows without the expected cells cannot be a station record.
        continue
    record = dict(zip(cols_header, (info.text.strip() for info in cols)))
    anchor = row.find('a')
    record['google_map_url'] = anchor.get('href') if anchor is not None else None
    data.append(record)

# Missing values and duplicates are judged on the four text columns only,
# as before; the index is renumbered so it stays contiguous after the drops.
df = (
    pd.DataFrame(data, columns=cols_header + ['google_map_url'])
    .dropna(subset=cols_header)
    .drop_duplicates(subset=cols_header)
    .reset_index(drop=True)
)

# Sanity check: according to the information on the website there are 160 CNG stations.
assert len(df) == 160, "The total Number of Data Scrapped is not equal to 160"


In [5]:
# Preview the scraped table of conversion centres.
df

Unnamed: 0,State,LGA,Name,Address,google_map_url
0,FCT,Abuja Municipal Area Council,Nigeria Institute of Transport Technology (NIT...,"Plot 487, Asuquo Okon Street, Daki-Biyu, Jabi ...",https://maps.google.com/?q=Plot+487%2C+Asuquo+...
1,Lagos,Ibeju-Lekki,Femadec Energy Limited Lagos,"KM 42, Lekki-Epe Expressway, Majek Second Gate...",https://maps.google.com/?q=KM+42%2C+Lekki-Epe+...
2,Kaduna,Kaduna North,Rolling Energy Limited Kaduna State,"Plot 16715, Kaduna Abuja Expressway, Kakau Tow...",https://maps.google.com/?q=Plot+16715%2C+Kadun...
3,FCT,Abuja Municipal Area Council,ABG Oil and Gas Limited Abuja,"49FJ+G6R Kubwa 901101, Federal Capital Territo...",https://maps.google.com/?q=49FJ%2BG6R+Kubwa+90...
4,Kaduna,Zaria,Nigerian Institute of Transport Technology (NI...,"Basawa Road, Palladan, Zaria, Kaduna Conversio...",https://maps.google.com/?q=Basawa+Road%2C+Pall...
...,...,...,...,...,...
155,Delta,Uvwie,Dandani Gas Limited,"135 Refinery Road, KM3, Opposite Navy Air Base...",https://maps.google.com/?q=135+Refinery+Road%2...
156,Delta,Aniocha South,Negus Energy and Engineering Services Limited,"18, Old Site Mechanic Village, Asaba, Delta State",https://maps.google.com/?q=18%2C+Old+Site+Mech...
157,Kaduna,Kaduna North,Attayseer Autogas Nig Ltd,"NO 1, Gulf Club Cross Road, Al-Mansur Motors M...",https://maps.google.com/?q=NO+1%2C+Gulf+Club+C...
158,FCT,Municipal Area Council,Hi-Grade Energies Limited,Arish Mall Zuba Opposite Enyo Filling Station ...,https://maps.google.com/?q=Arish+Mall+Zuba+Opp...


### Geocoding


Convert each station's address to longitude and latitude.

In [None]:
# Disabled first attempt at geocoding with Nominatim, kept for reference.
# The recorded run stopped with "AttributeError: 'NoneType' object has no
# attribute 'longitude'" right after printing the first station name, i.e.
# `geolocator.geocode(...)` returned None and `location.longitude` was read
# without a None check.
# NOTE(review): the query passes col['Name'], not col['Address'] - confirm
# which column was intended before re-enabling this cell.
# lon = []
# lat = []
# for _,col in df.iterrows():
#     geolocator = Nominatim(user_agent="samuel")
#     print(col['Name'])
#     location = geolocator.geocode(col['Name'])
#     print(location.longitude)
#     lon.append(location.longitude)
#     lat.append(location.latitude)

# lon,lat
# # df['longitude','latitude'] = 

Nigeria Institute of Transport Technology (NITT) Conversion Centre


AttributeError: 'NoneType' object has no attribute 'longitude'

In [10]:
# Fetch the Google Maps page for one sample address.
# NOTE: `url` holds the Response object here, not a URL string; the name is
# kept because the following cells read `url.content`.
url = requests.get(
    'https://maps.google.com/?q=Plot+487%2C+Asuquo+Okon+Street%2C+Daki-Biyu%2C+Jabi+District%2C+Abuja',
    timeout=30,  # do not hang indefinitely if the server never answers
)
# Stop here on an HTTP error instead of searching an error page for coordinates.
url.raise_for_status()

In [11]:
# Raw HTML bytes of the Maps response. In the recorded output the static-map
# <meta> tag carries the coordinates as `center=<lat>%2C<lon>`.
url.content

b'<!DOCTYPE html><html itemscope="" itemtype="http://schema.org/Place" lang="en-NG"> <head>  <link href="/maps/_/js/k=maps.m.en.pYqIZFpMDpM.es5.O/m=sc2,per,mo,lp,ep,ti,ds,stx,dwi,enr,pwd,dw,plm,log,b/am=4ACQ5IAU/rt=j/d=1/rs=ACT90oFlQTzO3CJTZnmVlO5EX867TuveFQ?wli=m.DUKneoDoDjA.loadSv.O%3A%3B" as="script" rel="preload" type="application/javascript" nonce="7C1sUIGcp4Zlxf_YJc2-FQ">  <link href="/maps/preview/opensearch.xml?hl=en" title="Google Maps" rel="search" type="application/opensearchdescription+xml"> <title> Google Maps </title> <meta content="Find local businesses, view maps and get driving directions in Google Maps." name="Description">  <meta content="Google Maps" itemprop="name"> <meta content="Google Maps" property="og:title">  <meta content="https://maps.google.com/maps/api/staticmap?center=9.05892779%2C7.42690868&amp;zoom=17&amp;size=900x900&amp;language=en&amp;sensor=false&amp;client=google-maps-frontend&amp;signature=msL1Ws4nZFnng2oMx0hi7Vlxf1M" itemprop="image"> <meta cont

In [13]:
# The page embeds the coordinates as `center=<lat>%2C<lon>` (URL-encoded
# comma); a literal comma is accepted as well. Each number must be an
# optionally signed decimal, so stray runs of dots or dashes do not match.
_CENTER_PATTERN = re.compile(
    r'center=(-?\d+(?:\.\d+)?)(?:%2[Cc]|,)(-?\d+(?:\.\d+)?)'
)


def extract_map_center(html):
    """Return the first ``center=`` coordinate pair found in ``html``.

    Parameters
    ----------
    html : str
        Page source to search.

    Returns
    -------
    tuple of (str, str) or None
        ``(latitude, longitude)`` exactly as written in the page, or ``None``
        when the page contains no such pair. Convert with ``float()`` when
        numeric values are needed.
    """
    found = _CENTER_PATTERN.search(html)
    if found is None:
        return None
    return found.group(1), found.group(2)


# `url` is the Response fetched earlier, so `url.content` is a bytes object.
# Undecodable bytes are replaced rather than raising, because only the ASCII
# `center=` fragment matters here.
url_content = url.content.decode('utf-8', errors='replace').strip()

center = extract_map_center(url_content)

if center:
    latitude, longitude = center
    print(f"Latitude: {latitude}, Longitude: {longitude}")
else:
    print("No coordinates found.")

Latitude: 9.05892779, Longitude: 7.42690868
