<a href="https://colab.research.google.com/github/GHUB-arnav-10/Geospatial-Analysis/blob/main/Geocoding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from geopy.geocoders import Nominatim

In [4]:
geolocator = Nominatim(user_agent="me")
location = geolocator.geocode("India Gate")

print(location.point)
print(location.address)

28 36m 46.5595s N, 77 13m 46.1742s E
India Gate, Shahjahan Road, Pandara Park, Chanakya Puri Tehsil, New Delhi, Delhi, 020626, India


In [5]:
point=location.point
print("Latitude:", point.latitude)
print("Longitude:", point.longitude)

Latitude: 28.6129332
Longitude: 77.22949282049879


In [7]:
import pandas as pd
universities = pd.read_csv("/content/top_universities.csv")
universities.head()

Unnamed: 0,Name
0,University of Oxford
1,University of Cambridge
2,Imperial College London
3,ETH Zurich
4,UCL


In [10]:
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
def my_geocoder(row):
    try:
        point = geolocator.geocode(row).point
        return pd.Series({'Latitude': point.latitude, 'Longitude': point.longitude})
    except:
        return None

universities[['Latitude', 'Longitude']] = universities.apply(lambda x: my_geocoder(x['Name']), axis=1)

print("{}% of addresses were geocoded!".format(
    (1 - sum(np.isnan(universities["Latitude"])) / len(universities)) * 100))
universities = universities.loc[~np.isnan(universities["Latitude"])]
universities = gpd.GeoDataFrame(
    universities, geometry=gpd.points_from_xy(universities.Longitude, universities.Latitude))
universities.crs = {'init': 'epsg:4326'}
universities.head()



100.0% of addresses were geocoded!


  in_crs_string = _prepare_from_proj_string(in_crs_string)


Unnamed: 0,Name,Latitude,Longitude,geometry
0,University of Oxford,51.758708,-1.255668,POINT (-1.25567 51.75871)
1,University of Cambridge,52.210946,0.092005,POINT (0.09200 52.21095)
2,Imperial College London,51.498959,-0.175641,POINT (-0.17564 51.49896)
3,ETH Zurich,47.562772,7.580947,POINT (7.58095 47.56277)
4,UCL,51.521785,-0.135151,POINT (-0.13515 51.52179)


In [12]:
import folium
from folium import Marker
m = folium.Map(location=[54, 15], tiles='openstreetmap', zoom_start=2)

for idx, row in universities.iterrows():
    Marker([row['Latitude'], row['Longitude']], popup=row['Name']).add_to(m)

m

In [14]:
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
europe = world.loc[world.continent == 'Europe'].reset_index(drop=True)

europe_stats = europe[["name", "pop_est", "gdp_md_est"]]
europe_boundaries = europe[["name", "geometry"]]
europe_boundaries.head()

  world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


Unnamed: 0,name,geometry
0,Russia,"MULTIPOLYGON (((180.00000 71.51571, 180.00000 ..."
1,Norway,"MULTIPOLYGON (((15.14282 79.67431, 15.52255 80..."
2,France,"MULTIPOLYGON (((-51.65780 4.15623, -52.24934 3..."
3,Sweden,"POLYGON ((11.02737 58.85615, 11.46827 59.43239..."
4,Belarus,"POLYGON ((28.17671 56.16913, 29.22951 55.91834..."


In [15]:
europe_stats.head()

Unnamed: 0,name,pop_est,gdp_md_est
0,Russia,144373535.0,1699876
1,Norway,5347896.0,403336
2,France,67059887.0,2715518
3,Sweden,10285453.0,530883
4,Belarus,9466856.0,63080


In [16]:
europe = europe_boundaries.merge(europe_stats, on="name")
europe.head()

Unnamed: 0,name,geometry,pop_est,gdp_md_est
0,Russia,"MULTIPOLYGON (((180.00000 71.51571, 180.00000 ...",144373535.0,1699876
1,Norway,"MULTIPOLYGON (((15.14282 79.67431, 15.52255 80...",5347896.0,403336
2,France,"MULTIPOLYGON (((-51.65780 4.15623, -52.24934 3...",67059887.0,2715518
3,Sweden,"POLYGON ((11.02737 58.85615, 11.46827 59.43239...",10285453.0,530883
4,Belarus,"POLYGON ((28.17671 56.16913, 29.22951 55.91834...",9466856.0,63080


In [17]:
european_universities = gpd.sjoin(universities, europe)


print("We located {} universities.".format(len(universities)))
print("Only {} of the universities were located in Europe (in {} different countries).".format(
    len(european_universities), len(european_universities.name.unique())))

european_universities.head()

We located 90 universities.
Only 87 of the universities were located in Europe (in 15 different countries).


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  european_universities = gpd.sjoin(universities, europe)


Unnamed: 0,Name,Latitude,Longitude,geometry,index_right,name,pop_est,gdp_md_est
0,University of Oxford,51.758708,-1.255668,POINT (-1.25567 51.75871),28,United Kingdom,66834405.0,2829108
1,University of Cambridge,52.210946,0.092005,POINT (0.09200 52.21095),28,United Kingdom,66834405.0,2829108
2,Imperial College London,51.498959,-0.175641,POINT (-0.17564 51.49896),28,United Kingdom,66834405.0,2829108
4,UCL,51.521785,-0.135151,POINT (-0.13515 51.52179),28,United Kingdom,66834405.0,2829108
5,London School of Economics and Political Science,51.514261,-0.116734,POINT (-0.11673 51.51426),28,United Kingdom,66834405.0,2829108


In [19]:
asia = world.loc[world.continent == 'Asia'].reset_index(drop=True)

asia_stats = asia[["name", "pop_est", "gdp_md_est"]]
asia_boundaries = asia[["name", "geometry"]]
asia_boundaries.head()

Unnamed: 0,name,geometry
0,Kazakhstan,"POLYGON ((87.35997 49.21498, 86.59878 48.54918..."
1,Uzbekistan,"POLYGON ((55.96819 41.30864, 55.92892 44.99586..."
2,Indonesia,"MULTIPOLYGON (((141.00021 -2.60015, 141.01706 ..."
3,Timor-Leste,"POLYGON ((124.96868 -8.89279, 125.08625 -8.656..."
4,Israel,"POLYGON ((35.71992 32.70919, 35.54567 32.39399..."


In [20]:
asia_stats.head()

Unnamed: 0,name,pop_est,gdp_md_est
0,Kazakhstan,18513930.0,181665
1,Uzbekistan,33580650.0,57921
2,Indonesia,270625568.0,1119190
3,Timor-Leste,1293119.0,2017
4,Israel,9053300.0,394652


In [21]:
asia = asia_boundaries.merge(asia_stats, on="name")
asia.head()

Unnamed: 0,name,geometry,pop_est,gdp_md_est
0,Kazakhstan,"POLYGON ((87.35997 49.21498, 86.59878 48.54918...",18513930.0,181665
1,Uzbekistan,"POLYGON ((55.96819 41.30864, 55.92892 44.99586...",33580650.0,57921
2,Indonesia,"MULTIPOLYGON (((141.00021 -2.60015, 141.01706 ...",270625568.0,1119190
3,Timor-Leste,"POLYGON ((124.96868 -8.89279, 125.08625 -8.656...",1293119.0,2017
4,Israel,"POLYGON ((35.71992 32.70919, 35.54567 32.39399...",9053300.0,394652


In [22]:
asian_universities = gpd.sjoin(universities, asia)


print("We located {} universities.".format(len(universities)))
print("Only {} of the universities were located in Asia (in {} different countries).".format(
    len(asian_universities), len(asian_universities.name.unique())))

asian_universities.head()

We located 90 universities.
Only 1 of the universities were located in Asia (in 1 different countries).


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  asian_universities = gpd.sjoin(universities, asia)


Unnamed: 0,Name,Latitude,Longitude,geometry,index_right,name,pop_est,gdp_md_est
45,University of Southampton,1.429917,103.612177,POINT (103.61218 1.42992),40,Malaysia,31949777.0,364681


In [23]:
NorthAmerica = world.loc[world.continent == 'North America'].reset_index(drop=True)

na_stats = NorthAmerica[["name", "pop_est", "gdp_md_est"]]
na_boundaries = NorthAmerica[["name", "geometry"]]
na_boundaries.head()

Unnamed: 0,name,geometry
0,Canada,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
1,United States of America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."
2,Haiti,"POLYGON ((-71.71236 19.71446, -71.62487 19.169..."
3,Dominican Rep.,"POLYGON ((-71.70830 18.04500, -71.68774 18.316..."
4,Bahamas,"MULTIPOLYGON (((-78.98000 26.79000, -78.51000 ..."


In [24]:
na_stats.head()

Unnamed: 0,name,pop_est,gdp_md_est
0,Canada,37589262.0,1736425
1,United States of America,328239523.0,21433226
2,Haiti,11263077.0,14332
3,Dominican Rep.,10738958.0,88941
4,Bahamas,389482.0,13578


In [25]:
na = na_boundaries.merge(na_stats, on="name")
na.head()

Unnamed: 0,name,geometry,pop_est,gdp_md_est
0,Canada,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742...",37589262.0,1736425
1,United States of America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",328239523.0,21433226
2,Haiti,"POLYGON ((-71.71236 19.71446, -71.62487 19.169...",11263077.0,14332
3,Dominican Rep.,"POLYGON ((-71.70830 18.04500, -71.68774 18.316...",10738958.0,88941
4,Bahamas,"MULTIPOLYGON (((-78.98000 26.79000, -78.51000 ...",389482.0,13578


In [26]:
na_universities = gpd.sjoin(universities, na)


print("We located {} universities.".format(len(universities)))
print("Only {} of the universities were located in North America (in {} different countries).".format(
    len(na_universities), len(na_universities.name.unique())))

na_universities.head()

We located 90 universities.
Only 1 of the universities were located in North America (in 1 different countries).


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: +init=epsg:4326 +type=crs
Right CRS: EPSG:4326

  na_universities = gpd.sjoin(universities, na)


Unnamed: 0,Name,Latitude,Longitude,geometry,index_right,name,pop_est,gdp_md_est
57,University of Geneva,41.799306,-80.961667,POINT (-80.96167 41.79931),1,United States of America,328239523.0,21433226
