# Dataprep // Geographic Coordinates
src: https://docs.dataprep.ai/user_guide/clean/clean_lat_long.html?highlight=dms

### Imports + raw data

In [1]:
import pandas as pd
#pip install dataprep
from dataprep.clean import clean_lat_long

# Path of the world-cities CSV, relative to the notebook's working directory.
data = "data/worldcities.csv"

# Read the whole file once; the following cell derives `df` from `raw`.
raw = pd.read_csv(filepath_or_buffer=data)

# Preview the first five rows.
raw.head(n=5)

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Tokyo,Tokyo,35.6839,139.7744,Japan,JP,JPN,Tōkyō,primary,39105000.0,1392685764
1,Jakarta,Jakarta,-6.2146,106.8451,Indonesia,ID,IDN,Jakarta,primary,35362000.0,1360771077
2,Delhi,Delhi,28.6667,77.2167,India,IN,IND,Delhi,admin,31870000.0,1356872604
3,Manila,Manila,14.6,120.9833,Philippines,PH,PHL,Manila,primary,23971000.0,1608618140
4,São Paulo,Sao Paulo,-23.5504,-46.6339,Brazil,BR,BRA,São Paulo,admin,22495000.0,1076532519


In [2]:
# Build the working frame in one chain: select, then rename.
df = (
    raw.loc[:, ["city_ascii", "country", "iso2", "lat", "lng"]]  # keep only the columns of interest
    .rename(columns={"city_ascii": "city", "iso2": "short", "lng": "lon"})  # shorter column names
)
df

Unnamed: 0,city,country,short,lat,lon
0,Tokyo,Japan,JP,35.6839,139.7744
1,Jakarta,Indonesia,ID,-6.2146,106.8451
2,Delhi,India,IN,28.6667,77.2167
3,Manila,Philippines,PH,14.6000,120.9833
4,Sao Paulo,Brazil,BR,-23.5504,-46.6339
...,...,...,...,...,...
42900,Tukchi,Russia,RU,57.3670,139.5000
42901,Numto,Russia,RU,63.6667,71.3333
42902,Nord,Greenland,GL,81.7166,-17.8000
42903,Timmiarmiut,Greenland,GL,62.5333,-42.2167


## Lat/Lon >>> dms in separate columns

In [3]:
# DMS : df0
# Convert the decimal lat/lon columns of `df` to degrees-minutes-seconds.
# split=True requests separate output columns (displayed as lat_clean / lon_clean).
df0 = clean_lat_long(
    df,
    lat_col="lat",
    long_col="lon",
    output_format="dms",
    split=True,
)
df0.head()

  0%|                                                         | 0/13 [00:00<?, ?it/s]

Latitude and Longitude Cleaning Report:
	42905 values cleaned (100.0%)
Result contains 42905 (100.0%) values in the correct format and 0 null values (0.0%)


Unnamed: 0,city,country,short,lat,lon,lat_clean,lon_clean
0,Tokyo,Japan,JP,35.6839,139.7744,35° 41′ 2.04″ N,139° 46′ 27.84″ E
1,Jakarta,Indonesia,ID,-6.2146,106.8451,6° 12′ 52.56″ S,106° 50′ 42.36″ E
2,Delhi,India,IN,28.6667,77.2167,28° 40′ 0.12″ N,77° 13′ 0.12″ E
3,Manila,Philippines,PH,14.6,120.9833,14° 35′ 60″ N,120° 58′ 59.88″ E
4,Sao Paulo,Brazil,BR,-23.5504,-46.6339,23° 33′ 1.44″ S,46° 38′ 2.04″ W


### Rename the `lat_clean` / `lon_clean` columns and drop the original lat/lon

In [4]:
# Rename first, then select: rename() skips labels that are absent, so this cell
# can be re-run without a KeyError once "lat_clean"/"lon_clean" are already renamed.
df0 = df0.rename(columns={"lat_clean": "latitude", "lon_clean": "longitude"})
# Keep only the columns of interest (this drops the original decimal lat/lon).
df0 = df0[["city", "country", "short", "latitude", "longitude"]]
df0

Unnamed: 0,city,country,short,latitude,longitude
0,Tokyo,Japan,JP,35° 41′ 2.04″ N,139° 46′ 27.84″ E
1,Jakarta,Indonesia,ID,6° 12′ 52.56″ S,106° 50′ 42.36″ E
2,Delhi,India,IN,28° 40′ 0.12″ N,77° 13′ 0.12″ E
3,Manila,Philippines,PH,14° 35′ 60″ N,120° 58′ 59.88″ E
4,Sao Paulo,Brazil,BR,23° 33′ 1.44″ S,46° 38′ 2.04″ W
...,...,...,...,...,...
42900,Tukchi,Russia,RU,57° 22′ 1.2″ N,139° 30′ 0″ E
42901,Numto,Russia,RU,63° 40′ 0.12″ N,71° 19′ 59.88″ E
42902,Nord,Greenland,GL,81° 42′ 59.76″ N,17° 48′ 0″ W
42903,Timmiarmiut,Greenland,GL,62° 31′ 59.88″ N,42° 13′ 0.12″ W


### Lat/Lon >>> dms in one column

In [5]:
# Same DMS conversion, but without splitting: the result carries a single
# "latitude_longitude" column. Note that this reassigns df0.
df0 = clean_lat_long(
    df,
    lat_col="lat",
    long_col="lon",
    output_format="dms",
    split=False,
)
df0.head()

  0%|                                                         | 0/13 [00:00<?, ?it/s]

Latitude and Longitude Cleaning Report:
	42905 values cleaned (100.0%)
Result contains 42905 (100.0%) values in the correct format and 0 null values (0.0%)


Unnamed: 0,city,country,short,lat,lon,latitude_longitude
0,Tokyo,Japan,JP,35.6839,139.7744,"35° 41′ 2.04″ N, 139° 46′ 27.84″ E"
1,Jakarta,Indonesia,ID,-6.2146,106.8451,"6° 12′ 52.56″ S, 106° 50′ 42.36″ E"
2,Delhi,India,IN,28.6667,77.2167,"28° 40′ 0.12″ N, 77° 13′ 0.12″ E"
3,Manila,Philippines,PH,14.6,120.9833,"14° 35′ 60″ N, 120° 58′ 59.88″ E"
4,Sao Paulo,Brazil,BR,-23.5504,-46.6339,"23° 33′ 1.44″ S, 46° 38′ 2.04″ W"


## Lat/Lon >>> dm in separate columns

In [6]:
# DM : df1
# Degrees + decimal minutes, written to separate lat_clean / lon_clean columns.
df1 = clean_lat_long(
    df,
    lat_col="lat",
    long_col="lon",
    output_format="dm",
    split=True,
)
df1

  0%|                                                         | 0/13 [00:00<?, ?it/s]

Latitude and Longitude Cleaning Report:
	42905 values cleaned (100.0%)
Result contains 42905 (100.0%) values in the correct format and 0 null values (0.0%)


Unnamed: 0,city,country,short,lat,lon,lat_clean,lon_clean
0,Tokyo,Japan,JP,35.6839,139.7744,35° 41.034′ N,139° 46.464′ E
1,Jakarta,Indonesia,ID,-6.2146,106.8451,6° 12.876′ S,106° 50.706′ E
2,Delhi,India,IN,28.6667,77.2167,28° 40.002′ N,77° 13.002′ E
3,Manila,Philippines,PH,14.6000,120.9833,14° 36′ N,120° 58.998′ E
4,Sao Paulo,Brazil,BR,-23.5504,-46.6339,23° 33.024′ S,46° 38.034′ W
...,...,...,...,...,...,...,...
42900,Tukchi,Russia,RU,57.3670,139.5000,57° 22.02′ N,139° 30′ E
42901,Numto,Russia,RU,63.6667,71.3333,63° 40.002′ N,71° 19.998′ E
42902,Nord,Greenland,GL,81.7166,-17.8000,81° 42.996′ N,17° 48′ W
42903,Timmiarmiut,Greenland,GL,62.5333,-42.2167,62° 31.998′ N,42° 13.002′ W
