# This notebook converts the location column values to a GeoJSON file because polygon data can't be used in Leaflet in R:

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely import wkt

In [2]:
### Lift pandas' display limits so that every column and every row is shown
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [3]:
### Load the cleaned US/Canada species table from CSV, then preview its first five rows
ecos_us_ca = pd.read_csv(filepath_or_buffer="../data/ecos_end_species_us_ca_clean.csv")
ecos_us_ca.head(5)

Unnamed: 0,common_name,scientific_name,species_group,esa_status,foreign_or_domestic,image_url_full,location,country,food,habitat_requirements,home_range,cause,reproduction,description
0,Akekee,Loxops caeruleirostris,Birds,Endangered,Domestic,https://ecos.fws.gov/docs/species_images/doc46...,"POLYGON((-160.30501096653 21.7282346819371,-16...",United States,"The 'Akeke'e feeds mostly on spiders, psyllid...",The 'Akeke'e inhabits Lowland Mesic and Wet Fo...,The 'Akeke'e is endemic to Kaua'i Island of th...,The 'Akeke'e population appears to have remain...,The 'Akeke'e nests in open-cup nests located ...,The 'Akeke'e is a small Hawaiian honeycreeper....
1,Polar bear,Ursus maritimus,Mammals,Threatened,Both Domestic and Foreign,https://ecos.fws.gov/docs/species_images/doc50...,"POLYGON((-174.837571354155 59.7457496094217,-1...",Canada,No information available,No information available,No information available,No information available,No information available,Polar bears are protected under the Marine Mam...
2,Pitcher's thistle,Cirsium pitcheri,Flowering Plants,Threatened,Both Domestic and Foreign,https://ecos.fws.gov/docs/species_images/doc50...,"POLYGON((-88.2001819788612 41.1621947025267,-8...",Canada,No information available,No information available,No information available,No information available,No information available,No information available
3,Piping Plover,Charadrius melodus,Birds,Endangered,Both Domestic and Foreign,https://ecos.fws.gov/docs/species_images/doc37...,"POLYGON((-93.0885545621889 37.8619826666356,-9...",Canada,No information available,No information available,No information available,No information available,No information available,Size: 18 cm (7.25 in) in length. Color: Breedi...
4,Pink mucket (pearlymussel),Lampsilis abrupta,Clams,Endangered,Domestic,https://ecos.fws.gov/docs/species_images/doc49...,"POLYGON((-93.8616440305422 32.505883021288,-93...",United States,Suspension feeder on plankton; deposit feeder ...,"Relatively silt-free substrates of sand, grave...",Sedentary.,No information available,"Separate sexes, males releasing sperm to water...",No information available


## 1- final cleaning before geospatial conversion:

### 1- Column "common_name": multiple "No common name" values:

### For Df us_ca

In [4]:
### Seven species have no common name; each one needs a distinct label so the app can tell them apart.
### Uncomment the line below to list the column and find their row numbers:
#ecos_us_ca.loc[:, "common_name"]

They are located in rows 57, 59, 60, 61, 62, 63, 66.

In [5]:
### Display the single row at integer position 65 as a Series
ecos_us_ca.iloc[65]

common_name                               Northeastern beach tiger beetle
scientific_name                               Cicindela dorsalis dorsalis
species_group                                                     Insects
esa_status                                                     Threatened
foreign_or_domestic                                              Domestic
image_url_full          https://ecos.fws.gov/docs/species_images/doc48...
location                POLYGON((-76.5301458997165 37.080589428058,-76...
country                                                     United States
food                    Adults are active predators that forage on sma...
habitat_requirements    Adult and larval tiger beetles are found on lo...
home_range              Once abundant along coastal beaches from Massa...
cause                   Few northeastern beach tiger beetle sites are ...
reproduction            Adults are primarily active from June to Septe...
description             The largest (1

In [6]:
### Aristida chaseae: replace the "No common name" placeholder with its common name.
### A single .loc[rows, column] assignment is used instead of chained indexing
### (df[col][mask] = ...), which can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_us_ca['common_name'] == "No common name") & (ecos_us_ca['scientific_name'] == "Aristida chaseae")
ecos_us_ca.loc[mask, 'common_name'] = "Chase's threeawn"

In [7]:
### Chamaecrista glandulosa var. mirabilis: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_us_ca['common_name'] == "No common name") & (ecos_us_ca['scientific_name'] == "Chamaecrista glandulosa var. mirabilis")
ecos_us_ca.loc[mask, 'common_name'] = "Cassia mirabilis"

In [8]:
### Lepanthes eltoroensis: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_us_ca['common_name'] == "No common name") & (ecos_us_ca['scientific_name'] == "Lepanthes eltoroensis")
ecos_us_ca.loc[mask, 'common_name'] = "Luquillo Mountain babyboot orchid"

In [9]:
### Lyonia truncata var. proctorii: this species really has no common name, so it gets a
### numbered placeholder to keep it distinct from the other species without a common name.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_us_ca['common_name'] == "No common name") & (ecos_us_ca['scientific_name'] == "Lyonia truncata var. proctorii")
ecos_us_ca.loc[mask, 'common_name'] = "No common name 1"

In [10]:
### Schoepfia arenaria: this species also has no common name, so it gets its own
### numbered placeholder to keep it distinct from the other species without a common name.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_us_ca['common_name'] == "No common name") & (ecos_us_ca['scientific_name'] == "Schoepfia arenaria")
ecos_us_ca.loc[mask, 'common_name'] = "No common name 2"

In [11]:
### Vernonia proctorii: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_us_ca['common_name'] == "No common name") & (ecos_us_ca['scientific_name'] == "Vernonia proctorii")
ecos_us_ca.loc[mask, 'common_name'] = "Proctor's ironweed"

In [12]:
### Cranichis ricartii: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_us_ca['common_name'] == "No common name") & (ecos_us_ca['scientific_name'] == "Cranichis ricartii")
ecos_us_ca.loc[mask, 'common_name'] = "Puerto Rico helmet orchid"

In [13]:
### Count the rows still labelled "No common name"; the expected count is 0
(ecos_us_ca[["common_name"]] == "No common name").sum()

common_name    0
dtype: int64

### For Df all countries

In [14]:
### Load the cleaned species table for the "all countries" frame, then preview its first five rows.
### NOTE(review): this path is the same US/Canada CSV that is loaded into ecos_us_ca, so both
### frames start out identical. This looks like a copy-paste slip -- confirm whether a separate
### all-countries file was intended here.
ecos_all_countries = pd.read_csv(filepath_or_buffer="../data/ecos_end_species_us_ca_clean.csv")
ecos_all_countries.head(5)

Unnamed: 0,common_name,scientific_name,species_group,esa_status,foreign_or_domestic,image_url_full,location,country,food,habitat_requirements,home_range,cause,reproduction,description
0,Akekee,Loxops caeruleirostris,Birds,Endangered,Domestic,https://ecos.fws.gov/docs/species_images/doc46...,"POLYGON((-160.30501096653 21.7282346819371,-16...",United States,"The 'Akeke'e feeds mostly on spiders, psyllid...",The 'Akeke'e inhabits Lowland Mesic and Wet Fo...,The 'Akeke'e is endemic to Kaua'i Island of th...,The 'Akeke'e population appears to have remain...,The 'Akeke'e nests in open-cup nests located ...,The 'Akeke'e is a small Hawaiian honeycreeper....
1,Polar bear,Ursus maritimus,Mammals,Threatened,Both Domestic and Foreign,https://ecos.fws.gov/docs/species_images/doc50...,"POLYGON((-174.837571354155 59.7457496094217,-1...",Canada,No information available,No information available,No information available,No information available,No information available,Polar bears are protected under the Marine Mam...
2,Pitcher's thistle,Cirsium pitcheri,Flowering Plants,Threatened,Both Domestic and Foreign,https://ecos.fws.gov/docs/species_images/doc50...,"POLYGON((-88.2001819788612 41.1621947025267,-8...",Canada,No information available,No information available,No information available,No information available,No information available,No information available
3,Piping Plover,Charadrius melodus,Birds,Endangered,Both Domestic and Foreign,https://ecos.fws.gov/docs/species_images/doc37...,"POLYGON((-93.0885545621889 37.8619826666356,-9...",Canada,No information available,No information available,No information available,No information available,No information available,Size: 18 cm (7.25 in) in length. Color: Breedi...
4,Pink mucket (pearlymussel),Lampsilis abrupta,Clams,Endangered,Domestic,https://ecos.fws.gov/docs/species_images/doc49...,"POLYGON((-93.8616440305422 32.505883021288,-93...",United States,Suspension feeder on plankton; deposit feeder ...,"Relatively silt-free substrates of sand, grave...",Sedentary.,No information available,"Separate sexes, males releasing sperm to water...",No information available


In [15]:
### Seven species have no common name; each one needs a distinct label so the app can tell them apart.
### Uncomment the line below to list the column and find their row numbers:
#ecos_all_countries.loc[:, "common_name"]

They are located in rows 128, 135, 136, 137, 139, 140, 141.

In [16]:
### Aristida chaseae: replace the "No common name" placeholder with its common name.
### A single .loc[rows, column] assignment is used instead of chained indexing
### (df[col][mask] = ...), which can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_all_countries['common_name'] == "No common name") & (ecos_all_countries['scientific_name'] == "Aristida chaseae")
ecos_all_countries.loc[mask, 'common_name'] = "Chase's threeawn"

In [17]:
### Chamaecrista glandulosa var. mirabilis: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_all_countries['common_name'] == "No common name") & (ecos_all_countries['scientific_name'] == "Chamaecrista glandulosa var. mirabilis")
ecos_all_countries.loc[mask, 'common_name'] = "Cassia mirabilis"

In [18]:
### Lepanthes eltoroensis: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_all_countries['common_name'] == "No common name") & (ecos_all_countries['scientific_name'] == "Lepanthes eltoroensis")
ecos_all_countries.loc[mask, 'common_name'] = "Luquillo Mountain babyboot orchid"

In [19]:
### Lyonia truncata var. proctorii: this species really has no common name, so it gets a
### numbered placeholder to keep it distinct from the other species without a common name.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_all_countries['common_name'] == "No common name") & (ecos_all_countries['scientific_name'] == "Lyonia truncata var. proctorii")
ecos_all_countries.loc[mask, 'common_name'] = "No common name 1"

In [20]:
### Schoepfia arenaria: this species also has no common name, so it gets its own
### numbered placeholder to keep it distinct from the other species without a common name.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_all_countries['common_name'] == "No common name") & (ecos_all_countries['scientific_name'] == "Schoepfia arenaria")
ecos_all_countries.loc[mask, 'common_name'] = "No common name 2"

In [21]:
### Vernonia proctorii: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_all_countries['common_name'] == "No common name") & (ecos_all_countries['scientific_name'] == "Vernonia proctorii")
ecos_all_countries.loc[mask, 'common_name'] = "Proctor's ironweed"

In [22]:
### Cranichis ricartii: replace the "No common name" placeholder.
### .loc[rows, column] writes directly into the frame; chained indexing (df[col][mask] = ...)
### can write to a temporary copy and raises SettingWithCopyWarning.
mask = (ecos_all_countries['common_name'] == "No common name") & (ecos_all_countries['scientific_name'] == "Cranichis ricartii")
ecos_all_countries.loc[mask, 'common_name'] = "Puerto Rico helmet orchid"

In [23]:
### Count the rows still labelled "No common name"; the expected count is 0
(ecos_all_countries[["common_name"]] == "No common name").sum()

common_name    0
dtype: int64

### 2- Prepare dataframe for geo conversion:

### Df Us and canada

In [24]:
### Shape before dropping rows without a polygon (the recorded output shows 434 rows, not 436)
ecos_us_ca.shape

(434, 14)

In [25]:
### Five rows have no polygon; they have to be dropped because they cannot be converted
### to GeoJSON (the other columns do not contain missing values).
### .copy() makes the filtered result an independent frame, so adding the "geometry" column
### to it later does not raise SettingWithCopyWarning.
ecos_us_ca = ecos_us_ca[ecos_us_ca["location"].notnull()].copy()

In [26]:
### Shape after the drop: 429 rows are left (5 rows had no polygon)
ecos_us_ca.shape

(429, 14)

### Df all countries

In [27]:
### Shape before dropping rows without a polygon (the recorded output shows 434 rows, not 436)
ecos_all_countries.shape

(434, 14)

In [28]:
### Five rows have no polygon; they have to be dropped because they cannot be converted
### to GeoJSON (the other columns do not contain missing values).
### .copy() makes the filtered result an independent frame, so adding the "geometry" column
### to it later does not raise SettingWithCopyWarning.
ecos_all_countries = ecos_all_countries[ecos_all_countries["location"].notnull()].copy()

In [29]:
### Shape after the drop: 429 rows are left (5 rows had no polygon)
ecos_all_countries.shape

(429, 14)

## 2- Df conversion to geojson files:

### Df US-canada

In [30]:
### The "location" strings are in WKT format, so each one is parsed into a shapely geometry
### (see "https://geopandas.readthedocs.io/en/latest/gallery/create_geopandas_from_pandas.html")
### and the parsed objects are stored in a new "geometry" column
ecos_us_ca['geometry'] = ecos_us_ca["location"].map(wkt.loads)

In [31]:
### Wrap the pandas frame (including its "geometry" column) in a GeoDataFrame
ecos_us_ca_geo = gpd.GeoDataFrame(data=ecos_us_ca)

In [32]:
### Mark "geometry" as the active geometry column. The result is reassigned instead of using
### inplace=True, so the cell does not mutate an existing object and is safe to re-run.
ecos_us_ca_geo = ecos_us_ca_geo.set_geometry(col='geometry')

In [33]:
### Save the geodata as GeoJSON. The driver is named explicitly so the output format does not
### depend on the geopandas version's default driver or on file-extension inference.
ecos_us_ca_geo.to_file("../data/ecos_us_ca_geo.geojson", driver="GeoJSON")

### Df all countries

In [34]:
### Parse each WKT string in "location" into a shapely geometry and store the
### parsed objects in a new "geometry" column
ecos_all_countries['geometry'] = ecos_all_countries["location"].map(wkt.loads)

In [35]:
### Wrap the pandas frame (including its "geometry" column) in a GeoDataFrame
ecos_all_geo = gpd.GeoDataFrame(data=ecos_all_countries)

In [36]:
### Mark "geometry" as the active geometry column. The result is reassigned instead of using
### inplace=True, so the cell does not mutate an existing object and is safe to re-run.
ecos_all_geo = ecos_all_geo.set_geometry(col='geometry')

In [37]:
### Save the geodata as GeoJSON. The driver is named explicitly so the output format does not
### depend on the geopandas version's default driver or on file-extension inference.
ecos_all_geo.to_file("../data/ecos_all_geo.geojson", driver="GeoJSON")