# Scatter plot map for Walcott 
- will use Rathbun scatter plot for reference 
- but will include more detailed hovertext with the specimen family name and year (although the dates on file seem to be wrong)
- if there were fewer points or more time, could use mapbox
    - will use go.Scatter in plotly instead 

In [1]:
import pandas as pd 
import plotly.graph_objects as go
from collections import Counter
import numpy as np

### clean dataframe from original xlsx file
- need to create "sub" dataframe with only the columns I need for my scatter plot map 
- can use example from jones scatter plot map 

In [2]:
# Read the Walcott specimen records from disk and preview the first five rows.
walcott_df = pd.read_csv(filepath_or_buffer='AWHI_Walcott_dataset.csv')
walcott_df.head(n=5)

Unnamed: 0,barcode,catalog_no,order,family,family_name,collector_name,year_collected,country,province_state,ezid
0,1355660,208828.0,Myrtales,Onagraceae,Evening primroses,"Walcott, M. V.",1900.0,Mexico,Colima,http://n2t.net/ark:/65665/38d98265a-6fb0-4c82-...
1,1160457,99194.0,Solanales,Hydrophyllaceae,Waterleaf family,"Walcott, C. D.",1900.0,United States,Colorado,http://n2t.net/ark:/65665/30a751787-3a18-4f16-...
2,3089925,80416.0,Apiales,Apiaceae,Umbellifers,"Walcott, C. D.",1900.0,United States,New Mexico,http://n2t.net/ark:/65665/3d8b58966-9a8b-4b3b-...
3,3088994,80403.0,Apiales,Apiaceae,Umbellifers,"Walcott, C. D.",1900.0,United States,New Mexico,http://n2t.net/ark:/65665/3c0f352f5-2101-4057-...
4,1805301,27505.0,Asterales,Asteraceae,Daisy family,"Walcott, C. D.",1900.0,United States,New Mexico,http://n2t.net/ark:/65665/3db47f2f9-1144-47d3-...


### Pulling information from the imported dataframe 
- this information will be useful for determining how to restructure the dataframe,<br>recreate sub dataframe, and figure out what type of plot and map will be most useful 
- instead of creating new csv files or eliminating information from the main csv file, it's better to make those changes using pandas 

In [3]:
# Print each column's dtype and non-null count so columns with missing values stand out.
walcott_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 219 entries, 0 to 218
Data columns (total 10 columns):
barcode           219 non-null int64
catalog_no        209 non-null float64
order             219 non-null object
family            219 non-null object
family_name       219 non-null object
collector_name    219 non-null object
year_collected    193 non-null float64
country           219 non-null object
province_state    219 non-null object
ezid              219 non-null object
dtypes: float64(2), int64(1), object(7)
memory usage: 17.2+ KB


In [19]:
# Distinct province/state values recorded in the dataset.
province_state = walcott_df['province_state'].unique()
province_state

array(['Colima', 'Colorado', 'New Mexico', 'California', 'Alberta',
       'Arizona', 'Nevada', 'New Brunswick', 'Tennessee',
       'British Columbia', 'Ontario', 'Saskatchewan', 'Utah', 'Guánica',
       'Maryland', 'District of Columbia', 'South Carolina', 'Oklahoma',
       'Arkansas'], dtype=object)

In [10]:
# Specimen tally per collector.
walcott_df.collector_name.value_counts()

Walcott, M. V.    172
Walcott, C. D.     47
Name: collector_name, dtype: int64

In [12]:
# Specimen tally per province/state.
walcott_df.province_state.value_counts()

British Columbia        94
Alberta                 50
California              15
New Mexico              13
Arizona                 11
Tennessee                7
Utah                     7
Colorado                 4
South Carolina           4
Nevada                   3
Maryland                 3
Ontario                  1
New Brunswick            1
Colima                   1
District of Columbia     1
Saskatchewan             1
Arkansas                 1
Guánica                  1
Oklahoma                 1
Name: province_state, dtype: int64

In [14]:
# Specimen count for every (province/state, collector) pair.
(
    walcott_df
    .groupby(['province_state', 'collector_name'])
    .size()
)

province_state        collector_name
Alberta               Walcott, C. D.     1
                      Walcott, M. V.    49
Arizona               Walcott, C. D.     6
                      Walcott, M. V.     5
Arkansas              Walcott, C. D.     1
British Columbia      Walcott, M. V.    94
California            Walcott, C. D.     8
                      Walcott, M. V.     7
Colima                Walcott, M. V.     1
Colorado              Walcott, C. D.     2
                      Walcott, M. V.     2
District of Columbia  Walcott, C. D.     1
Guánica               Walcott, M. V.     1
Maryland              Walcott, C. D.     3
Nevada                Walcott, C. D.     3
New Brunswick         Walcott, C. D.     1
New Mexico            Walcott, C. D.    12
                      Walcott, M. V.     1
Oklahoma              Walcott, C. D.     1
Ontario               Walcott, M. V.     1
Saskatchewan          Walcott, M. V.     1
South Carolina        Walcott, M. V.     4
Tennessee        

### sub dataframe location_walcott_df data use
- this dataframe will be used to create the trace for each collector to mark their locations
- each location will have a marker size relative to the ind_count number 

In [18]:
# Count specimens per (province/state, collector) pair and flatten the grouped
# result into a regular dataframe. Passing `name=` to reset_index labels the
# count column directly, so there is no positional `0` column to rename later.
location_walcott_df = (
    walcott_df
    .groupby(['province_state', 'collector_name'])
    .size()
    .reset_index(name='ind_count')
)
location_walcott_df

Unnamed: 0,province_state,collector_name,ind_count
0,Alberta,"Walcott, C. D.",1
1,Alberta,"Walcott, M. V.",49
2,Arizona,"Walcott, C. D.",6
3,Arizona,"Walcott, M. V.",5
4,Arkansas,"Walcott, C. D.",1
5,British Columbia,"Walcott, M. V.",94
6,California,"Walcott, C. D.",8
7,California,"Walcott, M. V.",7
8,Colima,"Walcott, M. V.",1
9,Colorado,"Walcott, C. D.",2


In [20]:
# Specimen count per collector, broken down by country and then province/state.
(
    walcott_df
    .groupby(['collector_name', 'country', 'province_state'])
    .size()
)

collector_name  country        province_state      
Walcott, C. D.  Canada         Alberta                  1
                               New Brunswick            1
                United States  Arizona                  6
                               Arkansas                 1
                               California               8
                               Colorado                 2
                               District of Columbia     1
                               Maryland                 3
                               Nevada                   3
                               New Mexico              12
                               Oklahoma                 1
                               Tennessee                7
                               Utah                     1
Walcott, M. V.  Canada         Alberta                 49
                               British Columbia        94
                               Ontario                  1
                    

In [21]:
# Specimen count for each country and province/state location.
(
    walcott_df
    .groupby(['country', 'province_state'])
    .size()
)

country        province_state      
Canada         Alberta                 50
               British Columbia        94
               New Brunswick            1
               Ontario                  1
               Saskatchewan             1
Mexico         Colima                   1
Puerto Rico    Guánica                  1
United States  Arizona                 11
               Arkansas                 1
               California              15
               Colorado                 4
               District of Columbia     1
               Maryland                 3
               Nevada                   3
               New Mexico              13
               Oklahoma                 1
               South Carolina           4
               Tennessee                7
               Utah                     7
dtype: int64

In [5]:
# Count specimens for every (collector, family, country, province/state)
# combination. `reset_index(name=...)` names the count column in one step
# instead of renaming the default `0` column afterwards.
sub_walcott_df = (
    walcott_df
    .groupby(['collector_name', 'family_name', 'country', 'province_state'])
    .size()
    .reset_index(name='frequency')
)
sub_walcott_df

Unnamed: 0,collector_name,family_name,country,province_state,frequency
0,"Walcott, C. D.",Acanthus Family,United States,Tennessee,1
1,"Walcott, C. D.",Bellflower family,United States,New Mexico,1
2,"Walcott, C. D.",Cactus,Canada,Alberta,1
3,"Walcott, C. D.",Cactus,United States,Arizona,1
4,"Walcott, C. D.",Daisy family,United States,Arizona,1
5,"Walcott, C. D.",Daisy family,United States,New Mexico,4
6,"Walcott, C. D.",Evening primroses,United States,California,1
7,"Walcott, C. D.",Evening primroses,United States,New Mexico,1
8,"Walcott, C. D.",Heather family,United States,California,1
9,"Walcott, C. D.",Heather family,United States,Tennessee,2
