In [2]:
#!pip install geopandas
#!apt install libspatialindex-dev
#!pip install rtree
#!pip install pygeos
import geopandas as gpd
import json
import pandas as pd
import rtree
import pygeos

Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 7.8 MB/s 
[?25hCollecting fiona>=1.8
  Downloading Fiona-1.8.21-cp37-cp37m-manylinux2014_x86_64.whl (16.7 MB)
[K     |████████████████████████████████| 16.7 MB 40.7 MB/s 
[?25hCollecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 34.9 MB/s 
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Installing collected packages: munch, cligj, click-plugins, pyproj, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.21 geopandas-0.10.2 munch-2.5.0 pyproj-3.2.1
Reading package lists... Done
Building dependency tree       
Reading state information... Done


  shapely_geos_version, geos_capi_version_string


In [90]:
# Load geographic dataframes and give them the correct projection

# train.json comes from the mapillary dataset
# https://www.mapillary.com/dataset/depth
with open('../inputdata/train.json', encoding="UTF-8") as f:
  data = json.load(f)

coords = pd.DataFrame.from_dict(data, orient="index")
coords = coords.drop(["focal", "make", "model"], axis=1)
coords = coords.rename(columns={"lat": "Latitude", "lon": "Longitude"})

# crs 4326 indicates geographic projection
coordsdf = gpd.GeoDataFrame(
    coords, crs=4326, geometry=gpd.points_from_xy(coords.Longitude, coords.Latitude))
coordsdf = coordsdf.to_crs(epsg=6933)

# Climate shapefile found here: http://koeppen-geiger.vu-wien.ac.at/present.htm
climatedf = gpd.read_file("../SHP/ClimateSHP/other_climate_2007_koppen_geiger.shp", encoding="utf-8")
# US State shapefile found here: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
usstatesdf = gpd.read_file("../SHP/StateSHP/cb_2018_us_state_500k.shp", encoding="utf-8")
usstatesdf = usstatesdf.to_crs(epsg=6933) # Equal area projection for correct area of different climates
climatedf = climatedf.to_crs(epsg=6933)

In [94]:
# Finds each coordinate's climate polygon. If none, looks for one within 10 kilometers.

climatecoorddf = gpd.sjoin_nearest(df, climatedf, distance_col='distances', max_distance=10000)
climatecoorddf.sort_values(by=['distances'],ascending=False,inplace=True)
#climatecoorddf.drop("index_right", axis=1, inplace=True)
climatecoorddf

Unnamed: 0,Latitude,Longitude,geometry,identity,climate,distances
35w83-qCsDTBg5l3Tr6AMA,46.713377,10.496453,POINT (1012763.682 5333383.543),e455ed09-8fc3-76fb-da5d-b8422bafaf35,Dfc Cold-Withouth_dry_season-Cold_Summer,9998.259494
-YkxOlPfngufDj_zfgVfUQ,46.713359,10.496420,POINT (1012760.547 5333381.916),e455ed09-8fc3-76fb-da5d-b8422bafaf35,Dfc Cold-Withouth_dry_season-Cold_Summer,9996.632392
UWsILvfYOxTbVihnrayoDg,46.713339,10.496394,POINT (1012758.040 5333380.143),e455ed09-8fc3-76fb-da5d-b8422bafaf35,Dfc Cold-Withouth_dry_season-Cold_Summer,9994.859053
4BaTRjEst-COTsWhJh6mow,46.713286,10.496330,POINT (1012751.796 5333375.474),e455ed09-8fc3-76fb-da5d-b8422bafaf35,Dfc Cold-Withouth_dry_season-Cold_Summer,9990.190930
n0KVhbPvT_w4LG-gNaBWJQ,46.713246,10.496281,POINT (1012747.116 5333371.938),e455ed09-8fc3-76fb-da5d-b8422bafaf35,Dfc Cold-Withouth_dry_season-Cold_Summer,9986.654818
...,...,...,...,...,...,...
VKqy3K-Wa3CvlDNvHui3ug,33.587506,-7.652756,POINT (-738385.941 4049171.797),fa65bcb7-9e3e-bcc9-f69c-f0de974f4919,Csa Temperate-Dry_Summer-Hot_Summer,0.000000
VSOLzx_CEhza-OP35QzPSg,35.156298,-2.975218,POINT (-287067.763 4215064.086),fa65bcb7-9e3e-bcc9-f69c-f0de974f4919,Csa Temperate-Dry_Summer-Hot_Summer,0.000000
WXwIXxylK7sGqvtsG0dKCQ,34.042559,-5.011979,POINT (-483587.189 4097604.556),fa65bcb7-9e3e-bcc9-f69c-f0de974f4919,Csa Temperate-Dry_Summer-Hot_Summer,0.000000
UyY34R3M8lyqfhvtMDMNyA,34.009403,-5.035061,POINT (-485814.294 4094084.220),fa65bcb7-9e3e-bcc9-f69c-f0de974f4919,Csa Temperate-Dry_Summer-Hot_Summer,0.000000


In [107]:
stateclimate = gpd.sjoin(climatedf, usstatesdf)
stateclimate = stateclimate.assign(area = stateclimate.area)
stateclimate.drop(["ALAND", "AWATER","index_right"], axis=1, inplace=True)

In [123]:
# Calculates the percentage of each climate in each state

climatepercentagedf = stateclimate.groupby(["NAME", "climate"]).sum()/stateclimate.groupby("NAME").sum()
climatepercentagedf

Unnamed: 0_level_0,Unnamed: 1_level_0,area
NAME,climate,Unnamed: 2_level_1
Alabama,Cfa Temperate-Withouth_dry_season-Hot_Summer,1.000000
Alaska,BSk Arid-Steppe-Cold,0.000012
Alaska,Cfb Temperate-Withouth_dry_season-Warm_Summer,0.007493
Alaska,Cfc Temperate-Withouth_dry_season-Cold_Summer,0.001575
Alaska,Csb Temperate-Dry_Summer-Warm_Summer,0.000397
...,...,...
Wyoming,BSk Arid-Steppe-Cold,0.325917
Wyoming,BWk Arid-Desert-Cold,0.003318
Wyoming,Dfa Cold-Withouth_dry_season-Hot_Summer,0.000457
Wyoming,Dfb Cold-Withouth_dry_season-Warm_Summer,0.659170
