In [None]:
import geopandas as gpd
import pandas as pd

In [None]:
# Load the LA environmental justice scores from CSV.
# 'Tract_1' is read as a string so the tract identifier is kept as text.
df = pd.read_csv('Data/EJSM_Scores/EJSM_Scores (1).csv', dtype={'Tract_1': str})

In [None]:
# Left-pad the tract identifier with zeros to 11 characters so it lines up
# with the FIPS code used by the census tract data in the later merge.
df = df.assign(Tract_1=df['Tract_1'].str.zfill(11))
df.head(5)

In [None]:
# Load the 2012 census tract boundaries (GeoJSON) into a GeoDataFrame.
tracts = gpd.read_file('Data/CensusData2012/census-tracts-2012.geojson')
# Preview only the first rows with the rich notebook display instead of
# printing the entire frame as plain text.
tracts.head()

In [None]:
# Column labels of the tracts layer, as a plain Python list
tracts.columns.tolist()

In [None]:
# Names of the tract columns that are removed in a later cell
# (this cell only defines the list; nothing is dropped here).
columns_to_drop = [
    'set',
    'kind',
    'resource_uri',
    'metadata',
]

In [None]:
# Preview the first five rows of the tracts layer before dropping columns
tracts.head(5)

In [None]:
# Remove the columns listed in `columns_to_drop` from the tracts layer.
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone, and the default errors='raise' would fail with a KeyError.
tracts = tracts.drop(columns=columns_to_drop, errors='ignore')

In [None]:
# Per-column dtype and non-null counts for the tracts layer.
# `show_counts` replaces the `null_counts` keyword, which was deprecated and
# later removed from pandas.
tracts.info(verbose=True, show_counts=True)

In [None]:
# Keep only the tract identifier ('name', renamed to FIPS in the next cell)
# and the geometry, which is all that is needed to match against the EJSM data.
tracts = tracts.loc[:, ['name', 'geometry']]
tracts.head(5)

In [None]:
# Rename the two remaining columns positionally: the first becomes 'FIPS',
# the second stays 'geometry'. This relies on the frame having exactly these
# two columns in this order (established by the previous cell).
tracts.columns = ['FIPS','geometry']
tracts.head()

In [None]:
# Column labels of the EJSM scores table, as a plain Python list
df.columns.tolist()

In [None]:
# Per-column dtype and non-null counts for the EJSM scores table.
# `show_counts` replaces the `null_counts` keyword, which was deprecated and
# later removed from pandas.
df.info(verbose=True, show_counts=True)

In [None]:
# Relabel the EJSM columns positionally so the tract identifier is called
# 'FIPS', matching the census tract layer. The assignment requires the table
# to have exactly nine columns, taken in their existing order.
df.columns = [
    'OBJECTID', 'FIPS',
    'CIscore', 'HazScore', 'HealthScore', 'SVscore', 'CCVscore',
    'Shape__Area', 'Shape__Length',
]
df.head(5)

In [None]:
# Attach the EJSM scores to the tract geometries; only tracts whose FIPS code
# appears in both tables are kept (inner join, the merge default).
tracts_ejsm = tracts.merge(df, how='inner', on='FIPS')

In [None]:
# Preview the merged tracts + EJSM scores table
tracts_ejsm.head(5)

In [None]:
# Summary statistics of the HazScore column (proximity-to-hazard score)
tracts_ejsm.HazScore.describe()

In [None]:
# Choropleth of the Social Vulnerability score (SVscore), classified into
# equal-interval bins.
tracts_ejsm.plot(
    column='SVscore',
    scheme='equal_interval',
    legend=True,
    figsize=(24, 20),
)

In [None]:
import folium

In [None]:
# Base map for the HazScore choropleth, centred at (34.2, -118.2)
m = folium.Map(location=[34.2,-118.2],
               zoom_start = 9,
               tiles='CartoDB positron',
               attribution='CartoDB')

# Plot the choropleth over the base map. The same GeoDataFrame supplies both
# the tract shapes (geo_data) and the values (data), joined on FIPS.
folium.Choropleth(
                  geo_data=tracts_ejsm, # geo data
                  data=tracts_ejsm, # data
                  key_on='feature.properties.FIPS', # key, or merge column
                  columns=['FIPS', 'HazScore'], # [key, value]
                  fill_color='BuPu',
                  line_weight=0.1,
                  fill_opacity=0.8,
                  line_opacity=0.2, # line opacity (of the border)
                  # legend text fixed: was misspelled and had a stray ')'
                  legend_name='Degree of proximity to Environmental Hazards').add_to(m)
m

In [None]:
#begin work for midterm
import plotly.express as px
from sodapy import Socrata

In [None]:
# Fetch records from the LA City open-data portal through the Socrata API.
# No app token is supplied, so the client is unauthenticated.
client = Socrata("data.lacity.org", None)
results = client.get("amvf-fr72", limit=10000)  # at most 10,000 records
# NOTE: this rebinds `df`, which held the EJSM scores table in earlier cells.
df = pd.DataFrame.from_records(data=results)

In [None]:
# Five randomly chosen rows of the downloaded records
df.sample(n=5)

In [None]:
# Distinct values of the charge-group description column
df['grp_description'].unique()

In [None]:
# Number of records per charge-group description, as a two-column table
arrest_by_charge = (
    df['grp_description']
    .value_counts()
    .reset_index()
)
arrest_by_charge

In [None]:
# Plotly bar chart of the records along the arrest-date axis.
# No y column is given, so plotly derives the bar heights itself.
px.bar(
    data_frame=df,
    x='arst_date',
    labels={'arst_date':'Arrest date','counts':'Number of arrests'},
    title='LAPD Arrests in 2020',
)

In [None]:
# Turn the records into a GeoDataFrame with one point per row, built from the
# 'lon' (x) and 'lat' (y) columns. No CRS is assigned at this stage.
crime = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(x=df['lon'], y=df['lat']),
)

In [None]:
# Quick static plot of the point layer
crime.plot(color='purple', figsize=(12, 12))

In [None]:
# Draw the tracts as a light base layer, then overlay the points on the same axes
base = tracts_ejsm.plot(
    color='gainsboro',
    edgecolor='white',
    figsize=(12, 10),
)
ax = crime.plot(markersize=5, color='purple', ax=base)

In [None]:
# Bounding box of the point layer, printed one value per line
# in the order: min x, max x, min y, max y.
minx, miny, maxx, maxy = crime.geometry.total_bounds
print(minx, maxx, miny, maxy, sep='\n')

In [None]:
# Same overlay as before, but with the view limited to the bounding box of the
# points plus a 0.1 margin on every side.
base = tracts_ejsm.plot(
    color='gainsboro',
    edgecolor='white',
    figsize=(12, 12),
)
ax = crime.plot(ax=base, color='purple', marker='o', markersize=5)
ax.set_ylim(miny - .1, maxy + .1)
ax.set_xlim(minx - .1, maxx + .1)
ax

In [None]:
# Declare the point layer's coordinate reference system as EPSG:4326
# (this labels the existing coordinates; it does not reproject them).
crime = crime.set_crs(epsg=4326)
crime.crs

In [None]:
# Inspect the CRS of the tracts layer before the spatial join with the crime
# points. This cell only reads the attribute; it does not set or change the CRS.
tracts.crs

In [None]:
# Spatially join tracts to crime points. how='right' keeps every crime row
# (and the crime geometry), adding the attributes of the matching tract.
join = gpd.sjoin(left_df=tracts, right_df=crime, how='right')

In [None]:
# Preview the spatially joined table
join.head(5)

In [None]:
# Count joined records per tract: one row per FIPS code, with the code in
# 'Tract' and the number of records in 'crime_count'.
crime_by_tracts = (
    join['FIPS']
    .value_counts()
    .rename_axis('Tract')
    .reset_index(name='crime_count')
)

In [None]:
# Preview the per-tract counts
crime_by_tracts.head(5)

In [None]:
# Bar chart of the first 50 rows of the per-tract counts
crime_by_tracts.head(50).plot.bar(x='Tract', y='crime_count', figsize=(20, 8))

In [None]:
# Exploratory check: non-null counts of every column, per charge-group description
join.groupby(by=['grp_description']).count()

In [None]:
# Group the joined records by tract, charge group, location and date, and keep
# only the count of report ids ('rpt_id') for each group.
join_grouped = (
    join
    .groupby(['FIPS', 'grp_description', 'lat', 'lon', 'arst_date'])
    .count()
    [['rpt_id']]
)
join_grouped.head(50)

In [None]:
# Move the group keys out of the index and back into ordinary columns so the
# table can be passed to plotly.
df_flat = join_grouped.reset_index(drop=False)
df_flat

In [None]:
# Plotly bar chart of arrest counts per charge-group description,
# coloured by description.
px.bar(df_flat,
       x='grp_description',
       y='rpt_id',
       title='Description of LAPD Arrests in 2020',
       color='grp_description',
       # axis label spelling fixed (was 'Arrest Decription')
       labels={'grp_description':'Arrest Description','rpt_id':'Number of Arrests'}
      )

In [None]:
# Plotly bar chart of arrest counts per arrest date, coloured by charge-group
# description.
px.bar(df_flat,
       x='arst_date',
       y='rpt_id',
       title='LAPD Arrests in 2020',
       color='grp_description',
       # the x axis is the arrest date; it was mislabelled 'Arrest Decription'
       labels={'arst_date':'Arrest date','rpt_id':'Number of Arrests'}
      )

In [None]:
# Combine the tract-level EJSM scores with the flattened crime table on FIPS
# (inner join), then reset the index, which adds the old index as a column.
df_ejsm_crime_tracts = (
    tracts_ejsm
    .merge(df_flat, how='inner', on='FIPS')
    .reset_index(drop=False)
)

In [None]:
# Preview the merged EJSM + crime table. `.head` must be called; without the
# parentheses the cell displays the bound method rather than the first rows.
df_ejsm_crime_tracts.head()

In [None]:
# Subset the merged EJSM + crime table to the columns used for analysis.
# The selection is taken from `df_ejsm_crime_tracts`, not `df`: at this point
# `df` is the raw API download, while 'HazScore' and 'FIPS' come from the
# tract/EJSM merge.
df_subset = df_ejsm_crime_tracts[['arst_date','rpt_id','grp_description','HazScore','FIPS','lat','lon']].copy()
df_subset.head()

In [None]:
# Cast the latitude and longitude columns of the merged table to float,
# then list the resulting column dtypes.
df_ejsm_crime_tracts[['lat', 'lon']] = df_ejsm_crime_tracts[['lat', 'lon']].astype(float)
df_ejsm_crime_tracts.info()

In [None]:
# Plain scatter of the merged records: longitude on x, latitude on y
px.scatter(data_frame=df_ejsm_crime_tracts, y='lat', x='lon')

In [None]:
# Inspect the CRS of the merged table. This only reads the attribute; nothing
# is configured or changed here.
df_ejsm_crime_tracts.crs

In [None]:
# Interactive map of every record in the merged table, positioned by lat/lon.
# NOTE(review): the "stamen-terrain" style may need a Stadia Maps token in
# recent plotly versions; confirm it still renders.
fig = px.scatter_mapbox(
    data_frame=df_ejsm_crime_tracts,
    lon='lon',
    lat='lat',
    mapbox_style="stamen-terrain",
)
fig.show()

In [None]:
# Rebuild `df_ejsm_crime_tracts` as a point GeoDataFrame (EPSG:4326) from `df`.
# NOTE(review): the original author flagged this cell as not working. The
# points and all attributes come from `df` (the raw API download), and the
# result replaces the merged EJSM + crime table of the same name, so columns
# that only existed in that merge (presumably 'FIPS' and 'HazScore') are no
# longer available under this name afterwards; verify this is intended.
df_ejsm_crime_tracts= gpd.GeoDataFrame(df, 
                         crs='EPSG:4326',
                         geometry=gpd.points_from_xy(df.lon, df.lat))
# Reports on `df`, not on the GeoDataFrame that was just created.
df.info()

In [None]:
#import kepler and make base
from keplergl import KeplerGl

In [None]:
# A folium map needs a centre point: take the midpoint of the layer's bounding box.
minx, miny, maxx, maxy = df_ejsm_crime_tracts.geometry.total_bounds
print(minx, maxx, miny, maxy, sep='\n')

# Midpoint of each axis; only the last expression (latitude) is displayed.
center_lon = minx + (maxx - minx) / 2
center_lat = miny + (maxy - miny) / 2
center_lat

In [None]:
# Create an empty 800x600 kepler.gl map widget and display it.
# NOTE: the name `map` shadows Python's built-in `map` for the rest of the notebook.
map = KeplerGl(width=800, height=600)
map

In [None]:
# Register the GeoDataFrame with the kepler.gl widget as a dataset named 'EJSM'
map.add_data(name='EJSM', data=df_ejsm_crime_tracts)

In [None]:
# Export the kepler.gl map to a standalone, read-only HTML file
map.save_to_html(read_only=True, file_name='df_ejsm_crime_tracts1.html')

In [None]:
# MarkerCluster lives in folium.plugins and is not imported in the cells above.
from folium.plugins import MarkerCluster

# Create an empty marker cluster layer on the existing folium map `m`
marker_cluster = MarkerCluster(name='LAPD Arrests in September, 2020').add_to(m)

# Loop through the arrest data and add each row as a marker to the cluster.
# The popup shows the charge group, age, sex code and descent code of the row
# (assumes these columns are present in `df_ejsm_crime_tracts`; verify).
for index, row in df_ejsm_crime_tracts.iterrows():

    popup = '<strong>'+str(row.grp_description)+'</strong><hr>'+'Age: '+str(row.age)+'<br>Sex: '+str(row.sex_cd)+'<br>Race: '+str(row.descent_cd)
    folium.Marker(
        location=[row.lat,row.lon],
        tooltip=row.grp_description,
        popup= popup,
        icon=folium.Icon(color='red')
    ).add_to(marker_cluster)

m

In [None]:
# Fresh base map (replaces the previous `m`), centred at (34.2, -118.2)
m = folium.Map(location=[34.2,-118.2],
               zoom_start = 10,
               tiles='CartoDB positron',
               attribution='CartoDB')

# Plot the choropleth over the base map.
# HazScore is a tract-level attribute, so the values are read from
# `tracts_ejsm` (one row per FIPS). `df_ejsm_crime_tracts` was rebuilt from the
# raw arrest records in an earlier cell and cannot be relied on to carry the
# 'FIPS' / 'HazScore' columns.
folium.Choropleth(
                  geo_data=tracts_ejsm, # geo data
                  data=tracts_ejsm, # data
                  key_on='feature.properties.FIPS', # key, or merge column
                  columns=['FIPS', 'HazScore'], # [key, value]
                  fill_color='BuPu',
                  line_weight=0.1,
                  fill_opacity=0.8,
                  line_opacity=0.2, # line opacity (of the border)
                  # legend text fixed: was misspelled and had a stray ')'
                  legend_name='Degree of Proximity to Environmental Hazards').add_to(m)    # name on the legend color bar
m

In [None]:
# Add an (initially empty) feature group layer to the folium map `m`
f2=folium.FeatureGroup(name='df_ejsm_crime_tracts').add_to(m)
# Display the folium map the layer was added to; `map` is the separate
# kepler.gl widget and is unaffected by this cell.
m

In [None]:
# NOTE(review): `data` is not defined in any visible cell of this notebook, so
# this raises NameError on a fresh kernel. Presumably one of the frames built
# above (e.g. `df_ejsm_crime_tracts`) was meant; confirm and rename.
data.info()