
# Upload and read the datasets
We merge two datasets: the geocoded dataset and the evictions dataset. 
We also merge the LA County ZIP code GeoJSON dataset with the evictions dataset. 

In [None]:
#Import libraries
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import statsmodels.api as sm
sns.set()

import warnings # supress warnings
warnings.filterwarnings('ignore')
#import geodatasets

## Upload 2023_Eviction_Notices file and evictions geocoded file
Read the datasets to understand the columns and rows

In [None]:
# Load the 2023 eviction notices CSV with pandas and preview the first five rows.
# (Author's note: 77091 rows × 12 columns.)
df_evictions = pd.read_csv('data/2023_Eviction_Notices.csv')
df_evictions.head(5)

In [None]:
# Load the geocoded evictions CSV with pandas and preview the first five rows.
df_geocoded = pd.read_csv('data/evictions_geocoded.csv')
df_geocoded.head(5)

## We add geometry to df_geocoded using the following code 

In [None]:
import geopandas as gpd
from shapely.geometry import Point

# One shapely Point per row of df_geocoded, built as Point(longitude, latitude)
geometry = list(map(Point, df_geocoded['longitude'], df_geocoded['latitude']))

# Store the points on df_geocoded itself, then wrap the frame in a GeoDataFrame
# that uses that column as its active geometry
df_geocoded['geometry'] = geometry
df_geocoded_gdf = gpd.GeoDataFrame(df_geocoded, geometry='geometry')

# Preview the first five rows of the GeoDataFrame
df_geocoded_gdf.head(5)

In [None]:
# Count the missing values in every column of df_geocoded
null_values = df_geocoded.isna().sum()

# Print the heading followed by the per-column counts
print("Null values in the entire DataFrame:", null_values, sep="\n")

In [None]:
# Count the missing values in every column of df_evictions
null_values = df_evictions.isna().sum()

# Print the heading followed by the per-column counts
print("Null values in the entire DataFrame:", null_values, sep="\n")

In [None]:
# Print the column dtypes of the evictions frame, then of the geocoded frame
for frame in (df_evictions, df_geocoded):
    print(frame.dtypes)

In [None]:
# Print the distinct values found in the City and Zip columns of df_evictions
for column in ('City', 'Zip'):
    print(df_evictions[column].unique())

In [None]:
# Print the distinct latitude values, then the distinct longitude values, of df_geocoded
for column in ('latitude', 'longitude'):
    print(df_geocoded[column].unique())

## We load df2_geocoded with LA county zip codes
This file has zipcodes and geometry 

In [None]:
# Read the LA County ZIP code GeoJSON into a GeoDataFrame and preview the first five rows.
# NOTE(review): the other datasets in this notebook are read from 'data/'; confirm that this
# file really lives next to the notebook rather than in 'data/'.
df2_geocoded = gpd.read_file('LA_County_ZIP_Codes.geojson')
df2_geocoded.head(5)

In [None]:
# Print only the ZIPCODE and geometry columns of the ZIP code layer (plain-text output).
print(df2_geocoded[['ZIPCODE', 'geometry']])

We rename column ZIPCODE to Zip in df2_geocoded 

In [None]:
# Rename the ZIPCODE column to 'Zip' (the result replaces df2_geocoded), then preview the first five rows.
df2_geocoded = df2_geocoded.rename(columns={'ZIPCODE': 'Zip'})
df2_geocoded.head(5) 

In [None]:
# Draw the ZIP code layer with the GeoDataFrame's built-in plot, using figsize=(12,10).
df2_geocoded.plot(figsize=(12,10))

## Merge the dataset df evictions and df2 geocoded 

In [None]:
import pandas as pd
import geopandas as gpd

# Make sure the polygon layer exposes its ZIP column as 'Zip' BEFORE that column is
# read below. This is a no-op when the column has already been renamed.
df2_geocoded = df2_geocoded.rename(columns={'ZIPCODE': 'Zip'})

# Use string ZIP keys in both frames so the join compares like with like.
# A numeric column holding missing values is stored as float, and astype(str) then
# yields values such as '90028.0'; the trailing '.0' is stripped so those keys still
# match. Strings without that suffix are left untouched.
df_evictions['Zip'] = df_evictions['Zip'].astype(str).str.replace(r'\.0$', '', regex=True)
df2_geocoded['Zip'] = df2_geocoded['Zip'].astype(str).str.replace(r'\.0$', '', regex=True)

# Left join: keep every eviction row and attach the matching ZIP polygon columns.
# NOTE(review): if the polygon layer holds more than one row per ZIP, each eviction row
# is repeated once per matching polygon row — confirm before counting rows of merged_df2.
merged_df2 = pd.merge(df_evictions, df2_geocoded, how='left', on='Zip')

# Preview the first five rows of the merged result
merged_df2.head(5)


## We show our folium map based on zip codes
LA ZIP code 90028 is shaded darker, showing that it has the highest recorded eviction count.

In [None]:
import folium
from folium import Choropleth, GeoJson
import geopandas as gpd
from shapely.geometry import Point

# Choropleth of eviction notices per ZIP code, drawn over the ZIP polygons in df2_geocoded.

# Drop rows with NaN values in the 'Zip' column
df2_geocoded = df2_geocoded.dropna(subset=['Zip'])

# Create a Folium map
# NOTE(review): folium.Map documents the attribution keyword as `attr`; confirm whether
# `attribution` has any effect here. It is kept unchanged.
m = folium.Map(location=[34.2, -118.2], zoom_start=10, tiles='CartoDB positron', attribution='CartoDB')

# Count eviction notices per ZIP code straight from the evictions table.
# Counting rows of the left-joined merged_df2 would count an eviction once per matching
# polygon row, which inflates the totals whenever a ZIP has more than one polygon row.
eviction_counts = (
    df_evictions['Zip']
    .astype(str)                      # same string keys the polygon layer is matched on
    .value_counts()
    .rename_axis('Zip')
    .reset_index(name='Eviction_Count')
)

# Plot choropleth over the base map
Choropleth(
    geo_data=df2_geocoded,
    data=eviction_counts,
    key_on='feature.properties.Zip',
    columns=['Zip', 'Eviction_Count'],
    fill_color='YlGnBu',
    line_weight=0.1,
    fill_opacity=0.8,
    line_opacity=0.2,
    legend_name='Eviction Counts (2023)',
    highlight=True
).add_to(m)

# Overlay the ZIP boundaries in red with no fill; tooltip and popup show the ZIP code
GeoJson(
    df2_geocoded,
    style_function=lambda feature: {
        'color': 'red',
        'weight': 1,
        'fillOpacity': 0
    },
    tooltip=folium.features.GeoJsonTooltip(fields=['Zip'], labels=False),
    popup=folium.features.GeoJsonPopup(fields=['Zip'], labels=False),
).add_to(m)

# Display the map
m


# Merge two datasets 
We merge the two datasets 2023_Eviction_Notices.csv and evictions_geocoded.csv 

In [None]:
# Extract the name from input_string: the text before the first comma, with surrounding
# whitespace removed. The vectorised .str accessor leaves missing input_string values as
# missing instead of raising, which a plain x.split(...) would do on NaN.
df_geocoded['Name'] = df_geocoded['input_string'].str.split(',').str[0].str.strip()

In [None]:
# Show the 'Name' value of the first row of df_geocoded.
df_geocoded.iloc[0]['Name']

In [None]:
# Show the 'Address' value of the first row of df_evictions.
df_evictions.iloc[0]['Address']

In [None]:
# Keep only the first geocoded record for each distinct 'Name'
df_geocoded_unique = df_geocoded.drop_duplicates(subset=['Name'], keep='first')

# Inner join: geocoded 'Name' against eviction 'Address'; rows without a match on
# either side are dropped
merged_df = df_geocoded_unique.merge(
    df_evictions,
    how='inner',
    left_on='Name',
    right_on='Address',
)

In [None]:
# List the column labels of the merged frame.
merged_df.columns

In [None]:
# Number of rows in merged_df for each City value
merged_df['City'].value_counts()

In [None]:
# Number of rows in merged_df for each Name value.
merged_df['Name'].value_counts()

In [None]:
# Number of rows in merged_df for each Address value.
merged_df['Address'].value_counts()

In [None]:
# Rename 'postcode' to 'Zip' and 'formatted_address' to 'Address' in the geocoded
# GeoDataFrame; the renamed frame replaces df_geocoded_gdf.
df_geocoded_gdf = df_geocoded_gdf.rename(columns={'postcode': 'Zip', 'formatted_address': 'Address'})

In [None]:
# Preview the first rows of the geocoded GeoDataFrame (this cell only displays it;
# nothing is added or changed here).
df_geocoded_gdf.head()

## We test dataset with the census tract 2020 geojson

In [None]:
# Read the 2020 census tracts GeoJSON into a new GeoDataFrame and preview the first rows.
# (Author's note: the file was downloaded from the LA Times.)
tracts=gpd.read_file('data/Census_Tracts_2020.geojson')
tracts.head()

In [None]:
# Build the FIPS column by prefixing every CT20 value with '06' followed by '037'
fips_prefix = '06' + '037'
tracts['FIPS'] = fips_prefix + tracts['CT20']

In [None]:
# Preview the first rows of tracts to check the result.
tracts.head()

In [None]:
# Draw the census tract layer with the GeoDataFrame's built-in plot, using figsize=(12,10).
tracts.plot(figsize=(12,10))

In [None]:
# Flag every merged eviction record whose point falls inside a census tract
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon

# Wrap the tract polygons in a GeoDataFrame (active geometry column: 'geometry')
geo_tracts = gpd.GeoDataFrame(tracts, geometry='geometry')

# The spatial join needs both layers in the same CRS. The points are longitude/latitude,
# so the tracts are brought to EPSG:4326.
# assumes a tract layer without CRS metadata is already longitude/latitude — TODO confirm
if geo_tracts.crs is None:
    tracts_wgs84 = geo_tracts.set_crs(epsg=4326)
else:
    tracts_wgs84 = geo_tracts.to_crs(epsg=4326)

# One Point(longitude, latitude) per row of merged_df, wrapped in a GeoDataFrame
geometry_points = [Point(lon, lat) for lon, lat in zip(merged_df['longitude'], merged_df['latitude'])]
geo_merged = gpd.GeoDataFrame(merged_df, geometry=geometry_points, crs='EPSG:4326')

# Rows without coordinates cannot be located, so they are left out of the join
has_coords = merged_df[['longitude', 'latitude']].notna().all(axis=1)

# Point-in-polygon lookup. Calling GeoSeries.within() with another GeoSeries compares
# row i with row i (aligned on the index) and is not a lookup across all tracts, so a
# spatial join is used instead.
points_in_tracts = gpd.sjoin(
    geo_merged.loc[has_coords, ['geometry']],
    tracts_wgs84[['geometry']],
    how='inner',
    predicate='within',
)

# 'count' is 1 when the point lies inside some tract and 0 otherwise
merged_df['count'] = merged_df.index.isin(points_in_tracts.index).astype(int)

# Displaying the updated merged dataframe
print(merged_df.head())


In [None]:
# Keep only the CT20 and geometry columns, then preview the result. This drops the FIPS
# column created earlier, although the original note here said FIPS was the column needed.
# NOTE(review): confirm whether 'FIPS' should be kept instead of (or alongside) 'CT20'.
tracts = tracts[['CT20','geometry']]
tracts.head()

# Distribution of city evictions 

In [None]:
# Map of eviction points drawn over the census tract polygons
import folium

# Base map centred on [34.0522, -118.2437]; adjust the location and zoom as needed
tracts_map = folium.Map(location=[34.0522, -118.2437], zoom_start=10)

# Tract polygons as a GeoJson layer named 'Tracts'
folium.GeoJson(tracts, name='Tracts').add_to(tracts_map)

# Rebuild the point GeoDataFrame from merged_df, then discard rows that have no
# latitude or longitude
geometry_points = list(map(Point, merged_df['longitude'], merged_df['latitude']))
geo_merged_df = gpd.GeoDataFrame(merged_df, geometry=geometry_points).dropna(
    subset=['latitude', 'longitude']
)

# One blue circle per remaining row; the radius is that row's 'count' value times 5
marker_inputs = zip(
    geo_merged_df['latitude'],
    geo_merged_df['longitude'],
    geo_merged_df['count'],
)
for lat, lon, row_count in marker_inputs:
    folium.CircleMarker([lat, lon], radius=row_count * 5, color='blue').add_to(tracts_map)

# Display the map
tracts_map

In [None]:
import matplotlib.pyplot as plt

# Number of merged records per city
city_counts = merged_df['City'].value_counts()

# Bar chart of the per-city counts on an explicit 12x10 figure
fig, ax = plt.subplots(figsize=(12, 10))
city_counts.plot.bar(ax=ax)
ax.set_title('City Counts')
ax.set_xlabel('City')
ax.set_ylabel('Count')
plt.show()