## Enable the geo-visualization for the dataset

In [14]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


In [76]:
import pandas as pd
import folium

# Load TSV data
haunted_df = pd.read_csv('../Data/haunted_places_with_alcohol_daylight.tsv', sep='\t')

# Initialize the map
m = folium.Map(
    location=[37.8, -96.9],  # Approximate U.S. center
    zoom_start=4,
    tiles='CartoDB Positron'  # Clean basemap
)

# Add simple markers
for idx, row in haunted_df.iterrows():
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=2,  # Tiny dot
        color='red',
        fill=True,
        fill_color='red'
    ).add_to(m)


In [78]:
m

## Add the geometry for haunted houses data
The coordinate system of the dataset is WGS84 which is indiced with 4326

In [160]:
from shapely.geometry import Point

haunted_gdf = gpd.GeoDataFrame(
    haunted_df,
    geometry=[Point(xy) for xy in zip(haunted_df.longitude, haunted_df.latitude)],
    crs=4326
)

## Add the 1st dataset
This dataset is a combined with two datasets. This first one is the GeoJSON file representing the shapes of all counties in the United States. The second dataset is a census data includes the population(2020), the number of religious adherents (who have a religious faith), and the percentage of that proportion for each county, in .xlsx format. 

### Prepare the combined dataset

In [162]:
import pandas as pd
import geopandas as gpd

# Load Counties GeoJSON
counties_gdf = gpd.read_file('../Data/georef-united-states-of-america-county.geojson')

counties_gdf = counties_gdf.rename(columns={
    'ste_name': 'state',
    'coty_name': 'county'
})
# I'm doing this step because those fields are String List type instead of String type
counties_gdf['state'] = counties_gdf['state'].apply(lambda x: ' '.join(x) if isinstance(x, list) else str(x))
counties_gdf['county'] = counties_gdf['county'].apply(lambda x: ' '.join(x) if isinstance(x, list) else str(x))

counties_gdf['state'] = counties_gdf['state'].str.strip().str.title()
counties_gdf['county'] = counties_gdf['county'].str.strip().str.title()

# Load Census Data
census_df = pd.read_excel(
    '../Data/2020_USRC_Summaries.xlsx', 
    sheet_name='2020 County Summary'  # Specify the sheet name
)

census_df = census_df.rename(columns={
    'State Name': 'state',
    'County Name': 'county'
})

census_df['county'] = census_df['county'].str.replace(' County', '', regex=False)

census_df['state'] = census_df['state'].str.strip().str.title()
census_df['county'] = census_df['county'].str.strip().str.title()

# Merge County geometries with Census data
counties_merged = counties_gdf.merge(
    census_df,
    on=['state', 'county'],
    how='left'
)

### Join the combined dataset to the haunted houses dataset

In [164]:
# Spatial Join with Counties
joined_df = haunted_gdf.sjoin(
    counties_merged[['geometry', 'county', '2020 Population', 'Adherents', 'Adherents as % of Population']],
    how='left',
    predicate='within'
)

### Calculate and add the field "count of haunted houses per county"

In [166]:
county_counts = joined_df.groupby('county').size().reset_index(name='Haunted Houses Count per County')

# Merge counts back into the main dataset
joined_df = pd.merge(
    joined_df,
    county_counts,
    on='county',
    how='left'
)

### Calculate and add the field "count of haunted houses per 1000 people in the county"

In [168]:
joined_df['Haunted houses per 1000'] = (joined_df['Haunted Houses Count per County'] / joined_df['2020 Population']) * 1000

In [188]:
joined_df

Unnamed: 0,city,country,description,location,state,state_abbrev,longitude,latitude,city_longitude,city_latitude,...,find sunrise and sunset for other places….3,find sunrise and sunset for other places….4,geometry,index_right,county,2020 Population,Adherents,Adherents as % of Population,Haunted Houses Count per County,Haunted houses per 1000
0,Ada,United States,Ada witch - Sometimes you can see a misty blue...,Ada Cemetery,Michigan,MI,-85.504893,42.962106,-85.495480,42.960727,...,,,POINT (-85.50489 42.96211),1416.0,Kent,657974.0,282420.0,0.429227,46.0,0.069912
1,Addison,United States,A little girl was killed suddenly while waitin...,North Adams Rd.,Michigan,MI,-84.381843,41.971425,-84.347168,41.986434,...,,,POINT (-84.38184 41.97142),2347.0,Hillsdale,45746.0,11128.0,0.243256,3.0,0.065580
2,Adrian,United States,If you take Gorman Rd. west towards Sand Creek...,Ghost Trestle,Michigan,MI,-84.035656,41.904538,-84.037166,41.897547,...,,,POINT (-84.03566 41.90454),173.0,Lenawee,99423.0,33427.0,0.336210,3.0,0.030174
3,Adrian,United States,"In the 1970's, one room, room 211, in the old ...",Siena Heights University,Michigan,MI,-84.017565,41.905712,-84.037166,41.897547,...,,,POINT (-84.01757 41.90571),173.0,Lenawee,99423.0,33427.0,0.336210,3.0,0.030174
4,Albion,United States,Kappa Delta Sorority - The Kappa Delta Sororit...,Albion College,Michigan,MI,-84.745177,42.244006,-84.753030,42.243097,...,,,POINT (-84.74518 42.24401),596.0,Calhoun,134310.0,43340.0,0.322686,19.0,0.141464
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10969,Westminster,United States,at 12 midnight you can see a lady with two lit...,city hall,Colorado,CO,-105.048936,39.862610,-105.037205,39.836653,...,,,POINT (-105.04894 39.86261),1136.0,Adams,519572.0,139606.0,0.268694,45.0,0.086610
10970,Westminster,United States,Is haunted by the victims of a murder that hap...,Pillar of Fire,Colorado,CO,-105.032091,39.847237,-105.037205,39.836653,...,,,POINT (-105.03209 39.84724),1136.0,Adams,519572.0,139606.0,0.268694,45.0,0.086610
10971,Wheat Ridge,United States,The institution was for kids 18 years old and ...,Ridge Mental Institution,Colorado,CO,-105.063974,39.769726,-105.077206,39.766098,...,,,POINT (-105.06397 39.76973),64.0,Jefferson,582910.0,181477.0,0.311329,119.0,0.204148
10972,Wheat Ridge,United States,Gymnasium - their have been reports of a litt...,Wheat Ridge Middle School,Colorado,CO,-105.103613,39.764055,-105.077206,39.766098,...,,,POINT (-105.10361 39.76405),64.0,Jefferson,582910.0,181477.0,0.311329,119.0,0.204148
