# Library Import and Early Data Analysis

In [None]:
## load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import requests #package for http requests
import bs4 # package for html parsing

In [None]:
# Read the county-level dataset from disk into the notebook-wide working frame.
df = pd.read_csv("slf.csv")

# Report how many records were loaded, then preview the first five of them.
print("Rows before Cleaning: ", df.shape[0])
df.head()

Rows before Cleaning:  63


Unnamed: 0,OBJECTID,GNIS_ID,FIPS_CODE,zone,ABBREV,slf,population,CALC_SQ_MI,Shape__Area,Shape__Length,NAME,Related_Counties,NJ_Border
0,1.0,974099.0,36001.0,East,ALBA,0.7,314848,532.791779,2549820000.0,225767.6819,Albany,"Rensselaer, Schenectady, Greene",N
1,2.0,974100.0,36003.0,West,ALLE,0.0,46456,1035.209131,4894141000.0,284462.8741,Allegany,"Cattaraugus, Steuben, Livingston",N
2,3.0,974101.0,36005.0,Long Island,BRON,0.7,1472654,57.472148,260546900.0,75742.96732,Bronx,"New York, Westchester",N
3,4.0,974102.0,36007.0,Central,BROO,0.7,198683,715.287465,3376131000.0,307651.781,Broome,"Chenango, Delaware, Tioga, Cortland",N
4,5.0,974103.0,36009.0,West,CATT,0.0,77042,1324.309219,6254588000.0,373112.4714,Cattaraugus,"Erie, Wyoming, Allegany",N


In [None]:
def calculate_transmission(row, column_name, frame=None):
    """Return the mean infestation level of a county's neighbours, capped at 1.0.

    Parameters
    ----------
    row : mapping or pandas.Series
        One county record; only ``row['Related_Counties']`` is read. It is
        expected to be a comma-separated list of neighbouring county names.
    column_name : str
        Column holding the infestation level to average (e.g. ``'slf'`` or a
        simulated ``'slf_t<n>'`` column).
    frame : pandas.DataFrame, optional
        Table with a ``'NAME'`` column and ``column_name``. Defaults to the
        notebook-level ``df`` so existing ``df.apply(...)`` calls keep working.

    Returns
    -------
    float
        Average of the neighbours' values (plus the fixed 1.0 contributions
        described below), clipped to at most 1.0. Returns 0 when none of the
        listed neighbours contributes anything.
    """
    if frame is None:
        # Fall back to the global DataFrame used throughout the notebook.
        frame = df

    total_slf = 0.0
    count_counties = 0

    # Split on bare commas and strip each token so "A, B" and "A,B" both work.
    for raw_name in str(row['Related_Counties']).split(','):
        county = raw_name.strip()
        if not county:
            continue

        # These three counties contribute an extra fixed 1.0 *in addition to*
        # their own looked-up value below.
        # NOTE(review): presumably models outside pressure from across the
        # state border -- confirm the double contribution is intentional.
        if county in ('Orange', 'Rockland', 'Richmond'):
            total_slf += 1.0
            count_counties += 1

        # Look up the neighbour's current level. Taking .iloc[0] avoids calling
        # float() on a whole Series (deprecated in recent pandas) and tolerates
        # duplicated county names by using the first match.
        matches = frame.loc[frame['NAME'] == county, column_name]
        if not matches.empty:
            total_slf += float(matches.iloc[0])
            count_counties += 1

    # Guard against rows whose neighbours are all unknown.
    transmission = total_slf / count_counties if count_counties > 0 else 0

    return min(transmission, 1.0)


# Seed the 'transmission' column row by row from the observed 'slf' levels
df['transmission'] = df.apply(
    calculate_transmission,
    axis=1,
    column_name='slf',
)


In [None]:
import pandas as pd

# Simulation parameters for the year-by-year spread model applied to `df`.
years = 16  # range() upper bound is exclusive, so columns slf_t1 .. slf_t15 are produced
K = .85     # divisor in the first step's (1 - slf/K) growth term
r = .7623   # multiplier on the growth term
# TODO: explore other values of K and r within some plausible interval

# Rebuild neighbour pressure from the observed 'slf' values first. The loop
# below overwrites df['transmission'] every year, so without this reset a
# re-run of this cell would start from the final year's pressure instead.
df['transmission'] = df.apply(calculate_transmission, column_name='slf', axis=1)

# Each county's population relative to the most populous county.
population_weight = df['population'] / df['population'].max()

# Year 1: growth from the observed level plus population-weighted neighbour pressure.
# NOTE(review): this step uses K and omits the (1 - 1/CALC_SQ_MI) factor, while
# the later years use (1 - slf) and include it -- confirm the difference is intended.
df['slf_t1'] = df['slf'] + df['slf'] * r * (1 - df['slf']/K) + df['transmission'] * population_weight
df['slf_t1'] = np.minimum(df['slf_t1'], 1)  # levels are capped at 1
df['transmission'] = df.apply(calculate_transmission, column_name='slf_t1', axis=1)

for i in range(2, years):  # years 2 through years - 1
    column_name = f'slf_t{i}'
    prev_column_name = f'slf_t{i-1}'  # previous year's level
    df[column_name] = df[prev_column_name] + df[prev_column_name] * r * (1 - df[prev_column_name]) + df['transmission'] * (1 - 1/df['CALC_SQ_MI']) * population_weight
    df[column_name] = np.minimum(df[column_name], 1)  # levels are capped at 1
    # Refresh neighbour pressure from the year just computed for the next iteration.
    df['transmission'] = df.apply(calculate_transmission, column_name=column_name, axis=1)

df.head(20)

Unnamed: 0,OBJECTID,GNIS_ID,FIPS_CODE,zone,ABBREV,slf,population,CALC_SQ_MI,Shape__Area,Shape__Length,...,slf_t6,slf_t7,slf_t8,slf_t9,slf_t10,slf_t11,slf_t12,slf_t13,slf_t14,slf_t15
0,1.0,974099.0,36001.0,East,ALBA,0.7,314848,532.791779,2549820000.0,225767.6819,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2.0,974100.0,36003.0,West,ALLE,0.0,46456,1035.209131,4894141000.0,284462.8741,...,0.268729,0.426422,0.624215,0.817528,0.947707,1.0,1.0,1.0,1.0,1.0
2,3.0,974101.0,36005.0,Long Island,BRON,0.7,1472654,57.472148,260546900.0,75742.96732,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,4.0,974102.0,36007.0,Central,BROO,0.7,198683,715.287465,3376131000.0,307651.781,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,5.0,974103.0,36009.0,West,CATT,0.0,77042,1324.309219,6254588000.0,373112.4714,...,0.473927,0.678634,0.862575,0.97436,1.0,1.0,1.0,1.0,1.0,1.0
5,6.0,974104.0,36011.0,Central,CAYU,0.3,76248,881.823505,4277469000.0,458522.3593,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,7.0,974105.0,36013.0,West,CHAU,0.0,127657,1507.794553,7125137000.0,334424.5383,...,0.767398,0.937829,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,8.0,974106.0,36015.0,Central,CHEM,0.3,84148,410.959316,1937765000.0,198267.4408,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,9.0,974107.0,36017.0,Central,CHEN,0.0,47220,897.818644,4282720000.0,308026.6034,...,0.402598,0.601028,0.79982,0.938638,0.999733,1.0,1.0,1.0,1.0,1.0
9,10.0,974108.0,36019.0,East,CLIN,0.0,79843,1116.813729,5737787000.0,331297.3503,...,0.004318,0.008188,0.015422,0.028804,0.053188,0.096551,0.170633,0.289082,0.458937,0.663191


In [None]:
# Fraction of counties whose observed 'slf' level exceeds each threshold.
# NOTE(review): 0.29 / 0.69 presumably sit just below the 0.3 / 0.7 levels to
# avoid float-equality issues -- confirm against the data dictionary.
count_gt3 = (df['slf'] > 0.29).mean()
count_gt7 = (df['slf'] > 0.69).mean()

# The labels say "Percent", so render the fractions as percentages.
print(f"Percent of counties with sightings: {count_gt3:.1%}")
print(f"Percent of counties with infestations: {count_gt7:.1%}")

Percent of counties with sightings: 0.49206349206349204
Percent of counties with infestations: 0.36507936507936506


In [None]:
# Fraction of counties whose simulated year-11 level exceeds each threshold.
count_gt32 = (df['slf_t11'] > 0.29).mean()
count_gt72 = (df['slf_t11'] > 0.69).mean()

# The labels say "Percent", so render the fractions as percentages.
print(f"Percent of counties with sightings: {count_gt32:.1%}")
print(f"Percent of counties with infestations: {count_gt72:.1%}")

Percent of counties with sightings: 0.8412698412698413
Percent of counties with infestations: 0.8253968253968254


In [None]:
# Fraction of counties whose simulated year-13 level exceeds each threshold.
# (Reuses the count_gt32 / count_gt72 names from the year-11 cell.)
count_gt32 = (df['slf_t13'] > 0.29).mean()
count_gt72 = (df['slf_t13'] > 0.69).mean()

# The labels say "Percent", so render the fractions as percentages.
print(f"Percent of counties with sightings: {count_gt32:.1%}")
print(f"Percent of counties with infestations: {count_gt72:.1%}")

Percent of counties with sightings: 0.9047619047619048
Percent of counties with infestations: 0.8412698412698413


In [None]:
# Persist the frame, including every simulated column, without the row index.
df.to_csv(path_or_buf='output_file.csv', index=False)

In [None]:
import folium

# Create a base map centred on latitude 40.75, longitude -74.125:
schoolMap = folium.Map(location=[40.75, -74.125])

# Overlay the shapes from Counties.geojson as a semi-transparent layer.
# Map.choropleth() is deprecated in folium (and removed in later releases);
# the supported form is a Choropleth layer added to the map.
folium.Choropleth(
    geo_data="Counties.geojson",
    fill_opacity=0.5,
    line_opacity=0.5,
).add_to(schoolMap)

# Display the map inline; to write an .html file instead, call
# schoolMap.save(outfile='counties.html')
schoolMap

Output hidden; open in https://colab.research.google.com to view.