#  Outline

This notebook grab the city data and brown stink bug survey data from Minnesota. It then processes the data to conduct QAQC steps, which is to check the coordinate system and convert it to 4326 for POSTGIS, and identify duplicate cities. In the end, it creates a table that calculate distance between cities

In [1]:
import arcpy
import json
import zipfile
import pprint
import requests
from arcpy import env
import os
from zipfile import ZipFile
import random
import time

In [7]:
# Define the URL and local directory
url = "https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_dot/bdry_mn_city_township_unorg/shp_bdry_mn_city_township_unorg.zip"
local_directory = r"C:\final_project"

# Function to download a file from a URL
def download_file(url, local_directory):
    response = requests.get(url)
    file_name = os.path.join(local_directory, url.split("/")[-1])

    with open(file_name, "wb") as f:
        f.write(response.content)
    return file_name

# Function to unzip a file
def unzip_file(file_path, destination):
    with ZipFile(file_path, "r") as zip_ref:
        zip_ref.extractall(destination)

# Create the local directory if it doesn't exist
if not os.path.exists(local_directory):
    os.makedirs(local_directory)

# Download the file and unzip it
zip_file_path = download_file(url, local_directory)
unzip_file(zip_file_path, local_directory)

print("Download and extraction completed for city_township_unorg shapefile")


Download and extraction completed!


In [8]:
# Define the URL and local directory
url = "https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_mda/biota_bmsb/shp_biota_bmsb.zip"
local_directory = r"C:\final_project"

# Function to download a file from a URL
def download_file(url, local_directory):
    response = requests.get(url)
    file_name = os.path.join(local_directory, url.split("/")[-1])

    with open(file_name, "wb") as f:
        f.write(response.content)
    return file_name

# Function to unzip a file
def unzip_file(file_path, destination):
    with ZipFile(file_path, "r") as zip_ref:
        zip_ref.extractall(destination)

# Create the local directory if it doesn't exist
if not os.path.exists(local_directory):
    os.makedirs(local_directory)

# Download the file and unzip it
zip_file_path = download_file(url, local_directory)
unzip_file(zip_file_path, local_directory)

print("Download and extraction completed for stinkbug shapefile")


Download and extraction completed for stinkbug shapefile


# QAQC 

first step of the qaqc is to create a new column "All" that sums the "Adults" and "Nymphs" column together, and then create a "ground trugh" column based on "All" column

Second step is to convert the city shapefile to 4326, select only cities and remove unnecesary fields and identify duplicate cities by sum their populatino and merge the duplicate cities polygons.

Third step is to join the BMSB table to MN_Cities data and find the closest city to BMSB.


# BMSB data

In [9]:
# Convert BMSB table to point
arcpy.management.XYTableToPoint("BMSBSurveyDataTable", r"C:\Users\Maochuan\OneDrive\文档\ArcGIS\Projects\arc2_final_project\arc2_final_project.gdb\BMSBSurveyDataTable_XYTableToPoint", "Longitude", "Latitude", None, 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]];-400 -400 1000000000;-100000 10000;-100000 10000;8.98315284119521E-09;0.001;0.001;IsHighPrecision')

# creating a new column that sums the "Adults" and "Nymphs"
arcpy.management.CalculateField("BMSBSurveyDataTable_XYTableToPoint", "ALL", "!Adults! + !Nymphs!", "PYTHON3", '', "SHORT", "NO_ENFORCE_DOMAINS")


In [10]:
#creating a ground truth column based on the "ALL" and uses the following 'codeblock' to populate "G_Truth"
codeblock = """
def getClass(column):
    if int(column) == 0:
        return 0
    elif int(column) >= 1:
        return 1"""    

arcpy.management.CalculateField("BMSBSurveyDataTable_XYTableToPoint", "G_Truth", "getClass(!ALL!)", "PYTHON3", codeblock, "SHORT", "NO_ENFORCE_DOMAINS")
#delete unneeded columns within BMSB data 
arcpy.management.DeleteField("BMSBSurveyDataTable_XYTableToPoint", "City;Year;CheckDate;Adults;Nymphs;ALL;G_Truth", "KEEP_FIELDS")

In [18]:
#saves just the polygons for cities within MN
arcpy.conversion.FeatureClassToFeatureClass("city_township_unorg", r"C:\final_project", "MN_Cities.shp", "CTU_CLASS = 'CITY'", 'GNIS_FEATU "GNIS_FEATU" true true false 10 Long 0 10,First,#,city_township_unorg,GNIS_FEATU,-1,-1;FEATURE_NA "FEATURE_NA" true true false 254 Text 0 0,First,#,city_township_unorg,FEATURE_NA,0,254;CTU_CLASS "CTU_CLASS" true true false 25 Text 0 0,First,#,city_township_unorg,CTU_CLASS,0,25;COUNTY_GNI "COUNTY_GNI" true true false 10 Long 0 10,First,#,city_township_unorg,COUNTY_GNI,-1,-1;COUNTY_COD "COUNTY_COD" true true false 2 Text 0 0,First,#,city_township_unorg,COUNTY_COD,0,2;COUNTY_NAM "COUNTY_NAM" true true false 100 Text 0 0,First,#,city_township_unorg,COUNTY_NAM,0,100;POPULATION "POPULATION" true true false 10 Long 0 10,First,#,city_township_unorg,POPULATION,-1,-1;SHAPE_Leng "SHAPE_Leng" true true false 19 Double 0 0,First,#,city_township_unorg,SHAPE_Leng,-1,-1;SHAPE_Area "SHAPE_Area" true true false 19 Double 0 0,First,#,city_township_unorg,SHAPE_Area,-1,-1', '')

#converts "MN_Cities" spatial reference system to 4326
arcpy.management.Project("MN_Cities", r"C:\Users\Maochuan\OneDrive\文档\ArcGIS\Projects\arc2_final_project\arc2_final_project.gdb\MN_Cities_Project", 'GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]', "WGS_1984_(ITRF00)_To_NAD_1983", 'PROJCS["NAD_1983_UTM_Zone_15N",GEOGCS["GCS_North_American_1983",DATUM["D_North_American_1983",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",-93.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]', "NO_PRESERVE_SHAPE", None, "NO_VERTICAL")

# make a new field then copy the old field values into the new one (enables me to rename 'Feature_NA' to 'City_SJ')
arcpy.management.CalculateField("MN_Cities_Project", "CITY_SJ", "!FEATURE_NA!", "PYTHON3", '', "TEXT", "NO_ENFORCE_DOMAINS")

#delete unnecessary columns
arcpy.management.DeleteField("MN_Cities_Project", "CITY_SJ;POPULATION", "KEEP_FIELDS")

#removing duplicates cities by disolving the polygons that have the same 'CITY_SJ' 
arcpy.management.Dissolve("MN_Cities_Project", "MN_Cities_dissolve_Project", "CITY_SJ", [["POPULATION", 'SUM']],"MULTI_PART", "DISSOLVE_LINES")

# make a new field then copy the old field values into the new one (enables me to rename 'Sum_Population' to 'POPULATION')
arcpy.management.CalculateField("MN_Cities_dissolve_Project", "POPULATION", "!SUM_POPULATION!", "PYTHON3", '', "LONG", "NO_ENFORCE_DOMAINS")

#delete unnecessary columns ('SUM_POPULA')
arcpy.management.DeleteField("MN_Cities_dissolve_Project", "CITY_SJ;POPULATION", "KEEP_FIELDS")



# Spatial Join

In [20]:
#outputs a layer with MN_CITIES_dis_WGS columns (CITY_SJ, and Population) append to BMSB data
targetFeatures = "BMSBSurveyDataTable_XYTableToPoint"
joinFeatures = "MN_Cities_dissolve_Project"
outfc =  r"C:\Users\Maochuan\OneDrive\文档\ArcGIS\Projects\arc2_final_project\arc2_final_project.gdb\BMSBSurveyDataTa_SpatialJoin1"

# Create a new fieldmappings and add the two input feature classes. (this appends the columns from joinFeatures)
fieldmappings = arcpy.FieldMappings()
fieldmappings.addTable(targetFeatures)
fieldmappings.addTable(joinFeatures)
 
#Run the Spatial Join tool, using the defaults for the join operation and join type
arcpy.SpatialJoin_analysis(targetFeatures, joinFeatures, outfc, "#", "#", fieldmappings, "CLOSEST_GEODESIC")


In [21]:
#convert each City to points (has been poylgons until now):
arcpy.management.FeatureToPoint("MN_Cities_dissolve_Project", "MN_Cities_Pts_Project.shp", "CENTROID")

#Note: error:  The length of Field Name must not be larger than 10 --an issue only for "BMSB_Sim100"
for i in range(0,50):
    fieldname = "BMSB_Sim"+str(i)
    arcpy.management.AddField("MN_Cities_Pts_Project", fieldname, "SHORT", 1, None, None, '', "NULLABLE", "NON_REQUIRED", '')

#adding accuracy/confusion matrix columns to sum for each cities 100 simulations
col_list = ['overall_TP', 'overall_FP', 'overall_FN', 'overall_TN']
for name in col_list:
    arcpy.management.AddField("MN_Cities_Pts_Project", name, "SHORT", 3, None, None, '', "NULLABLE", "NON_REQUIRED", '')
    
#creating a new column that calculates ranking each city based on its FP, and 
arcpy.management.AddField("MN_Cities_Pts_Project", "Presence", "DOUBLE", None, None, None, '', "NULLABLE", "NON_REQUIRED", '')
    
#creating a new column that calculates the Accuracy for each cities' predictions
arcpy.management.AddField("MN_Cities_Pts_Project", "Accuracy", "DOUBLE", None, None, None, '', "NULLABLE", "NON_REQUIRED", '')

#creating a new column that calculates ranking each city based on its FP, and 
arcpy.management.AddField("MN_Cities_Pts_Project", "Rank", "SHORT", 3, None, None, '', "NULLABLE", "NON_REQUIRED", '')


# Generate Near City Table

In [26]:
#code makes a table for every city pair
arcpy.analysis.GenerateNearTable("MN_Cities_Pts_Project", "MN_Cities_Pts_Project", "MN_Cities_Pts_NearTable", None, "NO_LOCATION", "NO_ANGLE", "ALL", 1000, "GEODESIC")

#delete column "NEAR_RANK"
arcpy.management.DeleteField("MN_Cities_Pts_NearTable", "NEAR_RANK", "DELETE_FIELDS")

In [23]:
# Delete duplicate pairs of cities

#set fields and feature class
fields = ('IN_FID', 'NEAR_FID', 'NEAR_DIST')
fc = "MN_Cities_Pts_NearTable"

rowcount = 1
with arcpy.da.UpdateCursor(fc, fields, sql_clause=(None, 'ORDER BY NEAR_DIST ASC')) as cursor:
    for row in cursor:
        if (rowcount % 2) == 0:
            cursor.deleteRow()
        rowcount += 1
            
print("Duplicates removed")
print(arcpy.management.GetCount("MN_Cities_Pts_NearTable"))

Duplicates removed
364231


In [28]:
#city 1: adding the city name fields (easier to read the name of the city istead of its FID)
arcpy.management.JoinField("MN_Cities_Pts_NearTable", "IN_FID", "MN_Cities_Pts_Project", "FID", "CITY_SJ")
arcpy.management.AlterField("MN_Cities_Pts_NearTable", "CITY_SJ", "CITY_1", "CITY_1", "TEXT", 512, "NULLABLE", "DO_NOT_CLEAR")
#city2
arcpy.management.JoinField("MN_Cities_Pts_NearTable", "NEAR_FID", "MN_Cities_Pts_Project", "FID", "CITY_SJ")
arcpy.management.AlterField("MN_Cities_Pts_NearTable", "CITY_SJ", "CITY_2", "CITY_2", "TEXT", 512, "NULLABLE", "DO_NOT_CLEAR")

In [29]:
# adds probability column to near table
# ONLY RUN ONCE
fc = 'MN_Cities_Pts_NearTable'
arcpy.management.AddField(fc, "TransProb", "DOUBLE", None, None, None, "TransProb", "NULLABLE", "NON_REQUIRED", '')

In [30]:
#JOIN CITY_1 population to near table
arcpy.management.JoinField("MN_Cities_Pts_NearTable", "CITY_1", "MN_Cities_Pts_Project", "CITY_SJ", "POPULATION")
arcpy.management.AlterField("MN_Cities_Pts_NearTable", "POPULATION", "CITY_1_POP", "CITY_1_POP", "LONG", 4, "NULLABLE", "DO_NOT_CLEAR")

#JOIN CITY_2 population to near table
arcpy.management.JoinField("MN_Cities_Pts_NearTable", "CITY_2", "MN_Cities_Pts_Project", "CITY_SJ", "POPULATION")
arcpy.management.AlterField("MN_Cities_Pts_NearTable", "POPULATION", "CITY_2_POP", 'CITY_2_POP', "LONG", 4, "NULLABLE", "DO_NOT_CLEAR")

# Huff Model

In [2]:
def getPresence(fc, fields, SimCol):
    '''
    looking through current simulation layer and organizes cities into Absence or Presence lists
    '''
    presenceList = []
    absenceList = []

    with arcpy.da.SearchCursor(fc, fields) as cursor:
        for row in cursor:
            if row[2] == 1:
                presenceList.append(row[0])
            else:
                absenceList.append(row[0])
    return presenceList, absenceList


In [3]:
def runModel(fc, fields, presenceList, absenceList):
    '''
    Uses presence and absence list, to check the relationship between each pair of cities (we only care to check 
    cities with 0-1 or 1-0). If we find Cities pairs that meet this requirement, then we test its Huff probability against a 
    random number to determine if the destCity will now be presence.
    
    returns a list of Cities that BMSB spreads to
    '''
    
    spreadList = [] #initialize spreadList
    
    
    with arcpy.da.SearchCursor(fc, fields) as cursor:

        for row in cursor:
            if row[0] in presenceList:
                destCity = row[1]

            elif row[1] in presenceList:
                destCity = row[0]

            else:
                continue

            TransProb = row[2]

            if destCity in absenceList:
#                 print('destCity TransProb is being tested')
                if TransProb >= random.random():
                    spreadList.append(destCity)         
    
    return spreadList

In [4]:
def updateSim(fc, fields, spreadList):
    '''
    using the returned 'Spreadlist' to update MN_CIties_Pts_WGS
    '''
    
    with arcpy.da.UpdateCursor(fc, fields) as cursor:
        for row in cursor:
            if row[0] in spreadList:
                row[1] = 1
                cursor.updateRow(row)

In [5]:
def calculateHuff(fc, fields, alpha):
    '''
    calculates the transfer probabilities (updates the "TransProb" column in 'MN_Cities_Pts_NearTable')
    '''
    HuffDenominator = 0
    
    #first calculate HuffDenominator from sum of HuffNumerators in city pairs
    # looking through Neartable
    with arcpy.da.SearchCursor(fc, fields) as cursor:
        
        for row in cursor:
            
            #assign values
            city1Pop = row[3]
            city2Pop = row[4]
            distance = row[0]
            
            #calculate HuffNumerator and add to HuffDenominator
            HuffNumerator = ((city1Pop * city2Pop) / (distance ** alpha))
            HuffDenominator += HuffNumerator
    
    #use UpdateCursor to calculate and assign probability of transfer for city pairs
    with arcpy.da.UpdateCursor(fc, fields) as cursor:
        
        for row in cursor:
            
            #assign values
            city1Pop = row[3]
            city2Pop = row[4]
            distance = row[0]
            
            #calculate transProb and update row
            HuffNumerator = ((city1Pop * city2Pop) / (distance ** alpha))
            
            #emulates the idea of running daily timesteps (without having to run 1800 timesteps for true daily outputs)
            scalar = 30 # SET SCALAR MANUALLY
            
            row[5] = scalar * (HuffNumerator / HuffDenominator)
            cursor.updateRow(row)

In [6]:
#Calculates and Stores 'Transfer Probabilities' for all city pairs

fc = 'MN_Cities_Pts_NearTable'
fields = ['NEAR_DIST', 'CITY_1', 'CITY_2','CITY_1_POP', 'CITY_2_POP', 'TransProb']
alpha = 2

print('Running...')
calculateHuff(fc, fields, alpha)
print('Complete')

Running...
Complete


In [7]:
start_time = time.time()

#run 100 simulations
for i in range(0,1):
    
    SimCol = 'BMSB_Sim'+str(i)


    #populate Sim column with seed city
    fc = 'MN_Cities_Pts_Project'
    seed = "Minneapolis" #SEED CITY DEFINED MANUALLY
    fields = ['CITY_SJ', 'POPULATION', SimCol]

    with arcpy.da.UpdateCursor(fc, fields) as cursor:
        for row in cursor:
            if row[0] == seed:
                row[2] = 1
            else:
                row[2] = 0
            cursor.updateRow(row)

    timestep = 12 #SET NUMBER OF TIMESTEPS
                    # 5 years = 60months = 260 weeks = 1825 days

    for k in range(0, timestep):

        fc = 'MN_Cities_Pts_Project'
        fields = ['CITY_SJ', 'POPULATION', SimCol]

        presenceList, absenceList = getPresence(fc, fields, SimCol)

        # simulate spread in a single timestep
        fc = 'MN_Cities_Pts_NearTable'
        fields = ['CITY_1', 'CITY_2','TransProb']

        spreadList = runModel(fc, fields, presenceList, absenceList)
        print("Sim " +str(i) +" | Step " + str(k) + ": " + str(spreadList))

        # update spread in a single timestep
        fc = 'MN_Cities_Pts_Project'
        fields = ['CITY_SJ', SimCol]

        updateSim(fc, fields, spreadList)

    print('Simulation Complete')
print("All Simulations Complete")
print("--- %s seconds ---" % (time.time() - start_time))

Sim 0 | Step 0: ['Fridley', 'Maple Grove', 'Golden Valley', 'Saint Louis Park', 'Edina', 'Crystal', 'Saint Paul', 'New Brighton', 'Newport', 'Nowthen', 'Richfield']
Sim 0 | Step 1: ['Eagan', 'Inver Grove Heights', 'Lake Elmo', 'Maplewood', 'Arden Hills', 'Rochester', 'Minnetonka', 'Plymouth', 'Plymouth', 'Robbinsdale', 'West Saint Paul', 'Roseville', 'North Saint Paul', 'Woodbury']
Sim 0 | Step 2: ['Blaine', 'Coon Rapids', 'South Saint Paul', 'Mahtomedi', 'Brooklyn Park', 'Bloomington', 'Shoreview', 'Northfield', 'Little Canada']
Sim 0 | Step 3: ['Eden Prairie', 'Brooklyn Center', 'Brooklyn Center', 'Centerville', 'Forest Lake', 'Grant', 'Hopkins', 'Lino Lakes', 'Saint Anthony', 'Savage', 'Mounds View', 'Oakdale', 'Andover', 'Mendota Heights']
Sim 0 | Step 4: ['Buffalo', 'Rosemount', 'Shakopee']
Sim 0 | Step 5: ['Apple Valley', 'Prior Lake', 'Champlin', 'Columbia Heights', 'Columbia Heights', 'Falcon Heights', 'Lilydale', 'White Bear Lake', 'Falcon Heights', 'Prior Lake', 'New Hope', '