In [1]:
import urllib.request
import concurrent.futures
import pandas as pd
import requests
import json
import math
import time
from datetime import datetime

# Reference point for the "Finished in ..." report printed at the end of the cell.
start = time.perf_counter()

# FEMA open API query: registrations filtered to state 'FL' and incident type
# 'Hurricane', limited to the selected columns and ordered by declarationDate.
baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount&$orderby=declarationDate"

top = 1000  # number of records to get per call
skip = 0  # number of records to skip

# Request a single record with $inlinecount=allpages so the response metadata
# carries the total record count. The context manager closes the HTTP response
# as soon as the body has been read.
with urllib.request.urlopen(baseUrl + "&$inlinecount=allpages&$select=id&$top=1") as webUrl:
    result = webUrl.read()
jsonData = json.loads(result.decode())

recCount = jsonData['metadata']['count']
loopNum = math.ceil(recCount / top)  # number of pages of `top` records needed

print("START " + str(datetime.now()) + ", " + str(recCount) + " records, " + str(top) + " returned per call, " + str(
    loopNum) + " iterations needed.")

# Empty frame with the expected columns; downloaded pages are appended to it.
insurance_data = pd.DataFrame(
    columns=[
        'incidentType',
        'disasterNumber',
        'ihpAmount',
        'floodDamageAmount',
        'foundationDamageAmount',
        'roofDamageAmount'
    ]
)

# One HTTP session shared by every page download.
session = requests.session()

def download_data(req_url, i):
    """Download one page of results and parse it into a DataFrame.

    Parameters:
        req_url: fully built request URL for the page.
        i: page index; used only in the progress message.

    Returns:
        A pandas DataFrame parsed from the response body when the server
        answers with HTTP 200. Otherwise the status code and body are printed
        and None is returned.
    """
    from io import StringIO  # local import: this cell's import list has no `io`

    response = session.get(req_url)

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    print("Iteration " + str(i) + " done")
    # Wrap the payload in a file-like object: passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    return pd.read_json(StringIO(response.text))

with concurrent.futures.ThreadPoolExecutor() as executor:

    # Hand submit() the callable and its arguments separately so each download
    # runs on a worker thread. Writing download_data(...) inside submit() runs
    # the download right here, one page at a time, and then submits its return
    # value as if it were a function.
    results = [
        executor.submit(download_data, f"{baseUrl}&$metadata=off&$format=jsona&$skip={i*top}&$top={top}", i)
        for i in range(loopNum)
    ]

    # Read the futures back in submission order so pages stay in request order.
    # Pages whose download failed come back as None and are skipped.
    frames = [insurance_data]
    for future in results:
        page = future.result()
        if page is not None:
            frames.append(page)

# A single concat at the end avoids re-copying the accumulated frame per page.
insurance_data = pd.concat(frames, ignore_index=True)


print("END ")

insurance_data.to_csv('insurance_claims.csv', index=False)

finish = time.perf_counter()

print(f'Finished in {round(finish - start, 2)} seconds')

START 2023-01-26 23:53:26.995923, 5586380 records, 1000 returned per call, 5587 iterations needed.
Iteration 0 done
Iteration 1 done
Iteration 2 done
Iteration 3 done
Iteration 4 done
Iteration 5 done
Iteration 6 done
Iteration 7 done
Iteration 8 done
Iteration 9 done
Iteration 10 done
Iteration 11 done
Iteration 12 done
Iteration 13 done
Iteration 14 done
Iteration 15 done
Iteration 16 done
Iteration 17 done
Iteration 18 done
Iteration 19 done
Iteration 20 done
Iteration 21 done
Iteration 22 done
Iteration 23 done
Iteration 24 done
Iteration 25 done
Iteration 26 done
Iteration 27 done
Iteration 28 done
Iteration 29 done
Iteration 30 done
Iteration 31 done
Iteration 32 done
Iteration 33 done
Iteration 34 done
Iteration 35 done
Iteration 36 done
Iteration 37 done
Iteration 38 done
Iteration 39 done
Iteration 40 done
Iteration 41 done
Iteration 42 done
Iteration 43 done
Iteration 44 done


KeyboardInterrupt: 

In [20]:
import urllib.request
import numpy as np
import concurrent.futures
import pandas as pd
import requests
import json
import math
import time
from datetime import datetime

# Reference point for the "Finished in ..." report printed at the end of the cell.
start = time.perf_counter()

# FEMA open API query: registrations filtered to state 'FL' and incident type
# 'Hurricane', limited to the selected columns and ordered by declarationDate.
baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount&$orderby=declarationDate"

top = 1000  # number of records to get per call
skip = 0  # number of records to skip

# Request a single record with $inlinecount=allpages so the response metadata
# carries the total record count. The context manager closes the HTTP response
# as soon as the body has been read.
with urllib.request.urlopen(baseUrl + "&$inlinecount=allpages&$select=id&$top=1") as webUrl:
    result = webUrl.read()
jsonData = json.loads(result.decode())

recCount = jsonData['metadata']['count']
loopNum = math.ceil(recCount / top)  # number of pages of `top` records needed

print("START " + str(datetime.now()) + ", " + str(recCount) + " records, " + str(top) + " returned per call, " + str(
    loopNum) + " iterations needed.")

# One slot per page; each slot will hold that page's DataFrame.
insurance_data = np.empty(loopNum, dtype=object)

# One HTTP session shared by every page download.
session = requests.session()

def download_data(req_url, i):
    """Download one page of results and parse it into a typed DataFrame.

    Parameters:
        req_url: fully built request URL for the page.
        i: page index; used only in the progress message.

    Returns:
        A pandas DataFrame parsed from the response body when the server
        answers with HTTP 200. Otherwise the status code and body are printed
        and None is returned.
    """
    from io import StringIO  # local import: this cell's import list has no `io`

    response = session.get(req_url)

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    print("Iteration " + str(i) + " done")
    # Wrap the payload in a file-like object: passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    return pd.read_json(
        StringIO(response.text),
        dtype={
            'incidentType': str,
            'disasterNumber': int,
            # The amount columns carry fractional values (e.g. 1327.97), which
            # an int dtype would silently truncate.
            'ihpAmount': float,
            'floodDamageAmount': float,
            'foundationDamageAmount': float,
            'roofDamageAmount': float
        }
    )

with concurrent.futures.ThreadPoolExecutor() as executor:

    # Hand submit() the callable and its arguments separately so each download
    # runs on a worker thread. Writing download_data(...) inside submit() runs
    # the download right here, one page at a time, and then submits its return
    # value as if it were a function.
    results = [
        executor.submit(download_data, f"{baseUrl}&$metadata=off&$format=jsona&$skip={i*top}&$top={top}", i)
        for i in range(loopNum)
    ]

    # Futures are read back in submission order, so slot i holds page i.
    for i, future in enumerate(results):
        insurance_data[i] = future.result()
        print("Iteration " + str(i) + " appended")

# Failed downloads leave None in their slot; keep only the pages that arrived.
insurance_data = pd.concat(
    [page for page in insurance_data if page is not None],
    ignore_index=True,
)
insurance_data.to_csv('insurance_claims.csv', index=False)

finish = time.perf_counter()

print("END ")
print(f'Finished in {round(finish - start, 2)} seconds')

Unnamed: 0,incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount
0,Hurricane,1539,0.00,0.0,0.0,0.0
1,Hurricane,1539,0.00,0.0,0.0,0.0
2,Hurricane,1539,0.00,0.0,0.0,0.0
3,Hurricane,1539,0.00,0.0,0.0,0.0
4,Hurricane,1539,0.00,0.0,0.0,0.0
...,...,...,...,...,...,...
995,Hurricane,1539,1327.97,0.0,0.0,1362.0
996,Hurricane,1539,0.00,0.0,0.0,0.0
997,Hurricane,1539,869.77,0.0,0.0,0.0
998,Hurricane,1539,0.00,0.0,0.0,749.8


In [None]:
import numpy as np
import concurrent.futures
import pandas as pd
import requests
import math
import time
from datetime import datetime

# Timer for the whole cell; read again once the CSV has been written.
start = time.perf_counter()

# Page size and initial offset for the paged requests.
top = 1000  # number of records to get per call
skip = 0  # number of records to skip

baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount&$orderby=declarationDate"

# Shared HTTP session, used first for the one-record count query below.
session = requests.session()
jsonData = session.get(f"{baseUrl}&$inlinecount=allpages&$select=id&$top=1").json()

# Total matching records, and how many pages of `top` records that implies.
recCount = jsonData["metadata"]["count"]
loopNum = math.ceil(recCount / top)

print(f"START {datetime.now()}, {recCount} records, {top} returned per call, {loopNum} iterations needed.")

# One object slot per page.
insurance_data = np.empty(loopNum, dtype=object)

def download_data(req_url, i):
    """Download one page of results and parse it into a typed DataFrame.

    Parameters:
        req_url: fully built request URL for the page.
        i: page index; used only in the progress message.

    Returns:
        A pandas DataFrame parsed from the response body when the server
        answers with HTTP 200. Otherwise the status code and body are printed
        and None is returned.
    """
    from io import StringIO  # local import: this cell's import list has no `io`

    response = session.get(req_url)

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    print("Iteration " + str(i) + " done")
    # Wrap the payload in a file-like object: passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    return pd.read_json(
        StringIO(response.text),
        dtype={
            'incidentType': str,
            'disasterNumber': int,
            # The amount columns carry fractional values (e.g. 1327.97), which
            # an int dtype would silently truncate.
            'ihpAmount': float,
            'floodDamageAmount': float,
            'foundationDamageAmount': float,
            'roofDamageAmount': float
        }
    )

with concurrent.futures.ThreadPoolExecutor() as executor:

    # Hand submit() the callable and its arguments separately so each download
    # runs on a worker thread. Writing download_data(...) inside submit() runs
    # the download right here, one page at a time, and then submits its return
    # value as if it were a function.
    results = [
        executor.submit(download_data, f"{baseUrl}&$metadata=off&$format=jsona&$skip={i*top}&$top={top}", i)
        for i in range(loopNum)
    ]

    # Futures are read back in submission order, so slot i holds page i.
    for i, future in enumerate(results):
        insurance_data[i] = future.result()
        print("Iteration " + str(i) + " appended")

# Failed downloads leave None in their slot; keep only the pages that arrived.
insurance_data = pd.concat(
    [page for page in insurance_data if page is not None],
    ignore_index=True,
)
insurance_data.to_csv('insurance_claims.csv', index=False)

finish = time.perf_counter()

print("END ")
print(f'Finished in {round(finish - start, 2)} seconds')

In [3]:
import numpy as np
# One request URL per page: page number `page` starts at record offset page * top.
urls_array = np.array(
    [
        baseUrl + "&$metadata=off&$format=jsona&$skip=" + str(page * top) + "&$top=" + str(top)
        for page in range(loopNum)
    ]
)

In [14]:
# Pick one of the generated page URLs (index 1000) and display it for inspection.
url = urls_array[1000]
url

'https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount&$orderby=declarationDate&$metadata=off&$format=jsona&$skip=1000000&$top=1000'

In [15]:
# Read the $skip value out of the query string by parameter name instead of by
# character position, so the result does not depend on $skip being the
# second-to-last parameter in the URL.
from urllib.parse import parse_qs, urlsplit

substring = parse_qs(urlsplit(url).query)["$skip"][0]
substring

'1000000'

In [None]:
import numpy as np
import concurrent.futures
import pandas as pd
import requests
import math
import time
from datetime import datetime

# Timer for the whole cell; read again once the CSV has been written.
start = time.perf_counter()

# Page size and initial offset for the paged requests.
top = 1000  # number of records to get per call
skip = 0  # number of records to skip

baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount&$orderby=declarationDate"

# Shared HTTP session, used first for the one-record count query below.
session = requests.session()
jsonData = session.get(f"{baseUrl}&$inlinecount=allpages&$select=id&$top=1").json()

# Total matching records, and how many pages of `top` records that implies.
recCount = jsonData["metadata"]["count"]
loopNum = math.ceil(recCount / top)

print(f"START {datetime.now()}, {recCount} records, {top} returned per call, {loopNum} iterations needed.")

# One object slot per page.
insurance_data = np.empty(loopNum, dtype=object)

def download_data(req_url):
    """Download one page of results and parse it into a typed DataFrame.

    Parameters:
        req_url: fully built request URL for the page.

    Returns:
        A pandas DataFrame parsed from the response body when the server
        answers with HTTP 200. Otherwise the status code and body are printed
        and None is returned.
    """
    from io import StringIO  # local import: this cell's import list has no `io`

    response = session.get(req_url)

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    # Wrap the payload in a file-like object: passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    return pd.read_json(
        StringIO(response.text),
        dtype={
            'incidentType': str,
            'disasterNumber': int,
            # The amount columns carry fractional values (e.g. 1327.97), which
            # an int dtype would silently truncate.
            'ihpAmount': float,
            'floodDamageAmount': float,
            'foundationDamageAmount': float,
            'roofDamageAmount': float
        }
    )

with concurrent.futures.ThreadPoolExecutor() as executor:
    # One request URL per page; page i starts at record offset i * top.
    urls_array = np.array([f"{baseUrl}&$metadata=off&$format=jsona&$skip={i*top}&$top={top}" for i in range(loopNum)])

    results = executor.map(download_data, urls_array)

    # map() yields the bare return values in input order, not (index, value)
    # pairs, so enumerate() supplies the page index for the slot assignment.
    for i, f in enumerate(results):
        insurance_data[i] = f
        print("Iteration " + str(i) + " appended")

# Failed downloads leave None in their slot; keep only the pages that arrived.
insurance_data = pd.concat(
    [page for page in insurance_data if page is not None],
    ignore_index=True,
)
insurance_data.to_csv('insurance_claims.csv', index=False)

finish = time.perf_counter()

print("END ")
print(f'Finished in {round(finish - start, 2)} seconds')

In [2]:
import csv
import urllib.request
import math
import json
from datetime import datetime

# Base URL for this endpoint. Add filters, column selection, and sort order to this.
baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount&$orderby=declarationDate"

top = 1000  # number of records to get per call
skip = 0  # number of records to skip

# Return 1 record with your criteria to get total record count. Specifying only 1
#   column here to reduce amount of data returned. Need inlinecount to get record count.
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(baseUrl + "&$inlinecount=allpages&$select=id&$top=1") as webUrl:
    result = webUrl.read()
jsonData = json.loads(result.decode())

In [3]:
# Display the parsed response of the count query (metadata plus the one returned record).
jsonData

{'metadata': {'skip': 0,
  'filter': "damagedStateAbbreviation eq 'FL' and incidentType eq 'Hurricane'",
  'orderby': 'declarationDate ASC',
  'select': None,
  'rundate': '2023-01-29T04:36:45.044Z',
  'entityname': 'IndividualsAndHouseholdsProgramValidRegistrations',
  'version': 'v1',
  'top': 1,
  'count': 5586676,
  'format': 'json',
  'metadata': True,
  'url': '/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount&$orderby=declarationDate&$inlinecount=allpages&$select=id&$top=1'},
 'IndividualsAndHouseholdsProgramValidRegistrations': [{'incidentType': 'Hurricane',
   'disasterNumber': 1539,
   'ihpAmount': 0,
   'floodDamageAmount': 0,
   'foundationDamageAmount': 0,
   'roofDamageAmount': 1776,
   'id': '747157ef-a32b-4812-8ee7-4617f231bada'}]}

In [None]:
import csv
import urllib.request
import math
import json
import time
from datetime import datetime

# Base URL for this endpoint. Add filters, column selection, and sort order to this.
baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount"

top = 1000  # number of records to get per call
skip = 0  # number of records to skip
maxIterations = 100  # upper bound on pages fetched in this run (the full data set needs loopNum pages)

# Return 1 record with your criteria to get total record count
with urllib.request.urlopen(baseUrl + "&$inlinecount=allpages&$select=id&$top=1") as webUrl:
    result = webUrl.read()
jsonData = json.loads(result.decode())

# calculate the number of calls we will need to get all of our data (using the maximum of 1000)
recCount = jsonData['metadata']['count']
loopNum = math.ceil(recCount / top)

start = time.perf_counter()

# send some logging info to the console so we know what is happening
print("START " + str(datetime.now()) + ", " + str(recCount) + " records, " + str(top) + " returned per call, " + str(
    loopNum) + " iterations needed.")


# Initialize our file. The with-block closes it even if a request fails part-way.
with open("IndividualsAndHouseholdsProgramValidRegistrations.csv", "w", newline='') as outFile:
    writer = csv.writer(outFile)

    # Write the header row to the file
    writer.writerow(["incidentType", "disasterNumber", "ihpAmount", "floodDamageAmount", "foundationDamageAmount", "roofDamageAmount"])

    # Loop and call the API endpoint changing the record start each iteration.
    # Never request more pages than the record count calls for.
    i = 0
    while i < min(maxIterations, loopNum):
        # Request the page as CSV ($format=csv) and close the response once read.
        with urllib.request.urlopen(baseUrl + "&$format=csv" + "&$skip=" + str(skip) + "&$top=" + str(top)) as webUrl:
            result = webUrl.read()
        csv_reader = csv.reader(result.decode().splitlines(), delimiter=',')
        next(csv_reader, None)  # skip the header row; tolerate an empty response
        writer.writerows(csv_reader)
        # increment the loop counter and skip value
        i += 1
        skip = i * top

        if i % 10 == 0:  # report progress every 10 pages
            print("Iteration " + str(i) + " done")

finish = time.perf_counter()
print(f'Finished in {round(finish - start, 2)} seconds')

In [5]:
import pandas as pd
# Start from a frame that has the expected columns but no rows, then show it.
df = pd.DataFrame(
    columns=[
        "incidentType",
        "disasterNumber",
        "ihpAmount",
        "floodDamageAmount",
        "foundationDamageAmount",
        "roofDamageAmount",
    ]
)
df

Unnamed: 0,incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount


In [None]:
import csv
import urllib.request
import math
import pandas
import json
import time
from datetime import datetime

import pandas as pd

# Base URL for this endpoint. Add filters, column selection, and sort order to this.
baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount"

top = 1000  # number of records to get per call
skip = 0  # number of records to skip
maxIterations = 200  # upper bound on pages fetched in this run (the full data set needs loopNum pages)

# Return 1 record with your criteria to get total record count
with urllib.request.urlopen(baseUrl + "&$inlinecount=allpages&$select=id&$top=1") as webUrl:
    result = webUrl.read()
jsonData = json.loads(result.decode())

# calculate the number of calls we will need to get all of our data (using the maximum of 1000)
recCount = jsonData['metadata']['count']
loopNum = math.ceil(recCount / top)

start = time.perf_counter()

# send some logging info to the console so we know what is happening
print("START " + str(datetime.now()) + ", " + str(recCount) + " records, " + str(top) + " returned per call, " + str(
    loopNum) + " iterations needed.")


# Create empty dataframe
df = pd.DataFrame(columns=["incidentType", "disasterNumber", "ihpAmount", "floodDamageAmount", "foundationDamageAmount", "roofDamageAmount"])

# Loop and call the API endpoint changing the record start each iteration.
# Pages are collected in a list and concatenated once after the loop, so the
# accumulated frame is not re-copied on every iteration.
pages = []
i = 0
while i < min(maxIterations, loopNum):
    # Request the page as CSV ($format=csv); read_csv consumes the response
    # before the with-block closes it.
    with urllib.request.urlopen(baseUrl + "&$format=csv" + "&$skip=" + str(skip) + "&$top=" + str(top)) as file:
        temp_df = pd.read_csv(file)
    pages.append(temp_df)

    # increment the loop counter and skip value
    i += 1
    skip = i * top

    if i % 10 == 0:  # report progress every 10 pages
        print("Iteration " + str(i) + " done")

df = pd.concat([df, *pages], ignore_index=True)
df.to_csv('IndividualsAndHouseholdsProgramValidRegistrations.csv', index=False)

finish = time.perf_counter()
print(f'Finished in {round(finish - start, 2)} seconds')

In [None]:
import urllib.request
import math
import json
import time
from datetime import datetime
import pandas as pd

# Base URL for this endpoint. Add filters, column selection, and sort order to this.
baseUrl = "https://www.fema.gov/api/open/v1/IndividualsAndHouseholdsProgramValidRegistrations?$filter=damagedStateAbbreviation%20eq%20%27FL%27%20and%20incidentType%20eq%20%27Hurricane%27&$select=incidentType,disasterNumber,ihpAmount,floodDamageAmount,foundationDamageAmount,roofDamageAmount"

top = 1000  # number of records to get per call
skip = 0  # number of records to skip
maxIterations = 200  # upper bound on pages fetched in this run (the full data set needs loopNum pages)

# Return 1 record with your criteria to get total record count
with urllib.request.urlopen(baseUrl + "&$inlinecount=allpages&$select=id&$top=1") as webUrl:
    result = webUrl.read()
jsonData = json.loads(result.decode())

# calculate the number of calls we will need to get all of our data (using the maximum of 1000)
recCount = jsonData['metadata']['count']
loopNum = math.ceil(recCount / top)

start = time.perf_counter()

# send some logging info to the console so we know what is happening
print("START " + str(datetime.now()) + ", " + str(recCount) + " records, " + str(top) + " returned per call, " + str(
    loopNum) + " iterations needed.")


# Create empty dataframe & csv file (this writes the single header row)
df = pd.DataFrame(columns=["incidentType", "disasterNumber", "ihpAmount", "floodDamageAmount", "foundationDamageAmount", "roofDamageAmount"])
df.to_csv('IndividualsAndHouseholdsProgramValidRegistrations.csv', index=False, mode='w')

# Loop and call the API endpoint changing the record start each iteration.
i = 0
while i < min(maxIterations, loopNum):
    # Request the page as CSV ($format=csv). Let read_csv take the response's
    # own header row as column names: skipping it would promote the first data
    # row to column names, and pandas renames duplicate names (0.0 -> 0.0.1).
    with urllib.request.urlopen(baseUrl + "&$format=csv" + "&$skip=" + str(skip) + "&$top=" + str(top)) as file:
        temp_df = pd.read_csv(file)
    # Append data rows only; the header is already in the file.
    temp_df.to_csv('IndividualsAndHouseholdsProgramValidRegistrations.csv', mode='a', index=False, header=False)

    # increment the loop counter and skip value
    i += 1
    skip = i * top

    if i % 10 == 0:  # report progress every 10 pages
        print("Iteration " + str(i) + " done")

finish = time.perf_counter()
print(f'Finished in {round(finish - start, 2)} seconds')