### Downloading the latest dataset over HTTP

In [17]:
import requests
import pandas as pd
import os

# Download the FimaNfipClaims CSV from fema.gov, convert it to a gzip-compressed
# Parquet file, and remove the intermediate CSV once the conversion succeeded.

# Remote source and local artifact names.
csv_url = "https://www.fema.gov/about/reports-and-data/openfema/FimaNfipClaims.csv"
csv_filename = "FimaNfipClaims.csv"
parquet_filename = 'FimaNfipClaims.parquet.gzip'

# (connect, read) timeouts in seconds. Without a timeout, requests waits forever
# on a stalled server and the kernel appears hung.
REQUEST_TIMEOUT = (10, 300)
# Stream in 1 MiB pieces; tiny chunks only add per-write overhead on a large file.
CHUNK_SIZE = 1024 * 1024

download_ok = False

# The context manager guarantees the streamed connection is released, even if
# writing the file fails part-way through.
with requests.get(csv_url, stream=True, timeout=REQUEST_TIMEOUT) as response:
    # Ensure the request was successful
    if response.status_code == 200:
        # Stream the content to a file so the whole body is never held in memory
        with open(csv_filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                file.write(chunk)
        print("CSV download complete.")
        download_ok = True
    else:
        print(f"Failed to download the file: Status code {response.status_code}")

if download_ok:
    # Read the downloaded CSV into a DataFrame. low_memory=False makes pandas
    # parse the file in one pass instead of inferring dtypes chunk by chunk.
    df = pd.read_csv(csv_filename, low_memory=False)

    # Convert the DataFrame to a Parquet file
    df.to_parquet(parquet_filename, compression='gzip')
    print(f"Converted to Parquet file: {parquet_filename}")

    # Delete the CSV file; this is only reached after the Parquet copy was written
    os.remove(csv_filename)
    print(f"Deleted CSV file: {csv_filename}")

CSV download complete.
Converted to Parquet file: FimaNfipClaims.parquet.gzip
Deleted CSV file: FimaNfipClaims.csv


In [21]:
# Some preliminary checks

import datetime

# Function to get max 'asOfDate' from a DataFrame
def get_max_asOfDate(df):
    """Return the latest date found in the ``asOfDate`` column of ``df``.

    The column is parsed with ``pd.to_datetime`` on a temporary Series, so the
    caller's DataFrame is left untouched (the previous version overwrote the
    ``asOfDate`` column in place as a hidden side effect).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``asOfDate`` column with values that
        ``pd.to_datetime`` can parse.

    Returns
    -------
    The maximum of the parsed column (a ``pandas.Timestamp`` for valid dates).

    Raises
    ------
    KeyError
        If ``df`` has no ``asOfDate`` column.
    """
    parsed_dates = pd.to_datetime(df['asOfDate'])
    return parsed_dates.max()

# Function to check if new file is larger than the previous file
def is_file_larger(new_file, old_file):
    """Tell whether ``new_file`` is strictly bigger on disk than ``old_file``.

    Parameters
    ----------
    new_file, old_file : path-like
        Paths of the two files whose byte sizes are compared.

    Returns
    -------
    bool
        ``True`` only when ``new_file`` has more bytes than ``old_file``;
        equal sizes yield ``False``.

    Raises
    ------
    OSError
        If either path cannot be stat'ed (for example, it does not exist).
    """
    # Stat the new file first, matching the original evaluation order.
    new_size = os.stat(new_file).st_size
    old_size = os.stat(old_file).st_size
    return old_size < new_size

In [18]:
# Number of rows in the loaded DataFrame
len(df.index)

2600579