In [None]:
import requests # For making HTTP requests to the API.
import time # For adding a delay (so we don’t overload the API).
import pandas as pd # For handling tabular data and CSV.
import great_expectations as gx # For validating data quality.

In [None]:
base_url = "https://data.wa.gov/resource/f6w7-q2d2.json" # API endpoint for WA EV data.
limit = 1000 # Number of rows to fetch per request.
offset = 0 # Starting offset for pagination.
all_data = [] # Empty list to collect API results.

print("Fetching data from API...") # Log progress.

Fetching data from API...


In [6]:
while True: # Loop until no more data.
    url = f"{base_url}?$limit={limit}&$offset={offset}" # Build paginated URL.
    response = requests.get(url) # Send GET request.

    if response.status_code != 200: # If request fails.
        print(f"Error fetching data: {response.status_code}") # Print error.
        break # Exit loop.

    batch = response.json() # Parse JSON response.
    if not batch: # Stop if no data.
        break

    all_data.extend(batch) # Append batch into list.
    offset += limit # Increase offset for next page.

    print(f"Fetched {len(batch)} records | Total so far: {len(all_data)}") # progress log.

    time.sleep(1) # Delay to respect API limits.

print(f"Total records fetched: {len(all_data)}") # Final count after loop.

Fetched 1000 records | Total so far: 1000
Fetched 1000 records | Total so far: 2000
Fetched 1000 records | Total so far: 3000
Fetched 1000 records | Total so far: 4000
Fetched 1000 records | Total so far: 5000
Fetched 1000 records | Total so far: 6000
Fetched 1000 records | Total so far: 7000
Fetched 1000 records | Total so far: 8000
Fetched 1000 records | Total so far: 9000
Fetched 1000 records | Total so far: 10000
Fetched 1000 records | Total so far: 11000
Fetched 1000 records | Total so far: 12000
Fetched 1000 records | Total so far: 13000
Fetched 1000 records | Total so far: 14000
Fetched 1000 records | Total so far: 15000
Fetched 1000 records | Total so far: 16000
Fetched 1000 records | Total so far: 17000
Fetched 1000 records | Total so far: 18000
Fetched 1000 records | Total so far: 19000
Fetched 1000 records | Total so far: 20000
Fetched 1000 records | Total so far: 21000
Fetched 1000 records | Total so far: 22000
Fetched 1000 records | Total so far: 23000
Fetched 1000 records

In [9]:
csv_file = "D:\Ardi Kurniawan Kusuma\HACKTIV8\CODA_RMT_008\PHASE 2\WEEK 4\final-project-coda-008-rmt-group-1\Data\ev_population_data_raw.csv" # File name for saving CSV.
df = pd.DataFrame(all_data) # Convert list → DataFrame.
df.to_csv(csv_file, index=False) # Save DataFrame as CSV.
print(f"Data saved to {csv_file}") # Confirm save.

OSError: Cannot save file into a non-existent directory: 'D:\Ardi Kurniawan Kusuma\HACKTIV8\CODA_RMT_008\PHASE 2\WEEK 4inal-project-coda-008-rmt-group-1\Data'

In [None]:
df_reloaded = pd.read_csv(csv_file) # Reload CSV into DataFrame.
print("CSV Reloaded. Shape:", df_reloaded.shape) # Show shape of dataset.

In [None]:
# Convert the pandas DataFrame to a Great Expectations Dataset for validation.
# Create context.
context = FileDataContext.create(project_root_dir='./')

# Name Datasource.
datasource_name = 'ev_population_data_raw'

# Add function to manually overwrite datasource.
context.sources.delete(datasource_name)

# Create datasource.
datasource = context.sources.add_pandas(datasource_name)

# Name Data Asset.
asset_name = 'ev_population_data'

# File path for data validation.
path_to_data = './Data/ev_population_data_cleaned.csv'

# Create asset.
asset = datasource.add_csv_asset(asset_name, filepath_or_buffer=path_to_data)

# Create batch request.
batch_request = asset.build_batch_request()

In [None]:
# Create an expectation suite (a collection of expectations).
# Name expectation suite.
expectation_suite_name = "ev_population_suite"

# Create expectation suite.
context.add_or_update_expectation_suite(expectation_suite_name)

In [None]:
# Create a validator using above expectation suite.
validator = context.get_validator(
    batch_request = batch_request,
    expectation_suite_name = expectation_suite_name
)

# Check the validator.
validator.head()

In [None]:
# Expectation 1: Booking ID should be unique.
validator.expect_column_values_to_be_unique(column="Booking ID")

In [None]:
# Expectation 2: Ride Distance should be between a min_value and max_value.
validator.expect_column_values_to_be_between(column="Ride Distance", min_value=0, max_value=100)

In [None]:
# Expectation 3: Vehicle Type should be in a specific set of allowed values.
validator.expect_column_values_to_be_in_set(
    column="Vehicle Type",
    value_set=["Go Mini", "Go Sedan", "Auto", "eBike", "Bike", "Uber XL", "Premier Sedan"]
)

In [None]:
# Run all expectations defined on the validator.
validation_result = validator.validate()

# Print the validation results.
print(validation_result)