In [None]:
import requests
import pandas as pd
import os

def fetch_data(base_url, dataset, api_key, num_records=99, offset=0):
    """Download a dataset page by page and return it as one DataFrame.

    Pages of ``num_records`` rows are requested from
    ``{base_url}{dataset}/records`` starting at ``offset``. Paging stops when
    the offset passes 9900, when a response has no ``'results'`` entry, or
    when a page comes back with fewer than ``num_records`` rows.

    Args:
        base_url: Catalog URL prefix; the dataset id is appended to it as-is.
        dataset: Dataset identifier.
        api_key: Key sent as the ``api_key`` query parameter. When it is
            ``None`` or empty the parameter is left out entirely rather than
            being sent as the literal text ``None``.
        num_records: Page size (``limit``); must be at least 1.
        offset: Index of the first record to request.

    Returns:
        pandas.DataFrame built from every record collected (empty if none).

    Raises:
        ValueError: If ``num_records`` is smaller than 1.
        Exception: If a request fails, times out or returns an HTTP error;
            the underlying requests exception is chained as the cause.
    """
    # A page size of 0 would never advance the offset and loop forever.
    if num_records < 1:
        raise ValueError("num_records must be a positive integer")

    # Highest offset that will be requested (an offset ceiling, not a count of
    # requests). NOTE(review): presumably chosen to stay inside the API's
    # paging window - confirm against the API documentation.
    max_offset = 9900

    endpoint = f'{base_url}{dataset}/records'
    all_records = []

    while offset <= max_offset:
        # Let requests build and encode the query string.
        params = {'limit': num_records, 'offset': offset}
        if api_key:
            params['api_key'] = api_key

        try:
            result = requests.get(endpoint, params=params, timeout=10)
            result.raise_for_status()
            records = result.json().get('results')
        except requests.exceptions.RequestException as e:
            raise Exception(f"API request failed: {e}") from e

        # No 'results' entry in the payload: nothing more to read.
        if records is None:
            break
        all_records.extend(records)

        # A short page means the final page has been reached.
        if len(records) < num_records:
            break

        offset += num_records

    return pd.DataFrame(all_records)

# --- Configuration -----------------------------------------------------------
# The key is read from the environment; it is None when API_KEY is not set.
API_KEY = os.getenv("API_KEY")
BASE_URL = 'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/'

# Identifiers of the two datasets used by this notebook
FOOTPATH_STEEPNESS = 'footpath-steepness'
TREE_CANOPIES = 'tree-canopies-2021-urban-forest'

# --- Download ----------------------------------------------------------------
footpath_steepness = fetch_data(
    base_url=BASE_URL, dataset=FOOTPATH_STEEPNESS, api_key=API_KEY
)
tree_canopies = fetch_data(
    base_url=BASE_URL, dataset=TREE_CANOPIES, api_key=API_KEY
)

# --- Preview: heading followed by the first rows of each frame ---------------
print("Footpath Steepness Data:", footpath_steepness.head(), sep="\n")
print("\nTree Canopies Data:", tree_canopies.head(), sep="\n")


In [2]:
# Data validation and cleaning steps
# Drop every row that has a missing value in any column. The names are rebound
# to the filtered frames (rather than mutated with inplace=True) so the cleaned
# data is what all following cells see.
footpath_steepness = footpath_steepness.dropna()
tree_canopies = tree_canopies.dropna()

# Additional data cleaning steps as required
# Example: Converting date columns to datetime format, handling duplicates, etc.

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() adds a stray "None" line.
print("Footpath Steepness Data (Cleaned):")
footpath_steepness.info()

print("\nTree Canopies Data (Cleaned):")
tree_canopies.info()


Footpath Steepness Data (Cleaned):
<class 'pandas.core.frame.DataFrame'>
Index: 2444 entries, 7 to 9998
Data columns (total 15 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   geo_point_2d  2444 non-null   object 
 1   geo_shape     2444 non-null   object 
 2   grade1in      2444 non-null   float64
 3   gradepc       2444 non-null   float64
 4   segside       2444 non-null   object 
 5   statusid      2444 non-null   object 
 6   asset_type    2444 non-null   object 
 7   deltaz        2444 non-null   float64
 8   streetid      2444 non-null   float64
 9   mccid_int     2444 non-null   float64
 10  mcc_id        2444 non-null   int64  
 11  address       2444 non-null   object 
 12  rlmax         2444 non-null   float64
 13  rlmin         2444 non-null   float64
 14  distance      2444 non-null   float64
dtypes: float64(8), int64(1), object(6)
memory usage: 305.5+ KB
None

Tree Canopies Data (Cleaned):
<class 'pandas.core.frame.DataFr

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Preliminary data analysis
# Descriptive statistics for each dataset
print(footpath_steepness.describe())
print(tree_canopies.describe())


def plot_correlation_heatmap(frame, title):
    """Draw an annotated heatmap of the correlations between numeric columns.

    Only numeric columns are correlated: the frames fetched above also hold
    object columns (e.g. geo_shape, address), which DataFrame.corr() cannot
    convert to numbers.

    Args:
        frame: DataFrame to analyse.
        title: Title placed above the heatmap.

    Returns:
        The correlation matrix. It is empty, and no figure is drawn, when the
        frame has no numeric columns.
    """
    matrix = frame.select_dtypes(include='number').corr()
    if matrix.empty:
        # A heatmap cannot be drawn from an empty matrix.
        print(f"{title}: no numeric columns to correlate; heatmap skipped.")
        return matrix

    fig, ax = plt.subplots()
    sns.heatmap(matrix, annot=True, ax=ax)
    ax.set_title(title)
    plt.show()
    return matrix


# Correlation analysis
correlation_matrix = plot_correlation_heatmap(
    footpath_steepness, "Footpath Steepness Correlation Matrix"
)
correlation_matrix = plot_correlation_heatmap(
    tree_canopies, "Tree Canopies Correlation Matrix"
)


In [None]:
# Detailed analysis
from scipy.stats import pearsonr

# Example: Correlation between footpath steepness and waste collection (dummy data)
waste_collection = pd.Series([100, 150, 200, 250, 300])  # Dummy data

# 'gradepc' is a float column of the footpath dataset (see the info() report of
# the cleaning cell). NOTE(review): presumably the gradient in percent -
# confirm against the dataset's schema before relying on the result.
STEEPNESS_COLUMN = 'gradepc'
steepness = footpath_steepness[STEEPNESS_COLUMN]

# pearsonr needs two samples of equal length, so both are cut to the shorter
# one. The pairing of rows is arbitrary while waste_collection is dummy data.
sample_size = min(len(steepness), len(waste_collection))
if sample_size < 2:
    raise ValueError("Need at least two paired observations to compute a correlation")

corr, _ = pearsonr(
    steepness.to_numpy()[:sample_size],
    waste_collection.to_numpy()[:sample_size],
)
print(f"Pearson correlation: {corr}")

# Additional statistical tests and model development as required
