# Function Notebook

## Introduction
This notebook is designed to be a flexible and expandable template for developing and documenting functions for various tasks.

## Table of Contents
- [Configuration and Setup](#Configuration-and-Setup)
- [API Get Dataset no key](#Get-Data-No-ApiKey)
- [Appendix](#Appendix)


## Configuration and Setup
Set up the environment with the necessary libraries and configuration. Make sure every library imported below is installed before running the functions in this notebook.

In [1]:
###################################################################
# Libraries used:
###################################################################
import numpy as np
import pandas as pd
import seaborn as sns
import folium
import matplotlib.pyplot as plt
import requests
import math
import tensorflow as tf
from io import StringIO
from geopy.distance import geodesic
from folium.plugins import MarkerCluster
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from haversine import haversine
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import tkinter as tk
from tkinter import messagebox

## Get Data No ApiKey

In [2]:
def API_Unlimited(datasetname):
    """Download a complete City of Melbourne open-data dataset as a DataFrame.

    No API key is used: the request goes to the public CSV export endpoint.

    Parameters:
    datasetname (string): dataset identifier as used in the portal URL,
        e.g. 'litter-traps'.

    Returns:
    DataFrame: the parsed dataset when the request succeeds (HTTP 200).
    None: when the server answers with any other status code; the status
        code is printed so the failure is visible in the notebook.
    """
    base_url = 'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/'
    export_format = 'csv'

    url = f'{base_url}{datasetname}/exports/{export_format}'
    params = {
        'select': '*',
        'limit': -1,  # all records
        'lang': 'en',
        'timezone': 'UTC'
    }

    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=60)

    if response.status_code != 200:
        print(f'Request failed with status code {response.status_code}')
        return None

    # The export is parsed with ';' as the field separator.
    url_content = response.content.decode('utf-8')
    dataset = pd.read_csv(StringIO(url_content), delimiter=';')

    # Preview at most 10 rows; asking for more rows than exist raises ValueError.
    print(dataset.sample(min(10, len(dataset)), random_state=999))
    return dataset


"""
Get unlimited data from the API Function 

Parameters:
datasetname (string): dataset name (identifier) as used by the City of Melbourne open data portal
apikey (string): the current API key (not required by this no-key function; if a key is stored in the current workspace / Google Drive it can be read as shown below)

f = open("API.txt","r")
api_key = f.read()

Returns:
Csv : Returns the csv dataset of the dataset name 
"""


'\nGet unlimited data from the API Function \n\nParameters:\ndatasetname (string): dataset name as from city of melbourn \napikey (string): the current api Key ( this should be gotton via the below if api stored in current workspace / google drive ( refer to Te API)\n\nf = open("API.txt","r")\napi_key = f.read()\n\nReturns:\nCsv : Returns the csv dataset of the dataset name \n'

#### Testing :

In [3]:
# Identifiers of the three datasets used throughout the notebook.
dataset_id_1, dataset_id_2, dataset_id_3 = (
    'litter-traps',
    'public-barbecues',
    'cafes-and-restaurants-with-seating-capacity',
)

# Download each dataset in turn; every call also prints a preview sample.
litter_df = API_Unlimited(dataset_id_1)
bbq_df = API_Unlimited(dataset_id_2)
cafe_df = API_Unlimited(dataset_id_3)

    asset_number                                  asset_description  \
34       1378100               SWD Litter Trap - Y25B - Bouverie St   
61       1426532                  SWD Litter Trap - Fitzroy Gardens   
24       1126516         SWD Litter Trap - Clarendon St, west trunk   
52       1126518                        SWD Litter Trap - Therry St   
35       1126526                         SWD Litter Trap - Henry St   
46       1378102                SWD Litter Trap - MP13B - Bayles St   
15       1378312  SWD Litter Trap - West side Wurundjeri Way, cn...   
55       1379349  SWD Litter Trap - Rakaia Way North cnr Docklan...   
62       1378105  SWD Litter Trap - Pearl River Road Sedimentati...   
9        1126517  SWD Litter Trap - Lauren St (between Arden St ...   

   construct_material_lupvalue inspection_frequency      maintained_by  \
34            Precast Concrete            Quarterly  City of Melbourne   
61            Precast Concrete            Quarterly  City of Melbourne

## Get Data ApiKey

#### Testing : 

# Pre-Processing Functions

## Dealing with NULL Values ( Finding Missing Data Count )

In [4]:
def FindMissingVal(df):
    """Count the missing (NaN / None) values in every column of a DataFrame.

    Parameters:
    df (DataFrame): the DataFrame to inspect.

    Returns:
    list of dict: one entry per column, in column order, of the form
        {'Feature': <column name>, 'Number of Missing Values': <count>}.
    """
    return [
        {'Feature': column, 'Number of Missing Values': df[column].isnull().sum()}
        for column in df.columns
    ]

"""
Function to get column names with count of missing values 

Parameters:
df (DataFrame): the DataFrame whose columns should be checked for missing values

Returns:
list of dict : one {'Feature': column name, 'Number of Missing Values': count} entry per column
"""

In [23]:
FindMissingVal(litter_df)

[{'Feature': 'asset_number', 'Number of Missing Values': 0},
 {'Feature': 'asset_description', 'Number of Missing Values': 0},
 {'Feature': 'construct_material_lupvalue', 'Number of Missing Values': 7},
 {'Feature': 'inspection_frequency', 'Number of Missing Values': 5},
 {'Feature': 'maintained_by', 'Number of Missing Values': 0},
 {'Feature': 'object_type_lupvalue', 'Number of Missing Values': 4},
 {'Feature': 'lat', 'Number of Missing Values': 0},
 {'Feature': 'lon', 'Number of Missing Values': 0},
 {'Feature': 'location', 'Number of Missing Values': 0}]

### Remove(drop), mean , median , mode 

In [37]:


def handle_null_values(dataset, columns, action):
    """Drop or fill missing values in the selected columns.

    The input DataFrame is never modified; a new DataFrame is returned for
    every action.

    Parameters:
    dataset (DataFrame): the DataFrame whose missing values should be handled.
    columns (list of str or str): column name(s) to process.
    action (string): one of
        'remove' - drop every row that has a missing value in any of `columns`;
        'mean' / 'median' / 'mode' - fill missing values in each column with
        that column's own statistic.

    Returns:
    DataFrame: a new DataFrame with the missing values handled.

    Raises:
    ValueError: if `action` is not one of the four supported values.
    """
    if action not in ('remove', 'mean', 'median', 'mode'):
        raise ValueError("Action must be 'remove', 'mean', 'median', or 'mode'")

    # A bare column name would otherwise be iterated character by character.
    if isinstance(columns, str):
        columns = [columns]

    if action == 'remove':
        return dataset.dropna(subset=columns)

    # Fill on a copy so the caller's DataFrame keeps its original values,
    # matching the 'remove' branch which also returns a new object.
    modified_dataset = dataset.copy()
    for column in columns:
        series = modified_dataset[column]
        if not series.isnull().any():
            continue  # nothing to fill in this column

        if action == 'mean':
            fill_value = series.mean()
        elif action == 'median':
            fill_value = series.median()
        else:  # 'mode'
            modes = series.mode()
            if modes.empty:
                # Column holds no non-null value, so there is no mode to use.
                continue
            fill_value = modes.iloc[0]

        modified_dataset[column] = series.fillna(fill_value)
    return modified_dataset

"""
Handling Missing Values Functions

Parameters:

dataset (dataframe) - DataFrame whose null values you want to handle
columns (array) - a list of all the columns whose missing values should be handled with the chosen action
action (string) - 'remove' , 'mode' , 'mean' , 'median' performs the named action ( only one action can be selected per call )

Returns:
Dataframe : Returns Dataframe including handled values
"""



"\nHandling Missing Values Functions\n\nParameters:\n\ndataset(dataframe) -  Dataframe you want to deal null values \ncolumns (array) - a array of all columns you want to handle missing values for the picked action\nactions (string) - 'remove' , 'mode' , 'mean' , 'median' performs the said actions when selected ( can select one at a time )\n\nReturns:\nDataframe : Returns Dataframe including handled values\n"

#### Testing - build a list of the columns to fill with the mode, run the function, and store the result in a new DataFrame called modified_mode

In [38]:

columns=['inspection_frequency','construct_material_lupvalue']
modified_mode = handle_null_values(litter_df,columns,'mode')


In [39]:
FindMissingVal(modified_mode)

[{'Feature': 'asset_number', 'Number of Missing Values': 0},
 {'Feature': 'asset_description', 'Number of Missing Values': 0},
 {'Feature': 'construct_material_lupvalue', 'Number of Missing Values': 0},
 {'Feature': 'inspection_frequency', 'Number of Missing Values': 0},
 {'Feature': 'maintained_by', 'Number of Missing Values': 0},
 {'Feature': 'object_type_lupvalue', 'Number of Missing Values': 4},
 {'Feature': 'lat', 'Number of Missing Values': 0},
 {'Feature': 'lon', 'Number of Missing Values': 0},
 {'Feature': 'location', 'Number of Missing Values': 0}]



## Extract coords

In [5]:
# Build a list of (latitude, longitude) tuples for each dataset.
# How this is done depends on how the dataset stores its coordinates /
# location; sometimes no extraction step is needed at all.

# Latitude and longitude are read from two separate columns.
litter_coords = list(zip(litter_df['lat'], litter_df['lon']))

# Each 'Co-ordinates' value is split on ',' and the first two parts are
# converted to floats (presumably "lat,lon" - verify against the dataset).
bbq_coords = [
    (float(parts[0]), float(parts[1]))
    for parts in (c.split(',') for c in bbq_df['Co-ordinates'])
]

cafe_coords = list(zip(cafe_df['Latitude'], cafe_df['Longitude']))

## Point to point distance calculator minimum ( Thomas )


In [39]:
# Minimum distance from one point to any point in a list
def min_distance(point, list_of_points):
    """Return the smallest geodesic distance, in metres, from `point` to the list.

    Parameters:
    point (tuple): (latitude, longitude) of the reference point.
    list_of_points (list of tuples): (latitude, longitude) pairs to compare
        against.

    Returns:
    float: geodesic distance in metres between `point` and the closest entry
        of `list_of_points`.
    """
    distances = (geodesic(point, candidate).meters for candidate in list_of_points)
    return min(distances)

# Example 1: a single hand-written point.
# NOTE(review): these coordinates look like New York City, which would
# explain the very large distance printed below - confirm this is intended.

row = {'lat': 40.7128, 'lon': -74.0060}
# Wrap min_distance in a callable that takes a row-like mapping with
# 'lat' / 'lon' keys and measures against every barbecue coordinate.
value = lambda row: min_distance((row['lat'], row['lon']), bbq_coords)
# Evaluate the callable on the test row.
result = value(row)
# Show the distance (metres) to the nearest barbecue.
print("test distance in meters :",result)

# Example 2: applied to a whole dataset.
# For every row of litter_df, compute the distance to the nearest barbecue.
# This adds a new column to litter_df in place.
litter_df['Nearest BBQ Distance (m)'] = litter_df.apply(lambda row: min_distance((row['lat'], row['lon']), bbq_coords), axis=1)
# Result: one "nearest distance" value (metres) per row of litter_df.
#creates a new column for nearest distance to a point

test distance in meters : 16669936.382948814


## Point to point distance calculator maximum


In [7]:
# Maximum distance from one point to any point in a list
def max_distance(point, list_of_points):
    """Return the largest geodesic distance, in metres, from `point` to the list.

    Parameters:
    point (tuple): (latitude, longitude) of the reference point.
    list_of_points (list of tuples): (latitude, longitude) pairs to compare
        against.

    Returns:
    float: geodesic distance in metres between `point` and the farthest entry
        of `list_of_points`.
    """
    distances = (geodesic(point, candidate).meters for candidate in list_of_points)
    return max(distances)  # farthest point wins

#example : 

value = max_distance((row['lat'], row['lon']), bbq_coords) 

## Number of points in a given radius 

In [8]:
# Number of points that lie within a radius of a centre point
def count_points_in_radius(center_point, list_of_points, radius_meters):
    """Count how many points lie within `radius_meters` of `center_point`.

    Distances are geodesic and measured in metres; a point exactly on the
    radius is counted.

    Parameters:
    center_point (tuple): (latitude, longitude) of the centre of the circle.
    list_of_points (list of tuples): (latitude, longitude) pairs to test.
    radius_meters (number): radius of the circle in metres.

    Returns:
    int: the number of entries of `list_of_points` inside the radius.
    """
    within = 0
    for candidate in list_of_points:
        if geodesic(center_point, candidate).meters <= radius_meters:
            within += 1
    return within

# Example: applied to a whole dataset

#========Parameter 1 : centre point as a (lat, lon) tuple
#========Parameter 2 : all coordinate points [must be a list of tuples, see example]
#========Parameter 3 : radius in metres

radius = 100
# For every row of litter_df, count the barbecues and cafes (both lists
# concatenated) within `radius` metres. Adds a new column to litter_df in place.
litter_df['Number of Nearby Points in Radius'] = litter_df.apply(lambda row: count_points_in_radius((row['lat'], row['lon']), bbq_coords + cafe_coords,radius),axis=1)

# Example ( single point ) :
# NOTE: `row` here is the module-level test row defined in the min_distance
# example cell, so that cell must have been run first.
values = count_points_in_radius((row['lat'], row['lon']),cafe_coords,radius)

## The Map using folium ( basic )


In [14]:
litter_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 11 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   asset_number                       63 non-null     int64  
 1   asset_description                  63 non-null     object 
 2   construct_material_lupvalue        56 non-null     object 
 3   inspection_frequency               58 non-null     object 
 4   maintained_by                      63 non-null     object 
 5   object_type_lupvalue               59 non-null     object 
 6   lat                                63 non-null     float64
 7   lon                                63 non-null     float64
 8   location                           63 non-null     object 
 9   Nearest BBQ Distance (m)           63 non-null     float64
 10  Number of Nearby Points in Radius  63 non-null     int64  
dtypes: float64(3), int64(2), object(6)
memory usage: 5.5+ KB


In [37]:
from folium.plugins import MarkerCluster

    
def map_func(PointsDatasets, lat_col='lat', lon_col='lon', radius=30,
             color='red', fill_opacity=0.2, zoom_start=13):
    """Draw every point of a DataFrame as a circle on a folium map.

    The map is centred on the mean latitude / longitude of the points.

    Parameters:
    PointsDatasets (DataFrame): one row per point; must contain the latitude
        and longitude columns named by `lat_col` and `lon_col`.
    lat_col (string): name of the latitude column (default 'lat').
    lon_col (string): name of the longitude column (default 'lon').
    radius (number): circle radius in metres (default 30).
    color (string): circle outline / fill colour (default 'red').
    fill_opacity (float): opacity of the circle fill (default 0.2).
    zoom_start (int): initial zoom level of the map (default 13).

    Returns:
    folium.Map: the map with one circle per usable row.

    Raises:
    ValueError: if no row has both a latitude and a longitude.
    """
    # folium rejects NaN coordinates, so rows without a usable position are
    # skipped instead of aborting the whole map.
    points = PointsDatasets.dropna(subset=[lat_col, lon_col])
    if points.empty:
        raise ValueError(
            f"No rows with both '{lat_col}' and '{lon_col}' values to plot"
        )

    # Centre the map on the mean coordinates of the points.
    map_center = [points[lat_col].mean(), points[lon_col].mean()]
    mymap = folium.Map(location=map_center, zoom_start=zoom_start)

    # One circle per point; only the two coordinate columns are needed, so
    # iterate over them directly rather than building a Series per row.
    for lat, lon in zip(points[lat_col], points[lon_col]):
        folium.Circle(
            location=[lat, lon],
            radius=radius,
            color=color,
            fill=True,
            fill_opacity=fill_opacity
        ).add_to(mymap)
    return mymap

# Example usage ========================= Pass in your function =================
"""Make sure your dataframe has a column with both lat and lon"""
map_func(litter_df)