<a href="https://colab.research.google.com/github/CE118/Chioma-s-Project/blob/main/Determinands_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests
import pandas as pd

# Predefined classification rules for determinands
classification_rules = {
    'Oxygen, Dissolved as O2': {
        'High': 'value >= 5.7',
        'Good': '4.0 <= value < 5.7',
        'Moderate': '2.4 <= value < 4.0',
        'Poor': '1.6 <= value < 2.4',
        'Bad': 'value < 1.6'
    },
    'Temperature of Water': {
        'High': 'value <= 15',
        'Good': '15 < value <= 20',
        'Moderate': '20 < value <= 25',
        'Poor': '25 < value <= 30',
        'Bad': 'value > 30'
    },
    'Conductivity at 25 C': {
        'High': 'value < 100',
        'Good': '100 <= value <= 250',
        'Moderate': '250 < value <= 800',
        'Poor': '800 < value <= 1500',
        'Bad': 'value > 1500'
    },
    'Ammoniacal Nitrogen as N': {
        'High': 'value <= 0.2',  # Assuming rivers type 1, 2, 4, and 6; adjust if river type info is available
        'Good': '0.2 < value <= 0.3',
        'Moderate': '0.3 < value <= 0.75',
        'Poor': '0.75 < value <= 1.1',
        'Bad': 'value > 1.1'
    },
    'Nitrogen, Total Oxidised as N': {
        'High': 'value < 1',
        'Good': '1 <= value <= 3',
        'Moderate': '3 < value <= 5',
        'Poor': '5 < value <= 10',
        'Bad': 'value > 10'
    },
    'Nitrate as N': {
        'High': 'value < 1',
        'Good': '1 <= value <= 5',
        'Moderate': '5 < value <= 10',
        'Poor': '10 < value <= 25',
        'Bad': 'value > 25'
    },
    'Nitrite as N': {
        'High': 'value < 0.01',
        'Good': '0.01 <= value <= 0.03',
        'Moderate': '0.03 < value <= 0.1',
        'Poor': '0.1 < value <= 0.2',
        'Bad': 'value > 0.2'
    },
    'Alkalinity to pH 4.5 as CaCO3': {
        'High': 'value > 200',
        'Good': '150 <= value <= 200',
        'Moderate': '100 <= value < 150',
        'Poor': '50 <= value < 100',
        'Bad': 'value < 50'
    },
    'Orthophosphate, reactive as P': {
        'High': 'value < 0.02',
        'Good': '0.02 <= value <= 0.05',
        'Moderate': '0.05 < value <= 0.1',
        'Poor': '0.1 < value <= 0.2',
        'Bad': 'value > 0.2'
    }

}



In [3]:
#For Sampling point 'Sheaf At Millhouses Park' with ID 'NE-49302001'

# Function to classify determinands based on predefined rules
def classify_value(value, rules):
    for category, condition in rules.items():
        if eval(condition):
            return category
    return 'Unknown'

# Input from the user for the determinand
determinant = input("Enter the determinand (e.g., 'Oxygen, Dissolved as O2', 'Temperature of Water', etc.): ")

# Check if the determinant has predefined rules
if determinant not in classification_rules:
    print(f"No predefined rules for '{determinant}'. Please add rules for this determinand.")
else:
    # Get the rules for the selected determinand
    rules = classification_rules[determinant]

    # URL of the API
    url = "http://environment.data.gov.uk/water-quality/batch/measurement"

    # Parameters including the sampling point ID
    params = {
        'samplingPoint': 'NE-49302001'
    }

    # Send a GET request to the API with the parameters
    response = requests.get(url, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        # Save the raw response text to a CSV file first
        raw_csv_file = 'water_quality_measurements_raw.csv'
        with open(raw_csv_file, 'w') as file:
            file.write(response.text)

        # Load the raw CSV data into a pandas DataFrame
        df = pd.read_csv(raw_csv_file)

        # Filter the DataFrame for rows containing the specified determinand
        filtered_df = df[df['determinand.definition'].str.contains(determinant, na=False)]

        # Convert the 'sample.sampleDateTime' to datetime format and extract the year
        filtered_df['year'] = pd.to_datetime(filtered_df['sample.sampleDateTime']).dt.year

        # Calculate the annual averages for all years and round to two decimal places
        annual_averages = filtered_df.groupby('year')['result'].mean().round(2).reset_index()

        # Apply the classification to the annual averages using the predefined rules
        annual_averages['Classification'] = annual_averages['result'].apply(lambda x: classify_value(x, rules))

        # Display the classified data
        print(annual_averages)

    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        print("Response content:", response.text)


Enter the determinand (e.g., 'Oxygen, Dissolved as O2', 'Temperature of Water', etc.): Temperature of Water
    year  result Classification
0   2000    9.54           High
1   2001    8.96           High
2   2002   10.84           High
3   2003    9.04           High
4   2004   11.24           High
5   2005    9.79           High
6   2006    9.82           High
7   2007    9.54           High
8   2008    9.15           High
9   2009    9.39           High
10  2010    9.16           High
11  2011   10.92           High
12  2012   10.79           High
13  2013    8.10           High
14  2014   10.16           High
15  2015    8.57           High
16  2016    9.00           High
17  2017    6.42           High
18  2024   12.40           High


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['year'] = pd.to_datetime(filtered_df['sample.sampleDateTime']).dt.year


In [4]:
#This function takes Sampling ID and Year as inputs, then prints the classifications for the predefined determinands

# Function to classify based on predefined rules
def classify_value(value, rules):
    for category, condition in rules.items():
        if eval(condition):
            return category
    return 'Unknown'

# Get inputs from the user
sampling_point_id = input("Enter the sampling point ID (e.g., 'NE-49302001'): ")
selected_year = int(input("Enter the year you want to check the classifications for (e.g., 2015): "))

# URL of the API
url = "http://environment.data.gov.uk/water-quality/batch/measurement"

# Parameters including the sampling point ID
params = {
    'samplingPoint': sampling_point_id
}

# Send a GET request to the API with the parameters
response = requests.get(url, params=params)

# Check if the request was successful
if response.status_code == 200:
    # Directly save the response text to a CSV file since it appears to be CSV-formatted text
    with open('water_quality_measurements.csv', 'w') as file:
        file.write(response.text)
    print("Data has been successfully saved to water_quality_measurements.csv")

    # Load the CSV data into a pandas DataFrame
    df = pd.read_csv('water_quality_measurements.csv')

    # Check if 'determinand.definition' exists in the DataFrame columns
    if 'determinand.definition' not in df.columns:
        print("The column 'determinand.definition' is not found in the data. Please check the available columns.")
    else:
        # Initialize a DataFrame to hold the combined classification results
        combined_classifications = pd.DataFrame()

        # Loop through each determinand and perform classification
        for determinant, rules in classification_rules.items():
            # Filter the DataFrame for rows containing the specified determinand
            filtered_df = df[df['determinand.definition'].str.contains(determinant, na=False)].copy()

            # Convert the 'sample.sampleDateTime' to datetime format and extract the year
            filtered_df.loc[:, 'year'] = pd.to_datetime(filtered_df['sample.sampleDateTime']).dt.year

            # Calculate the annual averages for all years and round to two decimal places
            annual_averages = filtered_df.groupby('year')['result'].mean().round(2).reset_index()

            # Apply the classification to the annual averages using the predefined rules
            annual_averages[determinant] = annual_averages['result'].apply(lambda x: classify_value(x, rules))

            # Merge with combined classifications
            if combined_classifications.empty:
                combined_classifications = annual_averages[['year', determinant]]
            else:
                combined_classifications = pd.merge(combined_classifications, annual_averages[['year', determinant]], on='year', how='outer')

        # Filter the combined classification DataFrame for the selected year
        if selected_year in combined_classifications['year'].values:
            selected_year_data = combined_classifications[combined_classifications['year'] == selected_year]
            print(f"\nClassifications for the year {selected_year} at sampling point '{sampling_point_id}':\n")
            print(selected_year_data.to_string(index=False))
        else:
            print(f"No data available for the year {selected_year} at sampling point '{sampling_point_id}'.")

else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")
    print("Response content:", response.text)


Enter the sampling point ID (e.g., 'NE-49302001'): NE-49302001
Enter the year you want to check the classifications for (e.g., 2015): 2021
Data has been successfully saved to water_quality_measurements.csv
No data available for the year 2021 at sampling point 'NE-49302001'.
