In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import holoviews as hv
import tensorflow as tf
import json

from holoviews import opts
from bokeh.plotting import show
from plotly.offline import plot
from Detector import DriverDetector
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import History
from tensorflow.keras.models import load_model

2025-03-31 00:44:12.640297: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
####################################################
# prep data by removing rows with unacceptable quality or null values
# as well as removing the quality columns
# save the data to a checkpoint file for testing and reusability
####################################################

# Load the raw export and collect the sensor value columns (their names end with "]").
data = pd.read_csv('Data.csv')
sensors = []

for column in data.columns:
    if not column.endswith("]"):
        continue
    sensors.append(column)

def Qual_Trim(df, sensor_columns=None):
    """Keep only rows where every sensor reading is usable and drop the quality columns.

    A row is kept when, for every sensor, the '<sensor> Quality' column equals
    'Acceptable' and the sensor value itself is not null. The '<sensor> Quality'
    and '<sensor> Quality Last Modified' columns are removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing, for each sensor, the value column plus its two quality columns.
    sensor_columns : iterable of str, optional
        Sensor value column names to check. Defaults to the notebook-level
        ``sensors`` list, which is what the original implementation always used.

    Returns
    -------
    pandas.DataFrame
        Filtered copy of ``df``; the original index labels of the kept rows are preserved.

    Raises
    ------
    KeyError
        If a sensor column or one of its quality columns is missing from ``df``.
    """
    if sensor_columns is None:
        sensor_columns = sensors

    # Positional boolean mask: unlike dropping by index label, this stays correct
    # when the index contains duplicate labels.
    keep = np.ones(len(df), dtype=bool)
    obsolete = []
    for sensor in sensor_columns:
        quality_col = sensor + ' Quality'
        keep &= (df[quality_col] == 'Acceptable').to_numpy()
        keep &= df[sensor].notna().to_numpy()
        obsolete.extend([quality_col, sensor + ' Quality Last Modified'])

    return df.loc[keep].drop(columns=obsolete)

# Apply the quality filter to the loaded frame (rebinds `data` to the cleaned result).
data = Qual_Trim(data)

# Persist the cleaned frame as comma-separated text in a file named 'checkpoint'
# (no extension), without the index and with floats written to two decimals.
data.to_csv('checkpoint', sep=',', encoding='utf-8',index=False,float_format='%.2f')
# Drop the first two entries from the sensor list used by the following cells.
# NOTE(review): the reason for skipping exactly two sensors is not visible here — confirm.
sensors= sensors[2:]

In [None]:
####################################################
# Run the detector on a subset of the data: running it on the entire dataset is not necessary
# and takes a long time, but be sure to take enough data to get a good result.
# Save the results to a file for further analysis and later use, so the detector does not have to be run every time.
####################################################
# Restrict the detector to the last 500 of the first 6000 rows. This used to be
# recomputed inside the loop; the result is the same after the first pass, so it
# is done once up front.
data = data.head(6000)
data = data.tail(500)

for sensor in sensors:
    try:
        print(sensor)
        # Target series is the current sensor; every other column except the
        # timestamp is offered to the detector as a possible driver.
        mytargetSeries = data[sensor].to_numpy()
        mypossibleDrivers = data.drop(["Datetime", sensor], axis='columns').astype(float)
        DD = DriverDetector(mytargetSeries, mypossibleDrivers)
        results = DD.analyse()
        display(results)
        # Identify columns whose first-row value is 'N' and drop them.
        # NOTE(review): `net` is computed but the unfiltered `results` is what gets saved — confirm intent.
        columns_to_drop = results.columns[results.iloc[0] == 'N']
        net = results.drop(columns=columns_to_drop)
        # '/' cannot appear in a file name, so it is stored as '?'.
        fileName = sensor.replace('/', '?')
        results.to_csv('Data/' + fileName, sep=',', encoding='utf-8', index=False, float_format='%.2f')
        #DD.saveResults(path)
    except Exception as e:
        # Catch Exception rather than using a bare except so that KeyboardInterrupt
        # can still stop this long-running cell, and report why the sensor failed.
        print(sensor + 'fail')
        print(e)


In [3]:
####################################################
# create a list of links between sensors based on the results of the detector
# save the list to a file for later use
####################################################
# Minimum row-2 score a column needs for a link to be recorded.
LINK_THRESHOLD = 0.8

links = []

for sensor in sensors:
    try:
        # Detector results were saved with '/' replaced by '?' in the file name.
        # `sensor` is rebound to that form as well, as in the original cell.
        sensor = sensor.replace('/', '?')
        fileName = sensor
        # NOTE(review): this rebinds the notebook-level `data` to the detector output.
        data = pd.read_csv('Data/' + fileName)

        # Row 2 of the detector output is treated as a numeric score; anything that
        # does not parse as a number is mapped to -inf so it never passes the threshold.
        # NOTE(review): an earlier filter on row 0 == 'Y' was immediately overwritten by
        # this threshold filter and has been removed as dead code — confirm the 'Y'
        # flag is not meant to be required as well.
        scores = pd.to_numeric(data.loc[2], errors='coerce').fillna(-float('inf'))
        strong_columns = data.columns[scores > LINK_THRESHOLD]

        for index in strong_columns:
            # Each link is [source file name, partner name with '/' -> '?', raw row-2 value].
            links.append([fileName, index.replace('/', '?'), data.loc[2, index]])
    except Exception as e:
        # A missing or malformed result file is reported and skipped.
        print(e)

with open('links.json', 'w') as f:
    json.dump(links, f)
    

[Errno 2] No such file or directory: 'Data/Relative Humidity (% RH) [Site]'
[Errno 2] No such file or directory: 'Data/Solar Radiation (W?m2) [Site]'


In [4]:
# Display the list of [source, partner, score] links built in the previous cell.
links

[['Flow (l?s) [Catchment 3]', 'Flow (l?s) [Catchment 6]', '0.89'],
 ['Flow (l?s) [Catchment 3]',
  'Soil Moisture @ 10cm Depth (%) [Catchment 8]',
  '0.9'],
 ['Flow (l?s) [Catchment 6]', 'Flow (l?s) [Catchment 3]', '0.87'],
 ['Flow (l?s) [Catchment 6]',
  'Soil Moisture @ 10cm Depth (%) [Catchment 13]',
  '0.89'],
 ['Flow (l?s) [Catchment 6]',
  'Soil Moisture @ 10cm Depth (%) [Catchment 14]',
  '0.82'],
 ['Soil Temperature @ 15cm Depth (oC) [Catchment 1]',
  'Flow (l?s) [Catchment 3]',
  '0.93'],
 ['Soil Temperature @ 15cm Depth (oC) [Catchment 1]',
  'Flow (l?s) [Catchment 6]',
  '0.98'],
 ['Soil Temperature @ 15cm Depth (oC) [Catchment 1]',
  'Air Temperature (oC) [Site]',
  '0.85'],
 ['Soil Temperature @ 15cm Depth (oC) [Catchment 1]',
  'Solar Radiation (W?m2) [Site]',
  '0.86'],
 ['Soil Temperature @ 15cm Depth (oC) [Catchment 1]',
  'Soil Temperature @ 15cm Depth (oC) [Catchment 2]',
  '0.99'],
 ['Soil Temperature @ 15cm Depth (oC) [Catchment 1]',
  'Soil Temperature @ 15cm Dept

In [None]:
# import os
# import pandas as pd
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import InputLayer, LSTM, Dense
# from tensorflow.keras.callbacks import ModelCheckpoint
# from tensorflow.keras.losses import MeanSquaredError
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.metrics import RootMeanSquaredError
# import difflib

# def normalize_string(s):
#     return ''.join(e for e in s if e.isalnum())

# def get_matching_columns(df, sensor):
#     matching_columns = []
#     for col in df.columns:
#         processed_col_name = col.replace(' ', '')
#         if sensor in processed_col_name:
#             matching_columns.append(col)
#     return matching_columns

# window_size = 5
# def df_to_X_y(df, window_size, Sensor):
#     df_as_np = df.to_numpy()
#     x = []
#     y = []
#     #myColumn = None

#     # Process DataFrame column names to match the Sensor variable
#     matching_columns = get_matching_columns(df, Sensor)
        
#     # Print matching columns for debugging
#     print(f"Matching columns for sensor '{Sensor}': {matching_columns}")
        
#     if matching_columns:
#         myColumn = df.columns.get_loc(matching_columns[-1])
#     else:
#         print('No matching column found for Sensor:', Sensor)
#         return None, None

#     for i in range(len(df_as_np)-window_size):
#         row = [a for a in df_as_np[i:i+window_size]]
#         x.append(row)
#         label = df_as_np[i+window_size][myColumn]
#         y.append(label)

#     return np.array(x), np.array(y)

# directory = os.fsencode('Data')

# for file, link in zip(os.listdir(directory), links):
#     sensor = os.fsdecode(file)
#     print(f"Processing sensor: {sensor}")

#     def find_unique_partners(links, sensor):
#         partners = {partner for pair in links for partner in pair if sensor in pair and partner != sensor}
#         return [partner.replace(' ', '') for partner in partners]


#     unique_partners = find_unique_partners(links, sensor)
#     unique_partners.insert(0, sensor)

#     # filter out elements that start with '0' as they are the correlation not the sensor
#     unique_partners = [item for item in unique_partners if not item.startswith('0')]
#     print(f"unique columns: {unique_partners}")
#     df = pd.read_csv('Data.csv')
#     df = df.fillna(-120)  # Replace null values with -120
#     for column in df.columns:
#         if column.endswith("]"):
#             sensors.append(column)
    
#     df = df.drop(df[df[column].isnull()].index)

#     # Print DataFrame columns for debugging
#     print(f"DataFrame columns: {df.columns.tolist()}")

#     # Split the sensor string into words
#     sensor_words = sensor.split()

#     # Process the columns
#     #processed_columns = [col.split('(')[0] + col.split(' ')[-1].split(']')[0] for col in df.columns]
#     #processed_columns = [col.replace(' ', '') for col in processed_columns]
#     processed_columns = [col for col in df.columns]
#     # Print processed columns for debugging
#     print(f"Processed columns: {processed_columns}")



#     def contains_all_letters_in_order(column, sensor):
#         it = iter(column)
#         return all(char in it for char in sensor)

#     filtered_columns = [col for col in df.columns if contains_all_letters_in_order(col, sensor)]

#     for column in df.columns:
#         if 'quality' not in column.lower() and sensor in column:
#             print(f"Column: {column}, Sensor: {sensor}")



# # Print filtered columns for debugging
#     filtered_columns = [col for col in filtered_columns if 'quality' not in col.lower()]
#     print(f"Filtered columns: {filtered_columns}")
#     # Filter the DataFrame to keep only the matched columns

#     df = df[filtered_columns]

#     # Keep only numeric columns
#     df = df.select_dtypes(include=[np.number])

#     # Call function and print shape to check
#     x, y = df_to_X_y(df, window_size, sensor)
#     if x is None or y is None:
#         print(f"Skipping sensor {sensor} due to missing data.")
#         continue

#     inputsize = x.shape[1:]
#     outputsize = y.shape[1:]

#     # Replace all non-numeric values with -120
#     x = np.where(np.isfinite(x), x, -120)
#     y = np.where(np.isfinite(y), y, -120)

#     # Ensure x and y contain only numeric data types
#     x = np.asarray(x).astype('float32')
#     y = np.asarray(y).astype('float32')

#     # Check for any non-numeric values in x and y
#     if not np.issubdtype(x.dtype, np.number):
#         non_numeric_x = x[~np.isfinite(x)]
#         print("Non-numeric values in x:", non_numeric_x)
#         raise ValueError("x contains non-numeric values")
#     if not np.issubdtype(y.dtype, np.number):
#         non_numeric_y = y[~np.isfinite(y)]
#         print("Non-numeric values in y:", non_numeric_y)
#         raise ValueError("y contains non-numeric values")
    
#     # Set some values to split the data 
#     trainSplit = round(x.shape[0]*0.75)
#     valSplit = round(x.shape[0]*0.9)
#     x_train, y_train = x[:trainSplit], y[:trainSplit]
#     X_val, y_val = x[trainSplit:valSplit], y[trainSplit:valSplit]
#     X_test, y_test = x[valSplit:], y[valSplit:]

#     # Convert numpy arrays to tensors
#     x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
#     y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
#     X_val = tf.convert_to_tensor(X_val, dtype=tf.float32)
#     y_val = tf.convert_to_tensor(y_val, dtype=tf.float32)

#     # Set arguments for training model
#     model1 = Sequential()
#     model1.add(InputLayer((inputsize)))
#     model1.add(LSTM(128))
#     model1.add(Dense(30, activation='relu'))
#     model1.add(Dense(8, activation='relu'))
#     model1.add(Dense(1, activation='linear'))
#     model1.summary()

#     # Create checkpoints in case current model isn't as accurate as previous epoch
#     cp = ModelCheckpoint('Models/' + sensor + '.keras', save_best_only=True)
#     model1.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001), metrics=[RootMeanSquaredError()])

#     # Train the model
#     model1.fit(x_train, y_train, validation_data=(X_val, y_val), epochs=20, callbacks=[cp])

#     farmData = pd.read_csv('MyData.csv').fillna(-120)  # Replace null values with -120
#     predictdf = farmData[filtered_columns]
#     for col in predictdf.columns:
#         if sensor in col.replace(' ', ''):
#             I = predictdf.columns.get_loc(col)

#     for col in farmData.columns:
#         if sensor in col.replace(' ', ''):
#             I2 = farmData.columns.get_loc(col)
         
#     for k in range(len(farmData) - window_size):
#         processed_col = col.replace(' ', '')
#         if farmData.iloc[k+window_size, I2+1] != 'Acceptable':
#             X, y = df_to_X_y(predictdf, window_size, sensor) 
#             predX = model1.predict(X[k:k+1], verbose=0)
#             nbr = predX[0][0]

#             farmData.iat[k+window_size, I2+1] = 'Model Data'
#             farmData.iat[k+window_size, I2] = nbr
#             predictdf.iat[k+window_size, I] = nbr    

#     # Save the unedited input CSV
#     df.to_csv('UneditedInput.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')

#     # Save the CSV with model-generated values replacing non-'Acceptable' values
#     farmData.to_csv('ModelGeneratedValues.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')

#     # Create and save the CSV comparing all values to model-generated values
#     comparison_df = farmData.copy()
#     for k in range(len(farmData) - window_size):
#         if abs(farmData.iat[k+window_size, I2] - predictdf.iat[k+window_size, I]) / farmData.iat[k+window_size, I2] > 0.24:
#             comparison_df.iat[k+window_size, I2+1] = 'uncertain'
#     comparison_df.to_csv('ComparisonToModel.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')

#     # Create and save the CSV with 'Sploosh' values replaced by model-generated values
#     sploosh_replaced_df = comparison_df.copy()
#     for k in range(len(farmData) - window_size):
#         if comparison_df.iat[k+window_size, I2+1] == 'uncertain':
#             sploosh_replaced_df.iat[k+window_size, I2] = predictdf.iat[k+window_size, I]
#     sploosh_replaced_df.to_csv('uncertainReplaced.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')

#     print(sensor + ' Done')

In [None]:
import os 
import pandas as pd
import numpy as np  
import tensorflow as tf  
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import InputLayer, LSTM, Dense 
from tensorflow.keras.callbacks import ModelCheckpoint 
from tensorflow.keras.losses import MeanSquaredError 
from tensorflow.keras.optimizers import Adam  
from tensorflow.keras.metrics import RootMeanSquaredError  
import difflib 

# Function to normalize a string by removing non-alphanumeric characters
def normalize_string(s):
    """Return `s` with every non-alphanumeric character removed."""
    kept_chars = [ch for ch in s if ch.isalnum()]
    return ''.join(kept_chars)

# Function to get columns from DataFrame that match a given sensor
def get_matching_columns(df, sensor):
    """Return the columns of `df` whose name contains `sensor`, ignoring case.

    Matching is a plain substring test, and the result keeps the column order of `df`.
    """
    return [col for col in df.columns if sensor.lower() in col.lower()]

window_size = 5  # Number of consecutive rows that make up one input window for the model

# Function to convert DataFrame to input (X) and output (y) arrays for the model
def df_to_X_y(df, window_size, Sensor):
    """Build sliding-window inputs and next-step labels from `df`.

    Each input is `window_size` consecutive rows (all columns); its label is the
    value, in the row immediately after the window, of the last column whose name
    matches `Sensor`.

    Returns (X, y) as numpy arrays, or (None, None) when no column matches.
    """
    candidates = get_matching_columns(df, Sensor)
    if not candidates:
        print('No matching column found for Sensor:', Sensor)
        return None, None

    # The last matching column is used as the prediction target.
    target_idx = df.columns.get_loc(candidates[-1])
    values = df.to_numpy()

    starts = range(len(values) - window_size)
    windows = [list(values[start:start + window_size]) for start in starts]
    labels = [values[start + window_size][target_idx] for start in starts]

    return np.array(windows), np.array(labels)

# Directory holding the per-sensor detector results (bytes path, decoded again in the loop below)
directory = os.fsencode('Data')
df = pd.read_csv('Input.csv')

# Seed every output file with an unmodified copy of the input.
for seed_path in (
    'UneditedInput.csv',
    'ComparisonToModel.csv',
    'uncertainReplaced.csv',
    'ModelGeneratedValues.csv',
):
    df.to_csv(seed_path, sep=',', encoding='utf-8', index=False, float_format='%.2f')

# Loop through files in the directory
# For each detector-result file in 'Data': train an LSTM on Input.csv, then use it to
# infill / flag values in the three output CSVs, which are re-read and re-written per sensor.
# NOTE(review): zip() stops at the shorter of the directory listing and `links`, and `link`
# is never used in the body — presumably a plain loop over the files was intended; confirm.
for file, link in zip(os.listdir(directory), links):
    # NOTE(review): the name is used as-is, so it contains '?' wherever the result file was
    # saved with '/' replaced; if Input.csv column names contain '/', the substring match in
    # df_to_X_y will not find them and the sensor is skipped — confirm.
    sensor = os.fsdecode(file)  # Decode the file name

    # Function to find unique partners for a sensor
    # NOTE(review): re-defined on every iteration; it does not depend on loop state.
    def find_unique_partners(links, sensor):
        # Every element (other than `sensor` itself) of each link that contains `sensor`;
        # this includes the score stored as the third element of the link.
        partners = {partner for pair in links for partner in pair if sensor in pair and partner != sensor}
        return [partner.replace('/', '?') for partner in partners]

    unique_partners = find_unique_partners(links, sensor)  # Get unique partners for the sensor
    unique_partners.insert(0, sensor)

    # Filter out elements that start with '0' as they are the correlation not the sensor
    unique_partners = [item for item in unique_partners if not item.startswith('0')]
    # NOTE(review): unique_partners is not read again in this loop, so the model below is
    # trained on every numeric column rather than only on the linked sensors — confirm intent.

    df = pd.read_csv('Input.csv')  # Read the CSV file into a DataFrame
    # NOTE(review): fillna(method=...) is deprecated in recent pandas; DataFrame.ffill() replaces it.
    df = df.fillna(method='ffill')  # Replace null values with the value from the previous row

    # NOTE(review): appends to the notebook-level `sensors` list on every iteration, so it
    # accumulates duplicates; nothing in this loop reads `sensors`.
    for column in df.columns:
        if column.endswith("]"):
            sensors.append(column)  # Append columns ending with "]" to sensors list
    
    # `column` is the variable left over from the loop above, i.e. the last column of df.
    df = df.drop(df[df[column].isnull()].index)  # Drop rows with null values in the specified column

    processed_columns = [col for col in df.columns]
    processed_columns = [col for col in processed_columns if 'Quality' not in col]
    # NOTE(review): removing from the list while iterating over it skips the element that
    # follows each removal. The 'Quality' branch can never fire because those names were
    # already filtered out on the line above.
    for col in processed_columns:
        if 'Quality' in col:
            processed_columns.remove(col)
        if 'Datetime' in col:
            processed_columns.remove(col)
    
    df = df[processed_columns]  # Filter the DataFrame to keep only the matched columns
    df = df.select_dtypes(include=[np.number])  # Keep only numeric columns

    # Build sliding windows (x) and next-step labels (y) for this sensor
    x, y = df_to_X_y(df, window_size, sensor)
    if x is None or y is None:
        print(f"Skipping sensor {sensor} due to missing data.")
        continue

    inputsize = x.shape[1:]  # Per-sample input shape passed to InputLayer
    outputsize = y.shape[1:]  # Not read again in this loop

    x = np.where(np.isfinite(x), x, -120)  # Replace NaN / infinite values with the sentinel -120
    y = np.where(np.isfinite(y), y, -120)  

    x = np.asarray(x).astype('float32')  # Ensure x contains only numeric data types
    y = np.asarray(y).astype('float32')  

    # NOTE(review): after astype('float32') the dtype is always a numeric dtype, so the two
    # checks below can never raise.
    if not np.issubdtype(x.dtype, np.number):
        non_numeric_x = x[~np.isfinite(x)]
        print("Non-numeric values in x:", non_numeric_x)
        raise ValueError("x contains non-numeric values")
    if not np.issubdtype(y.dtype, np.number):
        non_numeric_y = y[~np.isfinite(y)]
        print("Non-numeric values in y:", non_numeric_y)
        raise ValueError("y contains non-numeric values")
    
    # Chronological split: first 75% train, next 15% validation, final 10% test
    trainSplit = round(x.shape[0]*0.75)  # Set the training split
    valSplit = round(x.shape[0]*0.9)  # Set the validation split
    x_train, y_train = x[:trainSplit], y[:trainSplit]
    X_val, y_val = x[trainSplit:valSplit], y[trainSplit:valSplit]
    X_test, y_test = x[valSplit:], y[valSplit:]  # Not used in this cell

    x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
    y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
    X_val = tf.convert_to_tensor(X_val, dtype=tf.float32)
    y_val = tf.convert_to_tensor(y_val, dtype=tf.float32)

    # Single LSTM layer followed by two small dense layers and a linear output unit
    model1 = Sequential()
    model1.add(InputLayer((inputsize)))
    model1.add(LSTM(128))
    model1.add(Dense(30, activation='relu'))
    model1.add(Dense(8, activation='relu'))
    model1.add(Dense(1, activation='linear'))

    # Checkpoint file per sensor; save_best_only=True keeps only the best model seen so far
    cp = ModelCheckpoint('Models/' + sensor + '.keras', save_best_only=True)
    model1.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.001), metrics=[RootMeanSquaredError()])

    model1.fit(x_train, y_train, validation_data=(X_val, y_val), epochs=50, callbacks=[cp], verbose=0)

    farmData = pd.read_csv('Input.csv').fillna(method='ffill')  # Replace null values with the value from the previous row
    # NOTE(review): unlike `df`, predictdf is not restricted to numeric columns, so its width
    # can differ from the shape the model was trained on — confirm.
    predictdf = farmData[processed_columns]
    # I: position in predictdf of the last column whose name contains the sensor name
    for col in predictdf.columns:
        if sensor.replace('/', '?') in col.replace('/', '?'):
            I = predictdf.columns.get_loc(col)

    # I2: position in farmData of the last column containing the sensor name, minus 3.
    # The code below uses I2+1 as the value column and I2+2 as the quality column.
    # NOTE(review): this presumably relies on the columns being laid out as
    # <sensor>, <sensor> Quality, <sensor> Quality Last Modified — confirm.
    # If no column matches, I / I2 are undefined or stale from the previous sensor.
    for col in farmData.columns:
        if sensor.replace('/', '?') in col.replace('/', '?'):
            I2 = farmData.columns.get_loc(col)-3

    # Re-read the accumulated outputs so edits from previous sensors are kept.
    # `farmData` is rebound to the model-generated file; predictdf still refers to Input.csv data.
    comparison_df = pd.read_csv('ComparisonToModel.csv').astype('object')
    farmData = pd.read_csv('ModelGeneratedValues.csv').astype('object')
    replaced_df = pd.read_csv('uncertainReplaced.csv').astype('object')

    for k in range(len(farmData) - window_size):
        # Predict the value for row k+window_size from the preceding window.
        # NOTE(review): all windows are rebuilt from predictdf on every iteration although only
        # window k is used. If this block raises, `nbr` keeps the previous iteration's value
        # (or is undefined on the first iteration) and the blocks below still run.
        try:
            X, y = df_to_X_y(predictdf, window_size, sensor)
            predX = model1.predict(X[k:k+1].astype('float32'), verbose=0)
            nbr = predX[0][0]
        except Exception as e:
            print(sensor + 'prediction error')
            print(e)
        
        # Infill: where the quality flag is not 'Acceptable', write the prediction into all
        # three outputs and into predictdf (so later windows see the infilled value).
        try:
            if farmData.iloc[k+window_size, I2+2] != 'Acceptable':
                farmData.iat[k+window_size, I2+2] = 'Infilled'
                farmData.iat[k+window_size, I2+1] = nbr  
                comparison_df.iat[k+window_size, I2+2] = 'Infilled'
                comparison_df.iat[k+window_size, I2+1] = nbr  
                replaced_df.iat[k+window_size, I2+2] = 'Infilled'
                replaced_df.iat[k+window_size, I2+1] = nbr  
                predictdf.iat[k+window_size, I] = nbr  
        except Exception as e:
            print(sensor + 'data error')
            print(e)

        # Flag as 'uncertain' when the prediction differs from the stored value by more than 15%.
        # NOTE(review): the denominator is not wrapped in abs(), so a negative stored value can
        # never exceed the threshold; a zero stored value is not guarded either.
        try:
            if abs(nbr-farmData.iat[k+window_size, I2+1])/farmData.iat[k+window_size, I2+1] > 0.15:
                comparison_df.iat[k+window_size, I2+2] = 'uncertain'
                replaced_df.iat[k+window_size, I2+2] = 'uncertain'
        except Exception as e:
            print(sensor + 'comparison error')
            print(e)

        # In replaced_df only, overwrite uncertain values with the prediction and relabel them.
        try:
            if replaced_df.iat[k+window_size, I2+2] == 'uncertain':
                replaced_df.iat[k+window_size, I2+1] = nbr
                replaced_df.iat[k+window_size, I2+2] = 'Replaced'
        except Exception as e:
            print(sensor + 'replacement error')
            print(e)
    # NOTE(review): Keras' summary() prints the table itself; wrapping it in print()
    # presumably also prints its return value — confirm.
    print(model1.summary())
    # NOTE(review): `df` is the filtered numeric-only frame at this point, so this overwrites
    # 'UneditedInput.csv' with something other than the unedited input.
    df.to_csv('UneditedInput.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    farmData.to_csv('ModelGeneratedValues.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    comparison_df.to_csv('ComparisonToModel.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    replaced_df.to_csv('uncertainReplaced.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')

    print(sensor + ' Done')



In [5]:
import os 
import pandas as pd
import numpy as np  
import tensorflow as tf  
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import InputLayer, LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam
from tensorflow.keras.metrics import RootMeanSquaredError  
import difflib 

# Function to normalize a string by removing non-alphanumeric characters
def normalize_string(s):
    """Strip everything except letters and digits from `s`."""
    result = ''
    for ch in s:
        if ch.isalnum():
            result += ch
    return result

# Function to get columns from DataFrame that match a given sensor
def get_matching_columns(df, sensor):
    """List the column names of `df` that contain `sensor` as a case-insensitive substring.

    The returned list follows the column order of `df`.
    """
    hits = []
    for column_name in df.columns:
        if sensor.lower() not in column_name.lower():
            continue
        hits.append(column_name)
    return hits

window_size = 5  # Number of consecutive rows that make up one input window for the model

# Function to convert DataFrame to input (X) and output (y) arrays for the model
def df_to_X_y(df, window_size, Sensor, output_weight=1.5):
    """Turn `df` into sliding-window inputs and next-step labels.

    Each input is a copy of `window_size` consecutive rows (all columns); its label
    is the value, in the row immediately after the window, of the last column whose
    name matches `Sensor`. `output_weight` is accepted but not used by this function.

    Returns (X, y) as numpy arrays, or (None, None) when no column matches.
    """
    candidates = get_matching_columns(df, Sensor)
    if not candidates:
        print('No matching column found for Sensor:', Sensor)
        return None, None

    # The last matching column is used as the prediction target.
    target_idx = df.columns.get_loc(candidates[-1])
    values = df.to_numpy()

    windows, labels = [], []
    for start in range(len(values) - window_size):
        stop = start + window_size
        windows.append(values[start:stop].copy())
        labels.append(values[stop][target_idx])

    return np.array(windows), np.array(labels)

# Directory holding the per-sensor detector results (bytes path, decoded again in the loop below)
directory = os.fsencode('Data')

# Load the input and forward-fill gaps with the previous row's value
df = pd.read_csv('Input.csv').ffill()

# Seed every output file with the same forward-filled copy of the input.
for seed_path in (
    'UneditedInput.csv',
    'ComparisonToModel.csv',
    'uncertainReplaced.csv',
    'ModelGeneratedValues.csv',
):
    df.to_csv(seed_path, sep=',', encoding='utf-8', index=False, float_format='%.2f')

# Loop through files in the directory
# For each detector-result file in 'Data': train a bidirectional-LSTM model on Input.csv, then
# use it to infill / flag values in the output CSVs, which are re-read and re-written per sensor.
# NOTE(review): zip() stops at the shorter of the directory listing and `links`, and `link`
# is never used in the body — presumably a plain loop over the files was intended; confirm.
for file, link in zip(os.listdir(directory), links):
    # File names use '?' in place of '/'; convert back so the name can match Input.csv columns.
    sensor = os.fsdecode(file).replace('?','/')  # Decode the file name

    # Function to find unique partners for a sensor
    # NOTE(review): re-defined on every iteration; it does not depend on loop state.
    def find_unique_partners(links, sensor):
        # Every element (other than `sensor` itself) of each link that contains `sensor`;
        # this includes the score stored as the third element of the link.
        partners = {partner for pair in links for partner in pair if sensor in pair and partner != sensor}
        return [partner.replace('/', '?') for partner in partners]

    # NOTE(review): `sensor` now contains '/' while the names stored in `links` use '?', so
    # sensors with '/' in their name presumably never match here — confirm.
    unique_partners = find_unique_partners(links, sensor)  # Get unique partners for the sensor
    unique_partners.insert(0, sensor)

    # Filter out elements that start with '0' as they are the correlation not the sensor
    unique_partners = [item for item in unique_partners if not item.startswith('0')]
    # NOTE(review): unique_partners is not read again in this loop, so the model below is
    # trained on every numeric column rather than only on the linked sensors — confirm intent.

    df = pd.read_csv('Input.csv')  # Read the CSV file into a DataFrame
    df = df.ffill()  # Replace null values with the value from the previous row

    # NOTE(review): appends to the notebook-level `sensors` list on every iteration, so it
    # accumulates duplicates; nothing in this loop reads `sensors`.
    for column in df.columns:
        if column.endswith("]"):
            sensors.append(column)  # Append columns ending with "]" to sensors list
    
    # `column` is the variable left over from the loop above, i.e. the last column of df.
    df = df.drop(df[df[column].isnull()].index)  # Drop rows with null values in the specified column

    processed_columns = [col for col in df.columns]
    processed_columns = [col for col in processed_columns if 'Quality' not in col]
    # NOTE(review): removing from the list while iterating over it skips the element that
    # follows each removal. The 'Quality' branch can never fire because those names were
    # already filtered out on the line above.
    for col in processed_columns:
        if 'Quality' in col:
            processed_columns.remove(col)
        if 'Datetime' in col:
            processed_columns.remove(col)
    
    df = df[processed_columns]  # Filter the DataFrame to keep only the matched columns
    df = df.select_dtypes(include=[np.number])  # Keep only numeric columns

    # Build sliding windows (x) and next-step labels (y) for this sensor
    x, y = df_to_X_y(df, window_size, sensor)
    if x is None or y is None:
        print(f"Skipping sensor {sensor} due to missing data.")
        continue

    inputsize = x.shape[1:]  # Per-sample input shape passed to InputLayer
    outputsize = y.shape[1:]  # Not read again in this loop

    x = np.where(np.isfinite(x), x, -120)  # Replace NaN / infinite values with the sentinel -120
    y = np.where(np.isfinite(y), y, -120)  

    x = np.asarray(x).astype('float32')  # Ensure x contains only numeric data types
    y = np.asarray(y).astype('float32')  

    # NOTE(review): after astype('float32') the dtype is always a numeric dtype, so the two
    # checks below can never raise.
    if not np.issubdtype(x.dtype, np.number):
        non_numeric_x = x[~np.isfinite(x)]
        print("Non-numeric values in x:", non_numeric_x)
        raise ValueError("x contains non-numeric values")
    if not np.issubdtype(y.dtype, np.number):
        non_numeric_y = y[~np.isfinite(y)]
        print("Non-numeric values in y:", non_numeric_y)
        raise ValueError("y contains non-numeric values")
    
    # Chronological split: first 75% train, next 15% validation, final 10% test
    trainSplit = round(x.shape[0]*0.75)  # Set the training split
    valSplit = round(x.shape[0]*0.9)  # Set the validation split
    x_train, y_train = x[:trainSplit], y[:trainSplit]
    X_val, y_val = x[trainSplit:valSplit], y[trainSplit:valSplit]
    X_test, y_test = x[valSplit:], y[valSplit:]  # Left as numpy arrays; not used inside this loop

    x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
    y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
    X_val = tf.convert_to_tensor(X_val, dtype=tf.float32)
    y_val = tf.convert_to_tensor(y_val, dtype=tf.float32)

    # Updated model definition
    model1 = Sequential()

    # Input layer
    model1.add(InputLayer((inputsize)))

    # Bidirectional LSTM layers
    model1.add(Bidirectional(LSTM(256, return_sequences=True)))  # Bidirectional LSTM
    model1.add(Dropout(0.5))  # Dropout for regularization
    model1.add(Bidirectional(LSTM(128)))  # Second Bidirectional LSTM
    model1.add(Dropout(0.5))

    # Batch normalization for faster convergence
    model1.add(BatchNormalization())

    # Fully connected layers
    model1.add(Dense(64, activation='relu'))
    model1.add(Dense(16, activation='relu'))
    model1.add(Dense(8, activation='relu'))
    model1.add(Dense(1, activation='linear'))  # Output layer

    # Callbacks and compilation
    # NOTE(review): `sensor` may contain '/', which puts an extra directory level into the
    # checkpoint path (e.g. 'Models/Flow (l/s) ...'); the evaluation cell builds the same
    # path, so any change must be made in both places.
    cp = ModelCheckpoint('Models/' + sensor + '.keras', save_best_only=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=20, min_lr=1e-25)  # 10x LR reduction after 20 epochs without val_loss improvement
    early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)  # Stop after 100 epochs without val_loss improvement

    model1.compile(
        loss=Huber(),  # Huber loss for better handling of outliers
        optimizer=RMSprop(learning_rate=0.001),  # Initial learning rate; reduced by lr_scheduler
        metrics=[RootMeanSquaredError()]
    )

    # epochs is an upper bound only; training is expected to end via early_stopping.
    model1.fit(
        x_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50000,
        callbacks=[cp, lr_scheduler, early_stopping],
        verbose=1
    )

    farmData = pd.read_csv('Input.csv').ffill()  # Replace null values with the value from the previous row
    # NOTE(review): unlike `df`, predictdf is not restricted to numeric columns, so its width
    # can differ from the shape the model was trained on — confirm.
    predictdf = farmData[processed_columns]
    # I: position in predictdf of the last column whose name contains the sensor name
    for col in predictdf.columns:
        if sensor.replace('/', '?') in col.replace('/', '?'):
            I = predictdf.columns.get_loc(col)

    # I2: position in farmData of the last column containing the sensor name, minus 3.
    # The code below uses I2+1 as the value column and I2+2 as the quality column.
    # NOTE(review): this presumably relies on the columns being laid out as
    # <sensor>, <sensor> Quality, <sensor> Quality Last Modified — confirm.
    # If no column matches, I / I2 are undefined or stale from the previous sensor.
    for col in farmData.columns:
        if sensor.replace('/', '?') in col.replace('/', '?'):
            I2 = farmData.columns.get_loc(col)-3

    # Re-read the accumulated outputs so edits from previous sensors are kept.
    # `farmData` is rebound to the model-generated file; predictdf still refers to Input.csv data.
    comparison_df = pd.read_csv('ComparisonToModel.csv').astype('object')
    farmData = pd.read_csv('ModelGeneratedValues.csv').astype('object')
    replaced_df = pd.read_csv('uncertainReplaced.csv').astype('object')
    # NOTE(review): new_df starts from uncertainReplaced.csv rather than from new_df.csv, so
    # predictions written to new_df.csv for earlier sensors are presumably lost — confirm.
    new_df = replaced_df.copy()

    for k in range(len(farmData) - window_size):
        # Predict the value for row k+window_size from the preceding window.
        # NOTE(review): all windows are rebuilt from predictdf on every iteration although only
        # window k is used. If this block raises, `nbr` keeps the previous iteration's value
        # (or is undefined on the first iteration) and the blocks below still run.
        try:
            X, y = df_to_X_y(predictdf, window_size, sensor)
            predX = model1.predict(X[k:k+1].astype('float32'), verbose=0)
            nbr = predX[0][0]
        except Exception as e:
            print(sensor + 'prediction error')
            print(e)
        
        # Infill: where the quality flag is not 'Acceptable', write the prediction into all
        # three outputs and into predictdf (so later windows see the infilled value).
        try:
            if farmData.iloc[k+window_size, I2+2] != 'Acceptable':
                farmData.iat[k+window_size, I2+2] = 'Infilled'
                farmData.iat[k+window_size, I2+1] = nbr
                comparison_df.iat[k+window_size, I2+2] = 'Infilled'
                comparison_df.iat[k+window_size, I2+1] = nbr  
                replaced_df.iat[k+window_size, I2+2] = 'Infilled'
                replaced_df.iat[k+window_size, I2+1] = nbr  
                predictdf.iat[k+window_size, I] = nbr  
        except Exception as e:
            print(sensor + 'data error')
            print(e)

        # Flag as 'uncertain' when the prediction differs from the stored value by more than 5%.
        # NOTE(review): the denominator is not wrapped in abs(), so a negative stored value can
        # never exceed the threshold; a zero stored value is not guarded either.
        try:
            if abs(nbr-farmData.iat[k+window_size, I2+1])/farmData.iat[k+window_size, I2+1] > 0.05:
                comparison_df.iat[k+window_size, I2+2] = 'uncertain'
                replaced_df.iat[k+window_size, I2+2] = 'uncertain'
        except Exception as e:
            print(sensor + 'comparison error')
            print(e)

        # In replaced_df only, overwrite uncertain values with the prediction and relabel them.
        try:
            if replaced_df.iat[k+window_size, I2+2] == 'uncertain':
                replaced_df.iat[k+window_size, I2+1] = nbr
                replaced_df.iat[k+window_size, I2+2] = 'Replaced'
        except Exception as e:
            print(sensor + 'replacement error')
            print(e)

        # new_df receives the prediction for every row, regardless of quality flag.
        try:
            new_df.iat[k+window_size, I2+1] = nbr
        except Exception as e:            
            print(sensor + 'new error')
            print(e)
    # NOTE(review): `df` is the filtered numeric-only frame at this point, so this overwrites
    # 'UneditedInput.csv' with something other than the unedited input.
    df.to_csv('UneditedInput.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    farmData.to_csv('ModelGeneratedValues.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    comparison_df.to_csv('ComparisonToModel.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    replaced_df.to_csv('uncertainReplaced.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    new_df.to_csv('new_df.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')

    print(sensor + ' Done')

Epoch 1/50000
Epoch 2/50000
Epoch 3/50000
Epoch 4/50000
Epoch 5/50000
Epoch 6/50000
Epoch 7/50000
Epoch 8/50000
Epoch 9/50000
Epoch 10/50000
Epoch 11/50000
Epoch 12/50000
Epoch 13/50000
Epoch 14/50000
Epoch 15/50000
Epoch 16/50000
Epoch 17/50000
Epoch 18/50000
Epoch 19/50000
Epoch 20/50000
Epoch 21/50000
Epoch 22/50000
Epoch 23/50000
Epoch 24/50000
Epoch 25/50000
Epoch 26/50000
Epoch 27/50000
Epoch 28/50000
Epoch 29/50000
Epoch 30/50000
Epoch 31/50000
Epoch 32/50000
Epoch 33/50000
Epoch 34/50000
Epoch 35/50000
Epoch 36/50000
Epoch 37/50000
Epoch 38/50000
Epoch 39/50000
Epoch 40/50000
Epoch 41/50000
Epoch 42/50000
Epoch 43/50000
Epoch 44/50000
Epoch 45/50000
Epoch 46/50000
Epoch 47/50000
Epoch 48/50000
Epoch 49/50000
Epoch 50/50000
Epoch 51/50000
Epoch 52/50000
Epoch 53/50000
Epoch 54/50000
Epoch 55/50000
Epoch 56/50000
Epoch 57/50000
Epoch 58/50000
Epoch 59/50000
Epoch 60/50000
Epoch 61/50000
Epoch 62/50000
Epoch 63/50000
Epoch 64/50000
Epoch 65/50000
Epoch 66/50000
Epoch 67/50000
Epoc

KeyboardInterrupt: 

In [None]:
# Load the last trained model
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt

# Checkpoint for the sensor currently bound to `sensor`.
# NOTE(review): this cell relies on `sensor`, `X_test` and `y_test` already
# being defined in the kernel by the training cell.
last_model_path = 'Models/' + sensor + '.keras'
model = load_model(last_model_path)

# evaluate() returns the loss followed by the compiled metric(s).
test_loss, test_rmse = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {test_loss}")
print(f"Test RMSE: {test_rmse}")

# One prediction per test window.
y_pred = model.predict(X_test)

# Overlay the true and predicted series on a single axes.
fig, ax = plt.subplots(figsize=(10, 6))
for series, series_label in ((y_test, 'True Values'), (y_pred, 'Predicted Values')):
    ax.plot(series, label=series_label, alpha=0.7)
ax.set_title('True vs Predicted Values')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Value')
ax.legend()
plt.show()

In [6]:
import os 
import pandas as pd
import numpy as np  
import tensorflow as tf  
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import InputLayer, LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.metrics import RootMeanSquaredError  

# Strip every character that is not a letter or a digit
def normalize_string(s):
    """Return `s` with all non-alphanumeric characters removed."""
    return ''.join(filter(str.isalnum, s))

# Case-insensitive substring lookup of a sensor name among the column labels
def get_matching_columns(df, sensor):
    """Return the columns of `df` whose name contains `sensor`.

    The comparison is case-insensitive and substring-based, so a sensor name
    that is a prefix of several column names matches all of them. Columns are
    returned in their DataFrame order.
    """
    return [name for name in df.columns if sensor.lower() in name.lower()]

# Number of consecutive rows that form one model input window; the row that
# immediately follows each window supplies the prediction target (see df_to_X_y).
window_size = 5

# Build sliding-window inputs (X) and next-row targets (y) for one sensor
def df_to_X_y(df, window_size, Sensor, output_weight=1.5):
    """Turn a DataFrame into windowed model inputs and targets.

    Each input is a block of `window_size` consecutive rows covering every
    column of `df`; its target is the value, in the row right after the block,
    of the last column whose name contains `Sensor`.

    Args:
        df: source DataFrame.
        window_size: number of rows per input window.
        Sensor: sensor name used to locate the target column.
        output_weight: accepted for compatibility; not used by this function.

    Returns:
        (X, y) as numpy arrays, or (None, None) when no column matches `Sensor`.
    """
    candidates = get_matching_columns(df, Sensor)
    if not candidates:
        print('No matching column found for Sensor:', Sensor)
        return None, None

    values = df.to_numpy()
    # When several columns match, the last one is used as the target.
    target_col = df.columns.get_loc(candidates[-1])

    n_windows = len(values) - window_size
    windows = [values[start:start + window_size].copy() for start in range(n_windows)]
    labels = [values[start + window_size][target_col] for start in range(n_windows)]

    return np.array(windows), np.array(labels)

# Load the raw input, forward-fill missing values from the previous row and
# keep only the numeric columns.
df = pd.read_csv('Input.csv').ffill().select_dtypes(include=[np.number])

# Seed the four working CSVs with identical copies of the cleaned frame. The
# per-sensor loop below re-reads fileB/fileC/fileD on every iteration and
# rewrites all four files when a sensor is finished.
for seed_path in ('fileA.csv', 'fileB.csv', 'fileC.csv', 'fileD.csv'):
    df.to_csv(seed_path, sep=',', encoding='utf-8', index=False, float_format='%.2f')

# Train one model per numeric column, then use it to infill / flag that column.
for sensor in df.columns:
    print(f"Processing sensor: {sensor}")

    # Sliding windows over all columns (x) and the next-row value of this sensor (y).
    x, y = df_to_X_y(df, window_size, Sensor=sensor)
    if x is None or y is None:
        print(f"Skipping sensor {sensor} due to missing data.")
        continue

    inputsize = x.shape[1:]  # Shape of one input window
    outputsize = y.shape[1:]  # Shape of one target

    x = np.asarray(x).astype('float32')  # Ensure x contains only numeric data types
    y = np.asarray(y).astype('float32')

    # Chronological split: first 75 % train, next 15 % validation, last 10 % test.
    trainSplit = round(x.shape[0] * 0.75)
    valSplit = round(x.shape[0] * 0.9)
    x_train, y_train = x[:trainSplit], y[:trainSplit]
    X_val, y_val = x[trainSplit:valSplit], y[trainSplit:valSplit]
    X_test, y_test = x[valSplit:], y[valSplit:]  # Not used inside this loop

    x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
    y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
    X_val = tf.convert_to_tensor(X_val, dtype=tf.float32)
    y_val = tf.convert_to_tensor(y_val, dtype=tf.float32)

    # Define the model
    model1 = Sequential()

    # Input layer
    model1.add(InputLayer((inputsize)))

    # Two stacked bidirectional LSTM layers, each followed by dropout
    model1.add(Bidirectional(LSTM(256, return_sequences=True)))
    model1.add(Dropout(0.5))
    model1.add(Bidirectional(LSTM(128)))
    model1.add(Dropout(0.5))

    # Batch normalization before the dense head
    model1.add(BatchNormalization())

    # Fully connected head narrowing down to a single regression output
    model1.add(Dense(64, activation='relu'))
    model1.add(Dense(16, activation='relu'))
    model1.add(Dense(8, activation='relu'))
    model1.add(Dense(1, activation='linear'))

    # Callbacks: keep the best checkpoint, cut the learning rate by 10x after
    # 20 epochs without val_loss improvement, stop after 100 such epochs.
    cp = ModelCheckpoint('Models/' + sensor + '.keras', save_best_only=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=20, min_lr=1e-25)
    early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)

    model1.compile(
        loss=Huber(),  # Huber loss for better handling of outliers
        optimizer=RMSprop(learning_rate=0.001),
        metrics=[RootMeanSquaredError()]
    )

    # The epoch count is only an upper bound; early_stopping ends training.
    model1.fit(
        x_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50000,
        callbacks=[cp, lr_scheduler, early_stopping],
        verbose=1
    )

    farmData = pd.read_csv('Input.csv').ffill()  # Replace null values with the value from the previous row
    # .copy() so the in-place infill below edits an independent frame.
    predictdf = farmData[df.columns].copy()
    I = predictdf.columns.get_loc(sensor)

    # Working frames carried over from the previous sensor iteration.
    comparison_df = pd.read_csv('fileC.csv').astype('object')
    farmData = pd.read_csv('fileB.csv').astype('object')
    replaced_df = pd.read_csv('fileD.csv').astype('object')
    new_df = replaced_df.copy()

    # NOTE(review): fileB/fileC/fileD are seeded from the numeric-only `df`, so
    # I+1 and I+2 address the two columns to the right of this sensor in those
    # files, not dedicated value/quality columns — confirm the intended layout.
    for k in range(len(farmData) - window_size):
        target_row = k + window_size

        try:
            # Only the current window is needed. Slicing predictdf directly
            # (instead of rebuilding every window on each step) still picks up
            # values infilled on earlier steps.
            window = predictdf.iloc[k:target_row].to_numpy(dtype='float32')[np.newaxis, ...]
            predX = model1.predict(window, verbose=0)
            nbr = predX[0][0]
        except Exception as e:
            print(sensor + ' prediction error')
            print(e)
            # Without a fresh prediction there is nothing valid to write for
            # this row; skip it rather than reuse a stale or undefined `nbr`.
            continue

        try:
            if farmData.iloc[target_row, I+2] != 'Acceptable':
                farmData.iat[target_row, I+2] = 'Infilled'
                farmData.iat[target_row, I+1] = nbr
                comparison_df.iat[target_row, I+2] = 'Infilled'
                comparison_df.iat[target_row, I+1] = nbr
                replaced_df.iat[target_row, I+2] = 'Infilled'
                replaced_df.iat[target_row, I+1] = nbr
                predictdf.iat[target_row, I] = nbr
        except Exception as e:
            print(sensor + ' data error')
            print(e)

        try:
            actual = farmData.iat[target_row, I+1]
            # Relative deviation against the magnitude of the stored value, so
            # negative readings are compared correctly. A zero reading has no
            # defined relative deviation and is left unflagged.
            if actual != 0 and abs(nbr - actual) / abs(actual) > 0.05:
                comparison_df.iat[target_row, I+2] = 'uncertain'
                replaced_df.iat[target_row, I+2] = 'uncertain'
        except Exception as e:
            print(sensor + ' comparison error')
            print(e)

        try:
            if replaced_df.iat[target_row, I+2] == 'uncertain':
                replaced_df.iat[target_row, I+1] = nbr
                replaced_df.iat[target_row, I+2] = 'Replaced'
        except Exception as e:
            print(sensor + ' replacement error')
            print(e)

        try:
            new_df.iat[target_row, I+1] = nbr
        except Exception as e:
            print(sensor + ' new error')
            print(e)

    # Persist all working frames so the next sensor iteration builds on them.
    df.to_csv('fileA.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    farmData.to_csv('fileB.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    comparison_df.to_csv('fileC.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    replaced_df.to_csv('fileD.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')
    new_df.to_csv('fileE.csv', sep=',', encoding='utf-8', index=False, float_format='%.2f')

    print(sensor + ' Done')

Processing sensor: Flow (l/s) [Catchment 1]
Epoch 1/50000
Epoch 2/50000
Epoch 3/50000
Epoch 4/50000
Epoch 5/50000
Epoch 6/50000
Epoch 7/50000
Epoch 8/50000
Epoch 9/50000
Epoch 10/50000
Epoch 11/50000
Epoch 12/50000
Epoch 13/50000
Epoch 14/50000
Epoch 15/50000
Epoch 16/50000
Epoch 17/50000
Epoch 18/50000
Epoch 19/50000
Epoch 20/50000
Epoch 21/50000
Epoch 22/50000
Epoch 23/50000
Epoch 24/50000
Epoch 25/50000
Epoch 26/50000
Epoch 27/50000
Epoch 28/50000
Epoch 29/50000
Epoch 30/50000
Epoch 31/50000
Epoch 32/50000
Epoch 33/50000
Epoch 34/50000
Epoch 35/50000
Epoch 36/50000
Epoch 37/50000
Epoch 38/50000
Epoch 39/50000
Epoch 40/50000
Epoch 41/50000
Epoch 42/50000
Epoch 43/50000
Epoch 44/50000
Epoch 45/50000
Epoch 46/50000
Epoch 47/50000
Epoch 48/50000
Epoch 49/50000
Epoch 50/50000
Epoch 51/50000
Epoch 52/50000
Epoch 53/50000
Epoch 54/50000
Epoch 55/50000
Epoch 56/50000
Epoch 57/50000
Epoch 58/50000
Epoch 59/50000
Epoch 60/50000
Epoch 61/50000
Epoch 62/50000
Epoch 63/50000
Epoch 64/50000
Epoch

KeyboardInterrupt: 

In [18]:
import pandas as pd
import folium

# Read the catchment border polygons. The rest of this cell uses the 'WKT'
# column (polygon geometry) and the 'name' column (popup label).
df = pd.read_csv('RothBorders.csv')

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Return the first ring of a WKT polygon as a list of (lat, lon) tuples.

    WKT writes each vertex as "x y"; the two values are swapped on output so
    the second one comes first (the callers treat the result as lat, lon).
    Spacing after "POLYGON" and around commas does not matter, and only the
    first ring is returned, so interior rings (holes) are ignored.

    Args:
        wkt_str: WKT text such as "POLYGON ((x y, x y, ...))".

    Returns:
        List of (float, float) tuples, one per vertex of the first ring.

    Raises:
        ValueError: if a vertex contains a non-numeric value.
        IndexError: if a vertex has fewer than two values.
    """
    # Drop everything up to and including the first "(" (find() returns -1 when
    # there is none, which keeps the whole string), then any further opening
    # parentheses, so the text starts at the first vertex.
    ring_text = wkt_str[wkt_str.find('(') + 1:].lstrip('( \t\r\n')
    # The first ")" ends the first ring.
    ring_text = ring_text.split(')', 1)[0]

    coords = []
    for vertex in ring_text.split(','):
        parts = vertex.split()
        if not parts:
            continue  # Tolerate a stray or trailing comma
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Parse every polygon once up front; the vertices are needed to centre the map.
all_coords = df['WKT'].apply(parse_wkt)

# Map centre = mean latitude / longitude over every polygon vertex.
all_latitudes = []
all_longitudes = []
for ring in all_coords:
    for lat, lon in ring:
        all_latitudes.append(lat)
        all_longitudes.append(lon)
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

m = folium.Map(location=center, zoom_start=12)

# One blue, half-transparent polygon per border row, with the catchment name
# shown in a popup when the polygon is clicked.
polygon_style = dict(color='blue', fill=True, fill_color='blue', fill_opacity=0.5)
for _, row in df.iterrows():
    coords = parse_wkt(row['WKT'])
    polygon = folium.Polygon(locations=coords, **polygon_style)
    polygon.add_child(folium.Popup(row['name']))
    polygon.add_to(m)

# Write the standalone map page.
m.save('roth_map.html')

# Wrap the saved folium page in a custom page that adds a fixed info box.
# folium writes its HTML as UTF-8, so read and write with an explicit encoding
# instead of the platform default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# Name of the JavaScript variable that holds the Leaflet map in the saved page.
map_js_name = m.get_name()

# The folium page is embedded as HTML (not inside <script>, where it would be
# a syntax error). Click handlers are attached to the Leaflet layers, because
# the bound popup lives on the layer object and not on the SVG DOM element.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 10px;
            right: 10px;
            width: 200px;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name) {{
            document.getElementById('catchment-name').innerText = name;
        }}
        {map_js_name}.eachLayer(function(layer) {{
            var popup = (typeof layer.getPopup === 'function') ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                var content = popup.getContent();
                var name = (typeof content === 'string') ? content : content.textContent;
                updateInfoBox(name.trim());
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

In [25]:
import folium

# Centre the map on the Roth catchment and drop a single green marker there.
roth_location = [50.770828, -3.907471]
m = folium.Map(location=roth_location, zoom_start=14)

marker = folium.Marker(
    location=roth_location,
    popup='Roth Catchment',
    icon=folium.Icon(color='green'),
)
marker.add_to(m)

# Last expression of the cell: renders the map inline.
m

In [None]:
import pandas as pd
import folium

# Read the catchment border polygons. The rest of this cell uses the 'WKT'
# column (polygon geometry) and the 'name' column (popup label).
df = pd.read_csv('RothBorders.csv')

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Return the first ring of a WKT polygon as a list of (lat, lon) tuples.

    WKT writes each vertex as "x y"; the two values are swapped on output so
    the second one comes first (the callers treat the result as lat, lon).
    Spacing after "POLYGON" and around commas does not matter, and only the
    first ring is returned, so interior rings (holes) are ignored.

    Args:
        wkt_str: WKT text such as "POLYGON ((x y, x y, ...))".

    Returns:
        List of (float, float) tuples, one per vertex of the first ring.

    Raises:
        ValueError: if a vertex contains a non-numeric value.
        IndexError: if a vertex has fewer than two values.
    """
    # Drop everything up to and including the first "(" (find() returns -1 when
    # there is none, which keeps the whole string), then any further opening
    # parentheses, so the text starts at the first vertex.
    ring_text = wkt_str[wkt_str.find('(') + 1:].lstrip('( \t\r\n')
    # The first ")" ends the first ring.
    ring_text = ring_text.split(')', 1)[0]

    coords = []
    for vertex in ring_text.split(','):
        parts = vertex.split()
        if not parts:
            continue  # Tolerate a stray or trailing comma
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Parse every polygon once up front; the vertices are needed to centre the map.
all_coords = df['WKT'].apply(parse_wkt)

# Map centre = mean latitude / longitude over every polygon vertex.
all_latitudes = []
all_longitudes = []
for ring in all_coords:
    for lat, lon in ring:
        all_latitudes.append(lat)
        all_longitudes.append(lon)
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

m = folium.Map(location=center, zoom_start=14)

# One blue, half-transparent polygon per border row, with the catchment name
# shown in a popup when the polygon is clicked.
polygon_style = dict(color='blue', fill=True, fill_color='blue', fill_opacity=0.5)
for _, row in df.iterrows():
    coords = parse_wkt(row['WKT'])
    polygon = folium.Polygon(locations=coords, **polygon_style)
    polygon.add_child(folium.Popup(row['name']))
    polygon.add_to(m)

# Write the standalone map page, then render the map inline.
m.save('roth_map.html')
m

In [7]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import time

# Read the CSV files:
#   - RothBorders.csv: one polygon per row ('WKT' geometry, 'name' label).
#   - uncertainReplaced.csv: table whose column names are matched against
#     "<name>]" further down to pick the series charted for each polygon.
df = pd.read_csv('RothBorders.csv')
uncertain_df = pd.read_csv('uncertainReplaced.csv')

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Return the first ring of a WKT polygon as a list of (lat, lon) tuples.

    WKT writes each vertex as "x y"; the two values are swapped on output so
    the second one comes first (the callers treat the result as lat, lon).
    Spacing after "POLYGON" and around commas does not matter, and only the
    first ring is returned, so interior rings (holes) are ignored.

    Args:
        wkt_str: WKT text such as "POLYGON ((x y, x y, ...))".

    Returns:
        List of (float, float) tuples, one per vertex of the first ring.

    Raises:
        ValueError: if a vertex contains a non-numeric value.
        IndexError: if a vertex has fewer than two values.
    """
    # Drop everything up to and including the first "(" (find() returns -1 when
    # there is none, which keeps the whole string), then any further opening
    # parentheses, so the text starts at the first vertex.
    ring_text = wkt_str[wkt_str.find('(') + 1:].lstrip('( \t\r\n')
    # The first ")" ends the first ring.
    ring_text = ring_text.split(')', 1)[0]

    coords = []
    for vertex in ring_text.split(','):
        parts = vertex.split()
        if not parts:
            continue  # Tolerate a stray or trailing comma
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Function to create a graph and return it as a base64 string
def create_graph(data, title):
    """Render `data` as a bar chart and return the PNG as a base64 string.

    Args:
        data: object exposing a pandas-style ``.plot(kind='bar', ax=...)``.
        title: text used as the chart title.

    Returns:
        The PNG image bytes encoded as a base64 ``str``.
    """
    fig, ax = plt.subplots()
    try:
        data.plot(kind='bar', ax=ax)
        ax.set_title(title)
        buf = BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode('utf-8')

# Time the WKT parsing; start_time is reused for the end-of-cell total.
start_time = time.time()
all_coords = df['WKT'].apply(parse_wkt)
print(f"Parsed WKT coordinates in {time.time() - start_time:.2f} seconds")

# Map centre = mean latitude / longitude over every polygon vertex.
all_latitudes = []
all_longitudes = []
for ring in all_coords:
    for lat, lon in ring:
        all_latitudes.append(lat)
        all_longitudes.append(lon)
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

m = folium.Map(location=center, zoom_start=14)

# One polygon per border row. Its popup holds the catchment name followed by
# one embedded chart for every uncertain_df column whose name contains "<name>]".
polygon_style = dict(color='blue', fill=True, fill_color='blue', fill_opacity=0.5)
for _, row in df.iterrows():
    coords = parse_wkt(row['WKT'])
    popup_html = f"<h4>{row['name']}</h4>"

    for col in uncertain_df.columns:
        if (row['name']+']') not in col:
            continue
        try:
            numeric_data = pd.to_numeric(uncertain_df[col])
            graph_base64 = create_graph(numeric_data, col)
            popup_html += f'<img src="data:image/png;base64,{graph_base64}"><br>'
        except ValueError:
            # e.g. a column that cannot be converted to numbers: no chart for it
            continue

    polygon = folium.Polygon(locations=coords, **polygon_style)
    polygon.add_child(folium.Popup(popup_html, max_width=300))
    polygon.add_to(m)

# Write the standalone map page.
m.save('roth_map.html')

# Wrap the saved folium page in a custom page that adds a fixed info box.
# folium writes its HTML as UTF-8, so read and write with an explicit encoding
# instead of the platform default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# Name of the JavaScript variable that holds the Leaflet map in the saved page.
map_js_name = m.get_name()

# The folium page is embedded as HTML (not inside <script>, where it would be
# a syntax error). Click handlers are attached to the Leaflet layers, because
# the bound popup lives on the layer object and not on the SVG DOM element.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 10px;
            right: 10px;
            width: 200px;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name) {{
            document.getElementById('catchment-name').innerText = name;
        }}
        {map_js_name}.eachLayer(function(layer) {{
            var popup = (typeof layer.getPopup === 'function') ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                var content = popup.getContent();
                var name = (typeof content === 'string') ? content : content.textContent;
                updateInfoBox(name.trim());
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

print(f"Script completed in {time.time() - start_time:.2f} seconds")



In [8]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import time

# Read the CSV files:
#   - RothBorders.csv: one polygon per row ('WKT' geometry, 'name' label).
#   - uncertainReplaced.csv: table whose column names are matched against
#     "<name>]" further down to pick the series charted for each polygon.
df = pd.read_csv('RothBorders.csv')
uncertain_df = pd.read_csv('uncertainReplaced.csv')

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Return the first ring of a WKT polygon as a list of (lat, lon) tuples.

    WKT writes each vertex as "x y"; the two values are swapped on output so
    the second one comes first (the callers treat the result as lat, lon).
    Spacing after "POLYGON" and around commas does not matter, and only the
    first ring is returned, so interior rings (holes) are ignored.

    Args:
        wkt_str: WKT text such as "POLYGON ((x y, x y, ...))".

    Returns:
        List of (float, float) tuples, one per vertex of the first ring.

    Raises:
        ValueError: if a vertex contains a non-numeric value.
        IndexError: if a vertex has fewer than two values.
    """
    # Drop everything up to and including the first "(" (find() returns -1 when
    # there is none, which keeps the whole string), then any further opening
    # parentheses, so the text starts at the first vertex.
    ring_text = wkt_str[wkt_str.find('(') + 1:].lstrip('( \t\r\n')
    # The first ")" ends the first ring.
    ring_text = ring_text.split(')', 1)[0]

    coords = []
    for vertex in ring_text.split(','):
        parts = vertex.split()
        if not parts:
            continue  # Tolerate a stray or trailing comma
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Function to create a graph and return it as a base64 string
def create_graph(data, title):
    """Render `data` as a bar chart and return the PNG as a base64 string.

    Args:
        data: object exposing a pandas-style ``.plot(kind='bar', ax=...)``.
        title: text used as the chart title.

    Returns:
        The PNG image bytes encoded as a base64 ``str``.
    """
    fig, ax = plt.subplots()
    try:
        data.plot(kind='bar', ax=ax)
        ax.set_title(title)
        buf = BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode('utf-8')

# Time the WKT parsing; start_time is reused for the end-of-cell total.
start_time = time.time()
all_coords = df['WKT'].apply(parse_wkt)
print(f"Parsed WKT coordinates in {time.time() - start_time:.2f} seconds")

# Map centre = mean latitude / longitude over every polygon vertex.
all_latitudes = []
all_longitudes = []
for ring in all_coords:
    for lat, lon in ring:
        all_latitudes.append(lat)
        all_longitudes.append(lon)
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

m = folium.Map(location=center, zoom_start=14)

# One polygon per border row. Its popup holds the catchment name followed by
# one embedded chart for every uncertain_df column whose name contains "<name>]".
polygon_style = dict(color='blue', fill=True, fill_color='blue', fill_opacity=0.5)
for _, row in df.iterrows():
    coords = parse_wkt(row['WKT'])
    popup_html = f"<h4>{row['name']}</h4>"

    for col in uncertain_df.columns:
        if (row['name']+']') not in col:
            continue
        try:
            numeric_data = pd.to_numeric(uncertain_df[col])
            graph_base64 = create_graph(numeric_data, col)
            popup_html += f'<img src="data:image/png;base64,{graph_base64}"><br>'
        except ValueError:
            # e.g. a column that cannot be converted to numbers: no chart for it
            continue

    polygon = folium.Polygon(locations=coords, **polygon_style)
    polygon.add_child(folium.Popup(popup_html, max_width=300))
    polygon.add_to(m)

# Write the standalone map page.
m.save('roth_map.html')

# Wrap the saved folium page in a custom page that adds a fixed info box
# showing the clicked catchment's name and its first chart.
# folium writes its HTML as UTF-8, so read and write with an explicit encoding
# instead of the platform default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# Name of the JavaScript variable that holds the Leaflet map in the saved page.
map_js_name = m.get_name()

# The folium page is embedded as HTML (not inside <script>, where it would be
# a syntax error). Click handlers are attached to the Leaflet layers, because
# the bound popup lives on the layer object and not on the SVG DOM element.
# Popups without a chart are handled instead of dereferencing a missing <img>.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 10px;
            right: 10px;
            width: 200px;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
        <div id="catchment-graph"></div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name, graph) {{
            document.getElementById('catchment-name').innerText = name;
            document.getElementById('catchment-graph').innerHTML = graph;
        }}
        {map_js_name}.eachLayer(function(layer) {{
            var popup = (typeof layer.getPopup === 'function') ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                var content = popup.getContent();
                var root = content;
                if (typeof content === 'string') {{
                    root = new DOMParser().parseFromString(content, 'text/html');
                }}
                var heading = root.querySelector('h4');
                var image = root.querySelector('img');
                updateInfoBox(heading ? heading.textContent : '', image ? image.outerHTML : '');
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

print(f"Script completed in {time.time() - start_time:.2f} seconds")



In [10]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import time

# Read the CSV files:
#   - RothBorders.csv: one polygon per row ('WKT' geometry, 'name' label).
#   - uncertainReplaced.csv: table whose column names are matched against
#     "<name>]" further down to pick the series charted for each polygon.
df = pd.read_csv('RothBorders.csv')
uncertain_df = pd.read_csv('uncertainReplaced.csv')

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Return the first ring of a WKT polygon as a list of (lat, lon) tuples.

    WKT writes each vertex as "x y"; the two values are swapped on output so
    the second one comes first (the callers treat the result as lat, lon).
    Spacing after "POLYGON" and around commas does not matter, and only the
    first ring is returned, so interior rings (holes) are ignored.

    Args:
        wkt_str: WKT text such as "POLYGON ((x y, x y, ...))".

    Returns:
        List of (float, float) tuples, one per vertex of the first ring.

    Raises:
        ValueError: if a vertex contains a non-numeric value.
        IndexError: if a vertex has fewer than two values.
    """
    # Drop everything up to and including the first "(" (find() returns -1 when
    # there is none, which keeps the whole string), then any further opening
    # parentheses, so the text starts at the first vertex.
    ring_text = wkt_str[wkt_str.find('(') + 1:].lstrip('( \t\r\n')
    # The first ")" ends the first ring.
    ring_text = ring_text.split(')', 1)[0]

    coords = []
    for vertex in ring_text.split(','):
        parts = vertex.split()
        if not parts:
            continue  # Tolerate a stray or trailing comma
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Function to create a graph and return it as a base64 string
def create_graph(data, title):
    """Render `data` as a bar chart and return the PNG as a base64 string.

    Args:
        data: object exposing a pandas-style ``.plot(kind='bar', ax=...)``.
        title: text used as the chart title.

    Returns:
        The PNG image bytes encoded as a base64 ``str``.
    """
    fig, ax = plt.subplots()
    try:
        data.plot(kind='bar', ax=ax)
        ax.set_title(title)
        buf = BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode('utf-8')

# Time the WKT parsing; start_time is reused for the end-of-cell total.
start_time = time.time()
all_coords = df['WKT'].apply(parse_wkt)
print(f"Parsed WKT coordinates in {time.time() - start_time:.2f} seconds")

# Map centre = mean latitude / longitude over every polygon vertex.
all_latitudes = []
all_longitudes = []
for ring in all_coords:
    for lat, lon in ring:
        all_latitudes.append(lat)
        all_longitudes.append(lon)
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

m = folium.Map(location=center, zoom_start=14)

# One polygon per border row. Its popup holds the catchment name followed by
# one embedded chart for every uncertain_df column whose name contains "<name>]".
polygon_style = dict(color='blue', fill=True, fill_color='blue', fill_opacity=0.5)
for _, row in df.iterrows():
    coords = parse_wkt(row['WKT'])
    popup_html = f"<h4>{row['name']}</h4>"

    for col in uncertain_df.columns:
        if (row['name']+']') not in col:
            continue
        try:
            numeric_data = pd.to_numeric(uncertain_df[col])
            graph_base64 = create_graph(numeric_data, col)
            popup_html += f'<img src="data:image/png;base64,{graph_base64}"><br>'
        except ValueError:
            # e.g. a column that cannot be converted to numbers: no chart for it
            continue

    polygon = folium.Polygon(locations=coords, **polygon_style)
    polygon.add_child(folium.Popup(popup_html, max_width=300))
    polygon.add_to(m)

# Write the standalone map page.
m.save('roth_map.html')

# Wrap the saved folium page in a custom page that adds a fixed info box
# showing the clicked catchment's name and its first chart.
# folium writes its HTML as UTF-8, so read and write with an explicit encoding
# instead of the platform default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# Name of the JavaScript variable that holds the Leaflet map in the saved page.
map_js_name = m.get_name()

# Click handlers are attached to the Leaflet layers, because the bound popup
# lives on the layer object and not on the SVG DOM element.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 10px;
            right: 10px;
            width: 200px;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
        <div id="catchment-graph"></div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name, graph) {{
            document.getElementById('catchment-name').innerText = name;
            document.getElementById('catchment-graph').innerHTML = graph;
        }}
        {map_js_name}.eachLayer(function(layer) {{
            var popup = (typeof layer.getPopup === 'function') ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                var content = popup.getContent();
                var root = content;
                if (typeof content === 'string') {{
                    root = new DOMParser().parseFromString(content, 'text/html');
                }}
                var heading = root.querySelector('h4');
                var image = root.querySelector('img');
                updateInfoBox(heading ? heading.textContent : '', image ? image.outerHTML : '');
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

print(f"Script completed in {time.time() - start_time:.2f} seconds")



In [12]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import time

# Read the CSV files:
#   - RothBorders.csv: one polygon per row ('WKT' geometry, 'name' label).
#   - uncertainReplaced.csv: table whose column names are matched against
#     "<name>]" further down to pick the series charted for each polygon.
df = pd.read_csv('RothBorders.csv')
uncertain_df = pd.read_csv('uncertainReplaced.csv')

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Return the first ring of a WKT polygon as a list of (lat, lon) tuples.

    WKT writes each vertex as "x y"; the two values are swapped on output so
    the second one comes first (the callers treat the result as lat, lon).
    Spacing after "POLYGON" and around commas does not matter, and only the
    first ring is returned, so interior rings (holes) are ignored.

    Args:
        wkt_str: WKT text such as "POLYGON ((x y, x y, ...))".

    Returns:
        List of (float, float) tuples, one per vertex of the first ring.

    Raises:
        ValueError: if a vertex contains a non-numeric value.
        IndexError: if a vertex has fewer than two values.
    """
    # Drop everything up to and including the first "(" (find() returns -1 when
    # there is none, which keeps the whole string), then any further opening
    # parentheses, so the text starts at the first vertex.
    ring_text = wkt_str[wkt_str.find('(') + 1:].lstrip('( \t\r\n')
    # The first ")" ends the first ring.
    ring_text = ring_text.split(')', 1)[0]

    coords = []
    for vertex in ring_text.split(','):
        parts = vertex.split()
        if not parts:
            continue  # Tolerate a stray or trailing comma
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Function to create a graph and return it as a base64 string
def create_graph(data, title):
    """Render `data` as a bar chart and return the PNG as a base64 string.

    Args:
        data: object exposing a pandas-style ``.plot(kind='bar', ax=...)``.
        title: text used as the chart title.

    Returns:
        The PNG image bytes encoded as a base64 ``str``.
    """
    fig, ax = plt.subplots()
    try:
        data.plot(kind='bar', ax=ax)
        ax.set_title(title)
        buf = BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode('utf-8')

# Time the WKT parsing; start_time is reused for the end-of-cell total.
start_time = time.time()
all_coords = df['WKT'].apply(parse_wkt)
print(f"Parsed WKT coordinates in {time.time() - start_time:.2f} seconds")

# Map centre = mean latitude / longitude over every polygon vertex.
all_latitudes = []
all_longitudes = []
for ring in all_coords:
    for lat, lon in ring:
        all_latitudes.append(lat)
        all_longitudes.append(lon)
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

m = folium.Map(location=center, zoom_start=14)

# One polygon per border row. Its popup holds the catchment name followed by
# one embedded chart for every uncertain_df column whose name contains "<name>]".
polygon_style = dict(color='blue', fill=True, fill_color='blue', fill_opacity=0.5)
for _, row in df.iterrows():
    coords = parse_wkt(row['WKT'])
    popup_html = f"<h4>{row['name']}</h4>"

    for col in uncertain_df.columns:
        if (row['name']+']') not in col:
            continue
        try:
            numeric_data = pd.to_numeric(uncertain_df[col])
            graph_base64 = create_graph(numeric_data, col)
            popup_html += f'<img src="data:image/png;base64,{graph_base64}"><br>'
        except ValueError:
            # e.g. a column that cannot be converted to numbers: no chart for it
            continue

    polygon = folium.Polygon(locations=coords, **polygon_style)
    polygon.add_child(folium.Popup(popup_html, max_width=300))
    polygon.add_to(m)

# Write the standalone map page.
m.save('roth_map.html')

# Wrap the saved folium page in a custom page that adds a fixed, scrollable
# info box showing the clicked catchment's name and its first chart.
# folium writes its HTML as UTF-8, so read and write with an explicit encoding
# instead of the platform default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# Name of the JavaScript variable that holds the Leaflet map in the saved page.
map_js_name = m.get_name()

# Click handlers are attached to the Leaflet layers, because the bound popup
# lives on the layer object and not on the SVG DOM element.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 10px;
            right: 10px;
            width: 200px;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
            overflow-y: auto;
            max-height: 90vh;
        }}
        .leaflet-popup-content {{
            max-height: 400px;
            overflow-y: auto;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
        <div id="catchment-graph"></div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name, graph) {{
            document.getElementById('catchment-name').innerText = name;
            document.getElementById('catchment-graph').innerHTML = graph;
        }}
        {map_js_name}.eachLayer(function(layer) {{
            var popup = (typeof layer.getPopup === 'function') ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                var content = popup.getContent();
                var root = content;
                if (typeof content === 'string') {{
                    root = new DOMParser().parseFromString(content, 'text/html');
                }}
                var heading = root.querySelector('h4');
                var image = root.querySelector('img');
                updateInfoBox(heading ? heading.textContent : '', image ? image.outerHTML : '');
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

print(f"Script completed in {time.time() - start_time:.2f} seconds")



In [None]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import time

# Load the two input tables in one statement: the borders table (its 'WKT'
# and 'name' columns are used below) and the table whose column names are
# later matched against each border's name.
df, uncertain_df = pd.read_csv('RothBorders.csv'), pd.read_csv('uncertainReplaced.csv')

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Parse a single-ring WKT ``POLYGON`` string into a list of coordinate tuples.

    Each vertex is written in the input as ``"<first> <second>"``; the two
    values are swapped on output, so every returned tuple is
    ``(second, first)``. The rest of the cell unpacks these tuples as
    ``(lat, lon)``.

    The parser is tolerant of formatting differences: the ``POLYGON`` keyword
    is matched case-insensitively and may be absent, whitespace around the
    keyword, parentheses and commas is ignored, and vertices may be separated
    by ``","`` with or without a following space.

    Parameters
    ----------
    wkt_str : str
        WKT text such as ``"POLYGON ((x1 y1, x2 y2, ...))"``.

    Returns
    -------
    list[tuple[float, float]]
        One ``(second, first)`` tuple per vertex, in input order.

    Raises
    ------
    ValueError
        If a vertex does not contain two numeric values (this includes
        multi-ring polygons, whose inner parentheses are not supported).
    """
    text = wkt_str.strip()
    # Drop the geometry keyword if present, regardless of letter case.
    if text[:7].upper() == "POLYGON":
        text = text[7:]
    # Remove the surrounding "((" / "))" together with any padding whitespace.
    ring = text.strip("() \t\r\n")

    coords = []
    for pair in ring.split(","):
        parts = pair.split()
        if len(parts) < 2:
            raise ValueError(f"Malformed WKT coordinate pair {pair!r} in {wkt_str!r}")
        # Swap the order: the second value comes first in the result.
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Function to create a graph and return it as a base64 string
def create_graph(data, title):
    """Draw ``data`` as a bar chart and return the PNG image as a base64 string.

    Parameters
    ----------
    data : object exposing a pandas-style ``.plot(kind='bar', ax=...)`` method
        The values to plot (the caller passes a numeric pandas Series).
    title : str
        Text placed above the chart.

    Returns
    -------
    str
        The rendered PNG, base64-encoded and decoded to text so it can be
        embedded in a ``data:image/png;base64,...`` URI.
    """
    fig, ax = plt.subplots()
    try:
        data.plot(kind='bar', ax=ax)
        ax.set_title(title)
        ax.set_xticklabels([])  # Remove x-axis labels
        buf = BytesIO()
        # Save through the figure handle instead of pyplot's implicit
        # "current figure", so the right figure is always written.
        fig.savefig(buf, format='png')
        return base64.b64encode(buf.getvalue()).decode('utf-8')
    finally:
        # Always release the figure, even if plotting or saving failed;
        # this function is called once per matching column in a loop.
        plt.close(fig)

# Extract all coordinates from the WKT column
start_time = time.time()
all_coords = df['WKT'].apply(parse_wkt)
print(f"Parsed WKT coordinates in {time.time() - start_time:.2f} seconds")

# Calculate the center of the map as the mean of every polygon vertex
all_latitudes = [lat for coords in all_coords for lat, lon in coords]
all_longitudes = [lon for coords in all_coords for lat, lon in coords]
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

# Create a map centered around the calculated center
m = folium.Map(location=center, zoom_start=14)

# Iterate through the rows of the DataFrame and add shapes to the map.
# The geometries were already parsed into all_coords above, so they are
# reused here instead of parsing every WKT string a second time.
for coords, (_, row) in zip(all_coords, df.iterrows()):
    # Create the HTML for the popup
    popup_html = f"<h4>{row['name']}</h4>"

    # Check for any column names in uncertain_df that contain the name of the polygon
    for col in uncertain_df.columns:
        if (row['name']+']') in col:
            # Check if the column contains numeric data
            try:
                numeric_data = pd.to_numeric(uncertain_df[col])
                graph_base64 = create_graph(numeric_data, col)
                popup_html += f'<img src="data:image/png;base64,{graph_base64}"><br>'
            except ValueError:
                # Skip columns that cannot be converted to numeric data
                continue

    # Add the polygon to the map; its popup carries the name and the graphs
    polygon = folium.Polygon(
        locations=coords,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5
    )
    polygon.add_child(folium.Popup(popup_html, max_width=500))  # Increase max_width to 500
    polygon.add_to(m)

# Save the map to an HTML file
m.save('roth_map.html')

# Add custom HTML and JavaScript for the info box.
# folium writes the file as UTF-8, so read it back (and write the combined
# page below) with an explicit encoding instead of the locale default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# m.get_name() is the JavaScript variable that folium assigns the Leaflet map
# to; the click handlers below are attached through it.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 10px;
            right: 10px;
            width: 200px;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
            overflow-y: auto;
            max-height: 90vh;
        }}
        .leaflet-popup-content {{
            max-height: 600px;
            overflow-y: auto;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
        <div id="catchment-graph"></div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name, graph) {{
            document.getElementById('catchment-name').innerText = name;
            document.getElementById('catchment-graph').innerHTML = graph;
        }}
        // Popups are bound to Leaflet layers, not to the rendered SVG paths,
        // so the click handlers are attached through the Leaflet map object.
        var leafletMap = {m.get_name()};
        leafletMap.eachLayer(function(layer) {{
            var popup = layer.getPopup ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                // getContent() may return an HTML string or a DOM node.
                var content = popup.getContent();
                var holder = content;
                if (typeof content === 'string') {{
                    holder = document.createElement('div');
                    holder.innerHTML = content;
                }}
                if (!holder || !holder.querySelector) {{
                    return;
                }}
                var heading = holder.querySelector('h4');
                var image = holder.querySelector('img');
                updateInfoBox(heading ? heading.textContent : '', image ? image.outerHTML : '');
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

print(f"Script completed in {time.time() - start_time:.2f} seconds")



In [16]:
import pandas as pd
import folium
import plotly.express as px
import base64
from io import BytesIO
import time

# Read both CSV inputs in order: the borders table first, then the table
# whose columns are searched for each border's name further down.
df, uncertain_df = map(pd.read_csv, ('RothBorders.csv', 'uncertainReplaced.csv'))

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Parse a single-ring WKT ``POLYGON`` string into a list of coordinate tuples.

    Each vertex is written in the input as ``"<first> <second>"``; the two
    values are swapped on output, so every returned tuple is
    ``(second, first)``. The rest of the cell unpacks these tuples as
    ``(lat, lon)``.

    The parser is tolerant of formatting differences: the ``POLYGON`` keyword
    is matched case-insensitively and may be absent, whitespace around the
    keyword, parentheses and commas is ignored, and vertices may be separated
    by ``","`` with or without a following space.

    Parameters
    ----------
    wkt_str : str
        WKT text such as ``"POLYGON ((x1 y1, x2 y2, ...))"``.

    Returns
    -------
    list[tuple[float, float]]
        One ``(second, first)`` tuple per vertex, in input order.

    Raises
    ------
    ValueError
        If a vertex does not contain two numeric values (this includes
        multi-ring polygons, whose inner parentheses are not supported).
    """
    text = wkt_str.strip()
    # Drop the geometry keyword if present, regardless of letter case.
    if text[:7].upper() == "POLYGON":
        text = text[7:]
    # Remove the surrounding "((" / "))" together with any padding whitespace.
    ring = text.strip("() \t\r\n")

    coords = []
    for pair in ring.split(","):
        parts = pair.split()
        if len(parts) < 2:
            raise ValueError(f"Malformed WKT coordinate pair {pair!r} in {wkt_str!r}")
        # Swap the order: the second value comes first in the result.
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Function to create an interactive graph and return it as an HTML string
def create_graph(data, title, include_plotlyjs='cdn'):
    """Build an interactive Plotly bar chart and return it as an HTML snippet.

    Parameters
    ----------
    data : data accepted by ``plotly.express.bar``
        The values to plot (the caller passes a numeric pandas Series).
    title : str
        Chart title.
    include_plotlyjs : bool or str, optional
        Forwarded to ``Figure.to_html``. The default ``'cdn'`` emits a small
        ``<script>`` tag that loads plotly.js from the CDN instead of
        embedding the complete plotly.js bundle in every snippet; the bundle
        would otherwise be repeated once per graph in the generated page.
        Pass ``True`` to restore the self-contained (embedded) output.

    Returns
    -------
    str
        An HTML fragment (``full_html=False``) containing the chart.
    """
    fig = px.bar(data, title=title)
    # Clear both axis titles.
    fig.update_layout(xaxis_title=None, yaxis_title=None)
    return fig.to_html(full_html=False, include_plotlyjs=include_plotlyjs)

# Extract all coordinates from the WKT column
start_time = time.time()
all_coords = df['WKT'].apply(parse_wkt)
print(f"Parsed WKT coordinates in {time.time() - start_time:.2f} seconds")

# Calculate the center of the map as the mean of every polygon vertex
all_latitudes = [lat for coords in all_coords for lat, lon in coords]
all_longitudes = [lon for coords in all_coords for lat, lon in coords]
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

# Create a map centered around the calculated center
m = folium.Map(location=center, zoom_start=14)

# Iterate through the rows of the DataFrame and add shapes to the map.
# The geometries were already parsed into all_coords above, so they are
# reused here instead of parsing every WKT string a second time.
for coords, (_, row) in zip(all_coords, df.iterrows()):
    # Create the HTML for the popup
    popup_html = f"<h4>{row['name']}</h4>"

    # Check for any column names in uncertain_df that contain the name of the polygon
    for col in uncertain_df.columns:
        if (row['name']+']') in col:
            # Check if the column contains numeric data
            try:
                numeric_data = pd.to_numeric(uncertain_df[col])
                graph_html = create_graph(numeric_data, col)
                popup_html += f'{graph_html}<br>'
            except ValueError:
                # Skip columns that cannot be converted to numeric data
                continue

    # Add the polygon to the map; its popup carries the name and the graphs
    polygon = folium.Polygon(
        locations=coords,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5
    )
    polygon.add_child(folium.Popup(popup_html, max_width=500))  # Increase max_width to 500
    polygon.add_to(m)

# Save the map to an HTML file
m.save('roth_map.html')

# Add custom HTML and JavaScript for the info box.
# folium writes the file as UTF-8, so read it back (and write the combined
# page below) with an explicit encoding instead of the locale default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# m.get_name() is the JavaScript variable that folium assigns the Leaflet map
# to; the click handlers below are attached through it.
# NOTE(review): markup assigned through innerHTML does not execute its
# <script> tags, so the Plotly div copied into the info box may stay empty
# unless the popup has already rendered it -- confirm in a browser.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 10px;
            right: 10px;
            width: 200px;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
            overflow-y: auto;
            max-height: 90vh;
        }}
        .leaflet-popup-content {{
            max-height: 600px;
            overflow-y: auto;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
        <div id="catchment-graph"></div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name, graph) {{
            document.getElementById('catchment-name').innerText = name;
            document.getElementById('catchment-graph').innerHTML = graph;
        }}
        // Popups are bound to Leaflet layers, not to the rendered SVG paths,
        // so the click handlers are attached through the Leaflet map object.
        var leafletMap = {m.get_name()};
        leafletMap.eachLayer(function(layer) {{
            var popup = layer.getPopup ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                // getContent() may return an HTML string or a DOM node.
                var content = popup.getContent();
                var holder = content;
                if (typeof content === 'string') {{
                    holder = document.createElement('div');
                    holder.innerHTML = content;
                }}
                if (!holder || !holder.querySelector) {{
                    return;
                }}
                var heading = holder.querySelector('h4');
                var graphDiv = holder.querySelector('div.plotly-graph-div');
                updateInfoBox(heading ? heading.textContent : '', graphDiv ? graphDiv.outerHTML : '');
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

print(f"Script completed in {time.time() - start_time:.2f} seconds")



In [19]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import base64
from io import BytesIO
import time

# Read the borders table and the table of per-name columns, in that order,
# and bind them to df and uncertain_df respectively.
df, uncertain_df = [
    pd.read_csv(csv_path)
    for csv_path in ('RothBorders.csv', 'uncertainReplaced.csv')
]

# Function to parse WKT and extract coordinates
def parse_wkt(wkt_str):
    """Parse a single-ring WKT ``POLYGON`` string into a list of coordinate tuples.

    Each vertex is written in the input as ``"<first> <second>"``; the two
    values are swapped on output, so every returned tuple is
    ``(second, first)``. The rest of the cell unpacks these tuples as
    ``(lat, lon)``.

    The parser is tolerant of formatting differences: the ``POLYGON`` keyword
    is matched case-insensitively and may be absent, whitespace around the
    keyword, parentheses and commas is ignored, and vertices may be separated
    by ``","`` with or without a following space.

    Parameters
    ----------
    wkt_str : str
        WKT text such as ``"POLYGON ((x1 y1, x2 y2, ...))"``.

    Returns
    -------
    list[tuple[float, float]]
        One ``(second, first)`` tuple per vertex, in input order.

    Raises
    ------
    ValueError
        If a vertex does not contain two numeric values (this includes
        multi-ring polygons, whose inner parentheses are not supported).
    """
    text = wkt_str.strip()
    # Drop the geometry keyword if present, regardless of letter case.
    if text[:7].upper() == "POLYGON":
        text = text[7:]
    # Remove the surrounding "((" / "))" together with any padding whitespace.
    ring = text.strip("() \t\r\n")

    coords = []
    for pair in ring.split(","):
        parts = pair.split()
        if len(parts) < 2:
            raise ValueError(f"Malformed WKT coordinate pair {pair!r} in {wkt_str!r}")
        # Swap the order: the second value comes first in the result.
        coords.append((float(parts[1]), float(parts[0])))
    return coords

# Function to create a graph and return it as a base64 string
def create_graph(data, title):
    """Draw ``data`` as a bar chart and return the PNG image as a base64 string.

    Parameters
    ----------
    data : object exposing a pandas-style ``.plot(kind='bar', ax=...)`` method
        The values to plot (the caller passes a numeric pandas Series).
    title : str
        Text placed above the chart.

    Returns
    -------
    str
        The rendered PNG, base64-encoded and decoded to text so it can be
        embedded in a ``data:image/png;base64,...`` URI.
    """
    fig, ax = plt.subplots()
    try:
        data.plot(kind='bar', ax=ax)
        ax.set_title(title)
        ax.set_xticklabels([])  # Remove x-axis labels
        buf = BytesIO()
        # Save through the figure handle instead of pyplot's implicit
        # "current figure", so the right figure is always written.
        fig.savefig(buf, format='png')
        return base64.b64encode(buf.getvalue()).decode('utf-8')
    finally:
        # Always release the figure, even if plotting or saving failed;
        # this function is called once per matching column in a loop.
        plt.close(fig)

# Extract all coordinates from the WKT column
start_time = time.time()
all_coords = df['WKT'].apply(parse_wkt)
print(f"Parsed WKT coordinates in {time.time() - start_time:.2f} seconds")

# Calculate the center of the map as the mean of every polygon vertex
all_latitudes = [lat for coords in all_coords for lat, lon in coords]
all_longitudes = [lon for coords in all_coords for lat, lon in coords]
center = [sum(all_latitudes) / len(all_latitudes), sum(all_longitudes) / len(all_longitudes)]

# Create a map centered around the calculated center
m = folium.Map(location=center, zoom_start=14)

# Iterate through the rows of the DataFrame and add shapes to the map.
# The geometries were already parsed into all_coords above, so they are
# reused here instead of parsing every WKT string a second time.
for coords, (_, row) in zip(all_coords, df.iterrows()):
    # Create the HTML for the popup
    popup_html = f"<h4>{row['name']}</h4>"

    # Check for any column names in uncertain_df that contain the name of the polygon
    for col in uncertain_df.columns:
        if (row['name']+']') in col:
            # Check if the column contains numeric data
            try:
                numeric_data = pd.to_numeric(uncertain_df[col])
                graph_base64 = create_graph(numeric_data, col)
                popup_html += f'<img src="data:image/png;base64,{graph_base64}"><br>'
            except ValueError:
                # Skip columns that cannot be converted to numeric data
                continue

    # Add the polygon to the map; its popup carries the name and the graphs
    polygon = folium.Polygon(
        locations=coords,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5
    )
    polygon.add_child(folium.Popup(popup_html, max_width=500))  # Increase max_width to 500
    polygon.add_to(m)

# Save the map to an HTML file
m.save('roth_map.html')

# Add custom HTML and JavaScript for the info box.
# folium writes the file as UTF-8, so read it back (and write the combined
# page below) with an explicit encoding instead of the locale default.
with open('roth_map.html', 'r', encoding='utf-8') as file:
    map_html = file.read()

# m.get_name() is the JavaScript variable that folium assigns the Leaflet map
# to; the click handlers below are attached through it.
custom_html = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Roth Borders Map</title>
    <style>
        #map {{
            width: 100%;
            height: 100vh;
        }}
        #info-box {{
            position: absolute;
            top: 0px;
            right: 0px;
            height: 100%;
            width: 30%;
            padding: 10px;
            background-color: white;
            border: 1px solid #ddd;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            z-index: 1000;
            overflow-y: auto;
            max-height: 90vh;
        }}
        .leaflet-popup-content {{
            max-height: 600px;
            overflow-y: auto;
        }}
    </style>
</head>
<body>
    <div id="map"></div>
    <div id="info-box">
        <h4>Catchment Information</h4>
        <div id="catchment-name">Click on a catchment to see its name here.</div>
        <div id="catchment-graph"></div>
    </div>
    {map_html}
    <script>
        function updateInfoBox(name, graph) {{
            document.getElementById('catchment-name').innerText = name;
            document.getElementById('catchment-graph').innerHTML = graph;
        }}
        // Popups are bound to Leaflet layers, not to the rendered SVG paths,
        // so the click handlers are attached through the Leaflet map object.
        var leafletMap = {m.get_name()};
        leafletMap.eachLayer(function(layer) {{
            var popup = layer.getPopup ? layer.getPopup() : null;
            if (!popup) {{
                return;
            }}
            layer.on('click', function() {{
                // getContent() may return an HTML string or a DOM node.
                var content = popup.getContent();
                var holder = content;
                if (typeof content === 'string') {{
                    holder = document.createElement('div');
                    holder.innerHTML = content;
                }}
                if (!holder || !holder.querySelector) {{
                    return;
                }}
                var heading = holder.querySelector('h4');
                var image = holder.querySelector('img');
                updateInfoBox(heading ? heading.textContent : '', image ? image.outerHTML : '');
            }});
        }});
    </script>
</body>
</html>
"""

with open('roth_map_with_info_box_test.html', 'w', encoding='utf-8') as file:
    file.write(custom_html)

print(f"Script completed in {time.time() - start_time:.2f} seconds")

