In [None]:
import pandas as pd 

def get_corrosion_rate(df_corrosion, location):
    """
    extracts the corrosion rate of the closest cell to a specified location.
    If the cell value is NaN, a square search is undertaken, where the neighbouring cells are searched,
    and if those are still NaN, the square size is increased until a non NaN cell is found.

    df_corrosion: grid whose index labels are looked up with the rounded latitude and whose
                  column labels are looked up with the rounded longitude.
    location:     (lat, lon) pair; both values are rounded to the nearest integer label.

    Returns the value of the first non NaN cell found (the square is scanned row by row),
    or NaN if every cell of the grid is NaN.
    Raises KeyError if the rounded latitude / longitude is not a label of the grid.
    """
    lat, lon = location

    # Translate the labels into positions once, so that the centre cell and the
    # neighbour search address the grid the same way. Searching by position with
    # label values would wrap around (negative values) or run off the grid.
    # NOTE(review): assumes index and column labels are unique, so get_loc returns an int.
    row = df_corrosion.index.get_loc(round(lat))
    col = df_corrosion.columns.get_loc(round(lon))
    n_rows, n_cols = df_corrosion.shape

    corrosion_rate = df_corrosion.iat[row, col]

    square_size = 0
    while pd.isna(corrosion_rate):
        square_size += 1

        # clip the square to the grid so cells outside it are never addressed
        row_start = max(row - square_size, 0)
        row_stop = min(row + square_size + 1, n_rows)
        col_start = max(col - square_size, 0)
        col_stop = min(col + square_size + 1, n_cols)

        for i in range(row_start, row_stop):
            for j in range(col_start, col_stop):
                candidate = df_corrosion.iat[i, j]
                if not pd.isna(candidate):
                    return candidate

        # the square now spans the whole grid and nothing was found: stop
        # instead of growing the square forever
        if row_start == 0 and col_start == 0 and row_stop == n_rows and col_stop == n_cols:
            break

    return corrosion_rate


In [None]:
from _1_Lab_Data import norm_input
import numpy as np


def global_model(model, df_temp, df_sal, df_doxy, df_pH):
    """
    applies the corrosion model to the global seawater data, and calculates the corrosion rate for each cell

    model:    object exposing predict(inputs); called once with all valid cells.
    df_temp, df_sal, df_doxy, df_pH: grids of the four corrosion indicators. They are read
              by position, so they are assumed to share the same shape and cell layout.

    Returns a DataFrame with the index and columns of df_temp holding the predicted
    corrosion rate of every valid cell and NaN everywhere else.
    """
    temp = df_temp.to_numpy()
    sal = df_sal.to_numpy()
    doxy = df_doxy.to_numpy()
    ph = df_pH.to_numpy()

    # disregard cells in which there is at least one NaN value across the four corrosion indicators
    mask_valid_cells = ~(pd.isna(temp) | pd.isna(sal) | pd.isna(doxy) | pd.isna(ph))
    rows, cols = np.nonzero(mask_valid_cells)  # row-major order, one entry per valid cell

    corrosion = np.full(df_temp.shape, np.nan)

    # nothing to predict: return the all-NaN grid rather than feeding an empty
    # array to the normalisation and the model
    if rows.size:
        input_data = np.column_stack([
            temp[rows, cols],
            doxy[rows, cols] / 43.570,  # 1 ml/l of O2 is approximately 43.570 µmol/kg (doxy data is in µmol/kg)
            sal[rows, cols],
            ph[rows, cols],
        ])  # feature order expected downstream: temp, doxy, sal, ph

        input_data_normalized = norm_input(input_data)
        predictions = model.predict(input_data_normalized)

        # NOTE(review): assumes one prediction per input row; flattening lets both
        # (n,) and (n, 1) outputs be written back as scalars
        corrosion[rows, cols] = np.asarray(predictions, dtype=float).reshape(-1)

    return pd.DataFrame(corrosion, index=df_temp.index, columns=df_temp.columns)

In [2]:
def field_test_results():
    """
    builds the table of field test values taken from other papers:
    one row per site, with the columns lat, lon and corrosion
    """
    # site -> [corrosion, lat, lon]
    # sites currently left out of the comparison:
    #   "Le Harve Harbour": [0.1, 49.4750, 0.1333]
    #   "Jervis Bay": [0.27, -35.0557, 150.7362]
    observed_by_site = {
        "Taylors Beach": [0.09, -32.7507, 152.0673],
        "Qingdao": [0.19, 36.066898, 120.382698],
        "Zhoushan": [0.15, 30.0000, 122.0000],
        "Xiamen": [0.17, 24.479834, 118.089424],
        "Yulin": [0.11, 22.636379, 110.164756],
    }

    # each dict entry becomes a row labelled with the site name
    table = pd.DataFrame.from_dict(
        observed_by_site,
        orient="index",
        columns=["corrosion", "lat", "lon"],
    )

    # present the coordinates first and the observed value last
    return table[["lat", "lon", "corrosion"]]

def compare_model(df_corrosion):
    """
    compares the field test values with the model values looked up in df_corrosion.
    Adds to the field test table: corrosion_pred (value from get_corrosion_rate at the site),
    corrosion_diff (absolute difference) and corrosion_perc (difference as a percentage
    of the mean of the observed and predicted values)
    """
    def predicted_at(site):
        # look up the grid value for this site's coordinates
        return get_corrosion_rate(df_corrosion, (site['lat'], site['lon']))

    comparison = field_test_results()
    comparison['corrosion_pred'] = comparison.apply(predicted_at, axis=1)

    observed = comparison['corrosion']
    predicted = comparison['corrosion_pred']

    comparison['corrosion_diff'] = (observed - predicted).abs()

    # percentage is taken relative to the midpoint of the two values
    midpoint = (observed + predicted) / 2
    comparison['corrosion_perc'] = 100 * (comparison['corrosion_diff'] / midpoint)

    return comparison
