In [1]:
import pandas as pd

# Path of the tide CSV. Bound before the try block so the error handler can
# report it even when the read itself fails.
DATA_PATH = "Tide Prediction.csv"
# Upper bound on the number of rows kept for training.
SAMPLE_SIZE = 500

try:
    # Load the real dataset from the specified file path
    df = pd.read_csv(DATA_PATH)

    # Keep a reproducible random subset (fixed random_state). Capping n at
    # len(df) lets a file with fewer than SAMPLE_SIZE rows load instead of
    # making sample() raise ValueError.
    df = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=42)

except FileNotFoundError:
    # Report the path, not `df`: `df` is unbound when read_csv fails, so
    # interpolating it here would raise NameError inside the handler.
    print(f"Error: The file '{DATA_PATH}' was not found.")
    print("Please check the file path and try again.")
    # Stop here; nothing below can run without the data.
    exit()
except Exception as e:
    # Any other failure while reading or sampling is reported, then execution stops.
    print(f"An error occurred while reading the file: {e}")
    exit()

  df = pd.read_csv("Tide Prediction.csv")


In [2]:
# Sanity check: (rows, columns) of the sampled frame.
df.shape

(500, 6)

In [3]:
# Per-column count of missing values (isna is the same check as isnull).
df.isna().sum()

time               0
longitude          0
latitude           0
stationID          0
Water_Level        0
Water_Level_ODM    0
dtype: int64

In [4]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')


In [5]:
# Force the coordinate and water-level columns to a numeric dtype.
# errors='coerce' turns unparseable entries into NaN, and the earlier dropna()
# ran before this coercion, so rows that fail to parse are dropped here;
# otherwise they would reach the risk scoring and model fitting as NaN.
numeric_columns = ['latitude', 'longitude', 'Water_Level', 'Water_Level_ODM']
for column in numeric_columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')
df = df.dropna(subset=numeric_columns)

In [6]:
# Preview the first rows after the numeric coercion.
df.head()

Unnamed: 0,time,longitude,latitude,stationID,Water_Level,Water_Level_ODM
2601242,2019-02-04T10:54:00Z,-6.385,54.008,Dundalk,4.16,1.87
6488216,2017-04-08T02:24:00Z,-6.4589,52.33852,Wexford,0.99,-0.03
1136201,2017-08-14T06:10:00Z,-6.090086,53.21909,Bray_Harbour_MODELLED,2.48,0.13
5774859,2018-09-21T19:06:00Z,-6.108117,53.585,Skerries,4.02,0.78
3134772,2017-08-23T03:30:00Z,-9.885342,52.280815,Fenit_MODELLED,3.78,1.03


In [7]:
# Duplicate the coordinate columns under capitalised names; 'Latitude' is
# added first, then 'Longitude'.
for source_name, target_name in (('latitude', 'Latitude'), ('longitude', 'Longitude')):
    df[target_name] = df[source_name]

In [8]:
# Check that the new 'Latitude'/'Longitude' columns mirror the lowercase originals.
df.head()

Unnamed: 0,time,longitude,latitude,stationID,Water_Level,Water_Level_ODM,Latitude,Longitude
2601242,2019-02-04T10:54:00Z,-6.385,54.008,Dundalk,4.16,1.87,54.008,-6.385
6488216,2017-04-08T02:24:00Z,-6.4589,52.33852,Wexford,0.99,-0.03,52.33852,-6.4589
1136201,2017-08-14T06:10:00Z,-6.090086,53.21909,Bray_Harbour_MODELLED,2.48,0.13,53.21909,-6.090086
5774859,2018-09-21T19:06:00Z,-6.108117,53.585,Skerries,4.02,0.78,53.585,-6.108117
3134772,2017-08-23T03:30:00Z,-9.885342,52.280815,Fenit_MODELLED,3.78,1.03,52.280815,-9.885342


In [9]:
# Remove the lowercase coordinate columns in place; the capitalised copies remain.
df.drop(columns=['longitude', 'latitude'], inplace=True)

In [10]:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import io
import numpy as np
import joblib

# --- 1. RISK SCORE LABELLING ---
# No mock data is built here: `df` is the sample of 'Tide Prediction.csv'
# loaded with pd.read_csv in the first cell.


# For a realistic risk prediction, we'll assign a 'risk score' based on water levels.
# This would be your target variable.
# We'll create a simple function to do this for demonstration purposes.
def assign_risk_score(row):
    """Bucket one record into a 1-3 risk score from its mean water level.

    ``row['Water_Level']`` and ``row['Water_Level_ODM']`` are each cast to
    float (so string-typed readings are accepted) and averaged.

    Returns:
        3 (high risk) when the mean exceeds 3.0, 2 (moderate risk) when it
        exceeds 1.5, otherwise 1 (low risk).
    """
    level = float(row['Water_Level'])
    level_odm = float(row['Water_Level_ODM'])
    mean_level = (level + level_odm) / 2

    # Guard clauses, checked from the highest band down.
    if mean_level > 3.0:
        return 3
    if mean_level > 1.5:
        return 2
    return 1

# Label every row with its 1-3 risk score; this column is the regression target.
df['risk_score'] = df.apply(assign_risk_score, axis=1)

# --- 2. DATA PREPARATION AND MODEL TRAINING ---

# The model is given only the coordinates as inputs. The two water-level
# columns are used solely to derive the target via assign_risk_score.
features = ['Latitude', 'Longitude']
X, y = df[features], df['risk_score']

# A random forest regressor is used because the target is a numeric score.
# No hold-out split is made: the forest is fit on every row of df.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X, y)

# Persist the fitted estimator under a relative path (current working directory).
model_filename = 'tide_prediction.joblib'
joblib.dump(model, model_filename)
print(f"\nModel successfully saved to {model_filename}")

# --- 3. PREDICTION FUNCTION ---

def predict_risk(user_lat, user_lon):
    """Score a coordinate with the trained forest and attach a risk label.

    The coordinate is passed straight to the module-level ``model``; no
    nearest-station lookup or current water-level data is involved.

    Args:
        user_lat: Latitude of the query point.
        user_lon: Longitude of the query point.

    Returns:
        Tuple ``(predicted_score, risk_label)``. ``risk_label`` is 'High' for
        scores >= 2.5, 'Moderate' for scores >= 1.5, and 'Low' otherwise.
    """
    # Single-row frame: the model expects 2D input, with the same column
    # names and order as the training features.
    query = pd.DataFrame({'Latitude': [user_lat], 'Longitude': [user_lon]})

    predictions = model.predict(query)
    predicted_score = predictions[0]

    # Walk the cutoffs from the highest band down; the first one met wins.
    risk_label = 'Low'
    for cutoff, label in ((2.5, 'High'), (1.5, 'Moderate')):
        if predicted_score >= cutoff:
            risk_label = label
            break

    return predicted_score, risk_label

# --- 4. USER INTERFACE AND EXECUTION ---

if __name__ == "__main__":
    # Banner: three lines emitted in one call.
    print("\n".join((
        "Coastal Flood Risk Predictor (Tide Prediction Data)",
        "--------------------------------------------------",
        "This tool predicts flood risk based on tide station data.",
    )))

    try:
        # Prompts are evaluated lazily and in order; float() raises ValueError
        # on non-numeric text, which the handler below reports.
        user_lat, user_lon = (
            float(input(prompt))
            for prompt in ("Enter Latitude: ", "Enter Longitude: ")
        )

        # Run the model on the entered coordinate.
        score, label = predict_risk(user_lat, user_lon)

        print(f"\nPredicted Flood Risk Score (1-3): {score:.2f}")
        print(f"Assigned Flood Risk Label: {label}")

    except ValueError:
        print("Invalid input. Please enter numerical values for latitude and longitude.")


Model successfully saved to tide_prediction.joblib
Coastal Flood Risk Predictor (Tide Prediction Data)
--------------------------------------------------
This tool predicts flood risk based on tide station data.

Predicted Flood Risk Score (1-3): 1.86
Assigned Flood Risk Label: Moderate
