In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Load the data (replace 'file.csv' with your file path)
file_path = 'OceanCleaned.csv'  # Update the file path

data = pd.read_csv(file_path)

# Display first few rows to verify
data.head()

# Step 1: Preprocessing
# Drop unnecessary columns (e.g., Mooring Name, Date, Time)
data = data.drop(['Mooring Name', 'Date', 'Time'], axis=1)

# Check for missing values
data = data.dropna()  # Drop rows with missing values (or handle them as needed)

# Step 2: Define features (X) and target (y)
X = data.drop(['pH (total scale)'], axis=1)  # Features
y = data['pH (total scale)']  # Target

# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 5: Evaluate the model
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Model Evaluation:")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

# Step 6: Make predictions (example)
example_data = X_test.iloc[:5]  # Take the first 5 rows from the test set as examples
predicted_pH = model.predict(example_data)

print("\nExample Predictions:")
print(predicted_pH)


Model Evaluation:
Mean Absolute Error (MAE): 7.239729587632444e-17
Mean Squared Error (MSE): 1.2860343168399246e-31
Root Mean Squared Error (RMSE): 3.586132062319965e-16

Example Predictions:
[8.36068 8.36576 8.36308 8.36409 8.36269]
