In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the temporal data
temporal_data = pd.read_csv('preprocessing/cleaned-temporal-data.csv')

# Prepare temporal data for analysis
X_temporal = temporal_data[['Year', 'MPA']]
y_temporal = temporal_data['FisheryConsumption']

# Split the data into training and test sets
X_train_temporal, X_test_temporal, y_train_temporal, y_test_temporal = train_test_split(X_temporal, y_temporal, test_size=0.2, random_state=42)

# Initialize the models
rf_temporal = RandomForestRegressor(random_state=42)
gb_temporal = GradientBoostingRegressor(random_state=42)
svm_temporal = SVR()
# Logistic Regression is not suitable for regression tasks. It is better used for classification.

# Fit the models
rf_temporal.fit(X_train_temporal, y_train_temporal)
gb_temporal.fit(X_train_temporal, y_train_temporal)
svm_temporal.fit(X_train_temporal, y_train_temporal)

# Predict on the test set
y_pred_rf_temporal = rf_temporal.predict(X_test_temporal)
y_pred_gb_temporal = gb_temporal.predict(X_test_temporal)
y_pred_svm_temporal = svm_temporal.predict(X_test_temporal)

# Evaluate the models
mse_rf_temporal = mean_squared_error(y_test_temporal, y_pred_rf_temporal)
r2_rf_temporal = r2_score(y_test_temporal, y_pred_rf_temporal)

mse_gb_temporal = mean_squared_error(y_test_temporal, y_pred_gb_temporal)
r2_gb_temporal = r2_score(y_test_temporal, y_pred_gb_temporal)

mse_svm_temporal = mean_squared_error(y_test_temporal, y_pred_svm_temporal)
r2_svm_temporal = r2_score(y_test_temporal, y_pred_svm_temporal)

print(f"Random Forest - MSE: {mse_rf_temporal}, R²: {r2_rf_temporal}")
print(f"Gradient Boosting - MSE: {mse_gb_temporal}, R²: {r2_gb_temporal}")
print(f"SVM - MSE: {mse_svm_temporal}, R²: {r2_svm_temporal}")


Random Forest - MSE: 56913330840.49683, R²: -1.8033631275413624
Gradient Boosting - MSE: 50847681804.858246, R²: -1.504589244516111
SVM - MSE: 112338301727.69785, R²: -4.5334145484586275


In [4]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVR
from sklearn.linear_model import LogisticRegression

# Load the spatial data
spatial_data = pd.read_csv('preprocessing/cleaned-spatial-data.csv')

# Encode the 'Country Name' categorical variable
encoder = OneHotEncoder()
X_spatial = encoder.fit_transform(spatial_data[['Country Name']])
X_spatial = pd.DataFrame(X_spatial.toarray(), columns=encoder.get_feature_names_out(['Country Name']))

# Add the 'MPA' feature to the spatial data
X_spatial['MPA'] = spatial_data['MPA']
y_spatial = spatial_data['Fishery']

# Split the data into training and test sets
X_train_spatial, X_test_spatial, y_train_spatial, y_test_spatial = train_test_split(X_spatial, y_spatial, test_size=0.2, random_state=42)

# Initialize the models
rf_spatial = RandomForestRegressor(random_state=42)
gb_spatial = GradientBoostingRegressor(random_state=42)
svm_spatial = SVR()
# Logistic Regression is generally used for classification, so it might not be appropriate for this regression task.

# Fit the models
rf_spatial.fit(X_train_spatial, y_train_spatial)
gb_spatial.fit(X_train_spatial, y_train_spatial)
svm_spatial.fit(X_train_spatial, y_train_spatial)

# Predict on the test set
y_pred_rf_spatial = rf_spatial.predict(X_test_spatial)
y_pred_gb_spatial = gb_spatial.predict(X_test_spatial)
y_pred_svm_spatial = svm_spatial.predict(X_test_spatial)

# Evaluate the models
mse_rf_spatial = mean_squared_error(y_test_spatial, y_pred_rf_spatial)
r2_rf_spatial = r2_score(y_test_spatial, y_pred_rf_spatial)

mse_gb_spatial = mean_squared_error(y_test_spatial, y_pred_gb_spatial)
r2_gb_spatial = r2_score(y_test_spatial, y_pred_gb_spatial)

mse_svm_spatial = mean_squared_error(y_test_spatial, y_pred_svm_spatial)
r2_svm_spatial = r2_score(y_test_spatial, y_pred_svm_spatial)

print(f"Random Forest - MSE: {mse_rf_spatial}, R²: {r2_rf_spatial}")
print(f"Gradient Boosting - MSE: {mse_gb_spatial}, R²: {r2_gb_spatial}")
print(f"SVM - MSE: {mse_svm_spatial}, R²: {r2_svm_spatial}")


Random Forest - MSE: 931224091123893.2, R²: -0.0907918945765287
Gradient Boosting - MSE: 878204828497067.0, R²: -0.028687635804649503
SVM - MSE: 929091544804411.8, R²: -0.0882939305931314
