# Reservoir Behavior Prediction using ML
This notebook demonstrates how to use machine learning models (Random Forest and XGBoost) to predict cumulative oil production (`Np`) from reservoir properties using synthetic data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

import xgboost as xgb


In [None]:
# Read the synthetic reservoir dataset (path is relative to this notebook)
csv_path = '../data/synthetic_reservoir_data.csv'
df = pd.read_csv(csv_path)

# Preview the first rows as the cell output
df.head()

In [None]:
# Pairwise scatter plots of selected reservoir properties and the target `Np`
sns.pairplot(df[['porosity', 'permeability', 'pressure_initial', 'Np']])
plt.show()

# Correlation heatmap. Restrict to numeric columns first: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns by default and raises
# if any are present, so filter explicitly to keep this cell runnable.
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, cmap='coolwarm')
plt.show()


In [None]:
# Target is cumulative oil production (`Np`); every other column is a predictor
y = df['Np']
X = df.drop('Np', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a 100-tree random forest on the training split (seeded for repeatability)
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out split and compute the evaluation metrics
y_pred_rf = rf.predict(X_test)
rf_rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))
rf_r2 = r2_score(y_test, y_pred_rf)

print('RandomForest RMSE:', rf_rmse)
print('RandomForest R2:', rf_r2)

In [None]:
# Gradient-boosted regressor: 200 estimators, learning rate 0.05, max depth 5
xgb_model = xgb.XGBRegressor(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=5,
    random_state=42,
)
xgb_model.fit(X_train, y_train)

# Predict on the held-out split and compute the evaluation metrics
y_pred_xgb = xgb_model.predict(X_test)
xgb_rmse = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
xgb_r2 = r2_score(y_test, y_pred_xgb)

print('XGBoost RMSE:', xgb_rmse)
print('XGBoost R2:', xgb_r2)

In [None]:
# Draw the fitted XGBoost model's per-feature importance chart
xgb.plot_importance(booster=xgb_model)

# Render the figure
plt.show()