In [None]:
import logging
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib  # For model saving/loading

In [None]:
class PredictiveModel:
    """Random-forest classifier wrapped in a scikit-learn preprocessing pipeline."""

    def __init__(self, model_path: str = None):
        """
        Set up the logger and build the untrained pipeline.

        The pipeline first routes columns through a ColumnTransformer and then
        feeds the result to a RandomForestClassifier seeded with 42.

        Args:
            model_path (str, optional): Location of a previously saved model.
                When falsy (the default ``None``) nothing is loaded; otherwise
                ``self.load_model(model_path)`` is called after the default
                pipeline has been built.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.info("Initializing Predictive Model")

        numeric_columns = ['terrain_feature_1', 'terrain_feature_2', 'orbat_strength']
        categorical_columns = ['weather_condition', 'previous_op_outcome']

        # Numeric branch: mean-impute missing values, then standardize.
        numeric_steps = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler()),
        ])
        # Categorical branch: one-hot encode with handle_unknown='ignore'.
        categorical_encoder = OneHotEncoder(handle_unknown='ignore')

        preprocessor = ColumnTransformer(transformers=[
            ('num', numeric_steps, numeric_columns),
            ('cat', categorical_encoder, categorical_columns),
        ])
        classifier = RandomForestClassifier(random_state=42)

        self.model = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('classifier', classifier),
        ])

        # A supplied path replaces the freshly built pipeline with the stored one.
        if model_path:
            self.load_model(model_path)


In [None]:
def train(self, orbat_data: list, terrain_features: list, previous_ops: list, cv: int = 5) -> None:
        """
        Trains the predictive model using historical data.

        The inputs are turned into a feature matrix and target via
        ``self.preprocess_data``, the pipeline is scored with cross-validation
        (accuracy), then fitted on the full dataset, and finally the feature
        importances are logged.

        Args:
            orbat_data (list): Data on enemy order of battle.
            terrain_features (list): Extracted terrain features from TerrainAnalysisModel.
            previous_ops (list): Data from previous operations.
            cv (int, optional): Value forwarded to ``cross_val_score`` as its
                ``cv`` argument. Defaults to 5, the previously hard-coded
                value, so existing callers are unaffected; small datasets can
                pass a lower number.

        Returns:
            None

        Raises:
            Exception: Any error raised while preprocessing, cross-validating,
                fitting, or logging importances is logged and re-raised.
        """
        self.logger.info("Training Predictive Model")

        try:
            # 1. Data Preprocessing
            X, y = self.preprocess_data(orbat_data, terrain_features, previous_ops)

            # 2. Cross-validation
            self.logger.info("Performing cross-validation")
            cv_scores = cross_val_score(self.model, X, y, cv=cv, scoring='accuracy')
            mean_accuracy = cv_scores.mean()
            self.logger.info("Cross-Validation Accuracy Scores: %s", cv_scores)
            self.logger.info("Mean Cross-Validation Accuracy: %.4f", mean_accuracy)

            # 3. Fit the model on the entire dataset after cross-validation
            self.model.fit(X, y)
            self.logger.info("Model training completed")

            # 4. Feature Importances
            self.log_feature_importances()

        except Exception as e:
            self.logger.error("Error during training: %s", e)
            raise


In [None]:
def predict(self, orbat_data: list, terrain_features: list, previous_ops: list) -> list:
        """
        Predict enemy courses of action (COAs) for the supplied inputs.

        Args:
            orbat_data (list): Current enemy order of battle data.
            terrain_features (list): Terrain features of the area of operations.
            previous_ops (list): Data from previous operations.

        Returns:
            list: The model's predictions, converted with ``tolist()``.

        Raises:
            Exception: Any error from preprocessing or prediction is logged
                and re-raised.
        """
        self.logger.info("Predicting Enemy Courses of Action")

        try:
            # Build the feature matrix in prediction mode (no target column).
            features = self.preprocess_data(
                orbat_data, terrain_features, previous_ops, for_training=False
            )
            # Predictions are returned as-is; no label decoding is applied.
            return self.model.predict(features).tolist()
        except Exception as exc:
            self.logger.error("Error during prediction: %s", exc)
            raise

In [None]:
def preprocess_data(self, orbat_data: list, terrain_features: list, previous_ops: list, for_training: bool = True):
        """
        Preprocesses the data for training and prediction.

        Each input is converted to a DataFrame with fixed column names and the
        three frames are concatenated side by side (row i of each input forms
        row i of the result).

        Args:
            orbat_data (list): Enemy order of battle rows, read as
                ``(unit_type, orbat_strength)``.
            terrain_features (list): Terrain rows, read as
                ``(terrain_feature_1, terrain_feature_2)``.
            previous_ops (list): Previous-operation rows, read as
                ``(weather_condition, previous_op_outcome, coa_taken)``.
                When ``for_training`` is False the trailing ``coa_taken``
                field may be omitted, since it is discarded anyway.
            for_training (bool, optional): Whether the preprocessing is for training or prediction.
                                            Defaults to True.

        Returns:
            tuple or pd.DataFrame: Feature matrix (X) and target variable (y) for training,
                                   or feature matrix (X) for prediction.

        Raises:
            ValueError: If pandas cannot fit the rows to the expected columns,
                or if a required column is missing from the combined frame.
        """
        feature_columns = [
            'terrain_feature_1', 'terrain_feature_2', 'orbat_strength',
            'weather_condition', 'previous_op_outcome',
        ]
        target_column = 'coa_taken'
        ops_columns = ['weather_condition', 'previous_op_outcome', target_column]

        # Convert input lists to DataFrames
        orbat_df = pd.DataFrame(orbat_data, columns=['unit_type', 'orbat_strength'])
        terrain_df = pd.DataFrame(terrain_features, columns=['terrain_feature_1', 'terrain_feature_2'])  # Example features

        if for_training:
            previous_ops_df = pd.DataFrame(previous_ops, columns=ops_columns)
        else:
            # At prediction time the outcome label is unknown, so callers may
            # pass two-field rows. Try the full layout first so every input
            # that worked before is handled exactly as it was.
            try:
                previous_ops_df = pd.DataFrame(previous_ops, columns=ops_columns)
            except ValueError:
                previous_ops_df = pd.DataFrame(previous_ops, columns=ops_columns[:-1])
            # For prediction, exclude 'coa_taken'
            previous_ops_df = previous_ops_df.drop(columns=[target_column], errors='ignore')

        # Combine data sources
        df = pd.concat([orbat_df, terrain_df, previous_ops_df], axis=1)

        # Validate required columns
        required_columns = list(feature_columns)
        if for_training:
            required_columns.append(target_column)

        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            self.logger.error("Missing columns in input data: %s", missing_columns)
            raise ValueError(f"Missing columns in input data: {missing_columns}")

        X = df[feature_columns]
        if for_training:
            return X, df[target_column]  # Target variable
        return X

In [None]:
def save_model(self, model_path: str) -> None:
        """
        Persist the current pipeline (``self.model``) to disk with joblib.

        Args:
            model_path (str): Destination path for the serialized model.

        Raises:
            Exception: Any error raised while writing is logged and re-raised.
        """
        try:
            # Serialize the whole pipeline object held in self.model.
            joblib.dump(self.model, model_path)
            self.logger.info("Model saved to %s", model_path)
        except Exception as exc:
            self.logger.error("Failed to save model to %s: %s", model_path, exc)
            raise

In [None]:
def load_model(self, model_path: str) -> None:
        """
        Replace ``self.model`` with a model read from disk via joblib.

        NOTE: joblib files are pickle-based, so only load files from a
        trusted source.

        Args:
            model_path (str): Path of the saved model file.

        Raises:
            FileNotFoundError: If no file exists at ``model_path`` (logged, then re-raised).
            Exception: Any other loading error is logged and re-raised.
        """
        try:
            restored = joblib.load(model_path)
            # Only swap the model in once loading has succeeded.
            self.model = restored
            self.logger.info("Model loaded from %s", model_path)
        except FileNotFoundError:
            self.logger.error("Model file not found at %s", model_path)
            raise
        except Exception as exc:
            self.logger.error("Error loading model: %s", exc)
            raise

In [None]:
def log_feature_importances(self) -> None:
        """
        Logs the feature importances from the trained Random Forest model.

        Feature names are taken from the fitted preprocessor: the scaler of
        the first transformer (numeric columns) followed by the second
        transformer (categorical columns). They are paired positionally with
        the classifier's ``feature_importances_`` and logged in descending
        order of importance.

        Raises:
            Exception: Any error while reading the fitted pipeline (for
                example, if it has not been fitted) is logged and re-raised.
        """
        numeric_columns = ['terrain_feature_1', 'terrain_feature_2', 'orbat_strength']
        categorical_columns = ['weather_condition', 'previous_op_outcome']

        try:
            steps = self.model.named_steps
            classifier = steps['classifier']

            # transformers_ holds (name, fitted_transformer, columns) entries
            # in the order they were declared: numeric first, then categorical.
            fitted_transformers = steps['preprocessor'].transformers_
            scaler = fitted_transformers[0][1].named_steps['scaler']
            encoder = fitted_transformers[1][1]

            feature_names = (
                scaler.get_feature_names_out(numeric_columns).tolist()
                + encoder.get_feature_names_out(categorical_columns).tolist()
            )

            feature_importance_dict = dict(zip(feature_names, classifier.feature_importances_))

            # Sort features by importance
            sorted_features = sorted(feature_importance_dict.items(), key=lambda item: item[1], reverse=True)
            self.logger.info("Feature Importances:")
            for feature, importance in sorted_features:
                # Lazy %-formatting, matching the other log calls in this file.
                self.logger.info("%s: %.4f", feature, importance)
        except Exception as e:
            self.logger.error("Error logging feature importances: %s", e)
            raise
