In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, accuracy_score


In [None]:
class MilitaryRoboticSystemMaintenance:
    """Predictive-maintenance workflow for a robotic system.

    Bundles dataset construction, preprocessing, random-forest training,
    evaluation plots, a GRU cross-check and risk scoring of new readings.

    Attributes:
        model: Fitted classifier set by ``train_model``; ``None`` until then.
        scaler: ``StandardScaler`` fitted by ``preprocess_data`` and reused
            by ``predict_maintenance_needs``.
        feature_names: Feature column names recorded by ``preprocess_data``;
            ``None`` until then.
    """

    # (feature, threshold, weight): every reading above its threshold adds
    # ``weight`` to the raw failure score of that sample.
    _FAILURE_RULES = (
        ('system_temperature', 70, 0.3),
        ('power_unit_temperature', 80, 0.4),
        ('system_vibration', 2.0, 0.4),
        ('power_unit_vibration', 2.5, 0.3),
        ('operational_hours', 4000, 0.2),
        ('duty_cycle', 80, 0.3),
        ('days_since_maintenance', 300, 0.4),
        ('previous_failures', 2, 0.3),
    )
    # Divisor turning the raw score into a probability. The weights above
    # sum to 2.6, so the resulting probability always stays below 1.
    _FAILURE_SCORE_DIVISOR = 2.7

    def __init__(self):
        """Create an untrained instance with a fresh, unfitted scaler."""
        self.model = None
        self.scaler = StandardScaler()
        self.feature_names = None

    def generate_synthetic_data(
        self,
        n_samples=1000,
        csv_path='smart_manufacturing_temperature_regulation.csv',
    ):
        """Build a dataset from recorded temperatures plus synthetic sensors.

        The three temperature features are read from the CSV file; all other
        features are drawn at random (uniform, and Poisson for
        ``previous_failures``). The ``is_failure`` label is a Bernoulli draw
        whose probability grows with the number of exceeded thresholds in
        ``_FAILURE_RULES``.

        Note that this reseeds NumPy's global random generator with 42, so
        repeated calls with the same inputs return identical data.

        Args:
            n_samples: Number of rows to produce. Only the first
                ``n_samples`` rows of the CSV are used.
            csv_path: Location of the CSV providing the
                ``Current``/``Setpoint``/``Ambient Temperature (°C)``
                columns.

        Returns:
            A ``pandas.DataFrame`` with the feature columns, the binary
            ``is_failure`` label and an hourly ``timestamp`` column starting
            at 2024-01-01.

        Raises:
            ValueError: If the CSV holds fewer than ``n_samples`` rows.
        """
        recorded = pd.read_csv(csv_path)
        if len(recorded) < n_samples:
            raise ValueError(
                f"'{csv_path}' has only {len(recorded)} rows but "
                f"n_samples={n_samples} were requested"
            )
        # The synthetic columns below have exactly n_samples entries, so the
        # recorded columns must be cut to the same length.
        recorded = recorded.iloc[:n_samples]

        np.random.seed(42)
        # The draw order is significant: it fixes which random numbers each
        # feature receives for the seed above.
        data = {
            'system_temperature': recorded['Current Temperature (°C)'].values,
            'power_unit_temperature': recorded['Setpoint Temperature (°C)'].values,
            'ambient_temperature': recorded['Ambient Temperature (°C)'].values,
            'system_vibration': np.random.uniform(0.1, 2.5, n_samples),
            'power_unit_vibration': np.random.uniform(0.2, 3.0, n_samples),
            'hydraulic_pressure': np.random.uniform(1000, 3000, n_samples),
            'voltage_fluctuation': np.random.uniform(-0.5, 0.5, n_samples),
            'current_draw': np.random.uniform(10, 30, n_samples),
            'operational_hours': np.random.uniform(0, 5000, n_samples),
            'duty_cycle': np.random.uniform(20, 90, n_samples),
            'load_capacity': np.random.uniform(40, 100, n_samples),
            'days_since_maintenance': np.random.uniform(0, 365, n_samples),
            'previous_failures': np.random.poisson(0.5, n_samples),
        }

        raw_score = sum(
            weight * (data[name] > threshold).astype(int)
            for name, threshold, weight in self._FAILURE_RULES
        )
        failure_probability = raw_score / self._FAILURE_SCORE_DIVISOR
        data['is_failure'] = np.random.binomial(1, failure_probability)

        df = pd.DataFrame(data)
        df['timestamp'] = pd.date_range(
            start='2024-01-01', periods=n_samples, freq='h'
        )
        return df

    def preprocess_data(self, data):
        """Split the dataset 70/30 and standardise the features.

        Every column except ``is_failure`` and ``timestamp`` is treated as a
        feature; the chosen names are stored in ``self.feature_names``. The
        scaler is fitted on the training part only, so no statistics of the
        test part leak into training.

        Args:
            data: Frame containing the feature columns and ``is_failure``.

        Returns:
            ``[X_train, X_test, y_train, y_test]`` where the ``X`` entries
            are scaled arrays and the ``y`` entries are label series.
        """
        self.feature_names = [
            col for col in data.columns
            if col not in ['is_failure', 'timestamp']
        ]
        X = data[self.feature_names]
        y = data['is_failure']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        return [X_train_scaled, X_test_scaled, y_train, y_test]

    def train_model(self, X_train, y_train):
        """Tune and fit a random forest with a randomised search.

        Runs 30 sampled hyper-parameter combinations with 5-fold
        cross-validation, scored by F1, and keeps the best estimator in
        ``self.model``.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.

        Returns:
            The best estimator found by the search.
        """
        param_dist = {
            'n_estimators': np.arange(50, 300, 50),
            'max_depth': [5, 10, 15, 20, None],
            'min_samples_split': [2, 5, 10, 20],
            'min_samples_leaf': [1, 2, 4, 8],
            'max_features': ['sqrt', 'log2', None],
            'bootstrap': [True, False],
        }
        random_search = RandomizedSearchCV(
            estimator=RandomForestClassifier(random_state=42),
            param_distributions=param_dist,
            n_iter=30,
            cv=5,
            scoring='f1',
            verbose=1,
            n_jobs=-1,
            random_state=42,
        )
        random_search.fit(X_train, y_train)
        self.model = random_search.best_estimator_
        return self.model

    def evaluate_model(self, X_test, y_test):
        """Print a classification report and plot confusion matrix and ROC.

        Args:
            X_test: Test feature matrix, scaled like the training data.
            y_test: True labels for ``X_test``.
        """
        y_pred = self.model.predict(X_test)
        print("Classification Report:")
        print(classification_report(y_test, y_pred))

        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(3, 3))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.show()

        # The ROC curve is built from the predicted probability of class 1.
        y_pred_proba = self.model.predict_proba(X_test)[:, 1]
        fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
        roc_auc = auc(fpr, tpr)
        plt.figure(figsize=(4, 4))
        plt.plot(fpr, tpr, color='darkorange', lw=2,
                 label=f'ROC curve (AUC = {roc_auc:.2f})')
        # Diagonal reference line.
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC) Curve')
        plt.legend(loc="lower right")
        plt.show()

    def verify_with_gru(self, X_train, X_test, y_train, y_test, epochs=30, batch_size=32):
        """Train a small GRU network and compare it with the random forest.

        Args:
            X_train: Training feature matrix (2-D).
            X_test: Test feature matrix (2-D); also passed unchanged to the
                random forest in ``self.model``.
            y_train: Training labels.
            y_test: Test labels.
            epochs: Number of training epochs for the GRU.
            batch_size: Mini-batch size for the GRU.

        Returns:
            ``{'random_forest': {...}, 'gru': {...}}`` where each inner dict
            holds the ``accuracy`` and ``f1`` measured on the test set.
        """
        # The network gets its own scaler, fitted on the training part only.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # GRU layers expect (samples, timesteps, features); each feature
        # vector is presented as a sequence with a single time step.
        n_features = X_train_scaled.shape[1]
        X_train_scaled = X_train_scaled.reshape((X_train_scaled.shape[0], 1, n_features))
        X_test_scaled = X_test_scaled.reshape((X_test_scaled.shape[0], 1, n_features))

        model = Sequential([
            GRU(64, input_shape=(1, n_features), return_sequences=False),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss='binary_crossentropy', metrics=['accuracy'])

        # Plain arrays avoid any dependence on the pandas index of the labels.
        model.fit(
            X_train_scaled, np.asarray(y_train),
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            verbose=1
        )

        # The sigmoid output has shape (n, 1); flatten it so the metrics
        # receive a 1-D label vector.
        y_pred_prob = model.predict(X_test_scaled)
        y_pred = (y_pred_prob > 0.5).astype(int).ravel()

        gru_f1 = f1_score(y_test, y_pred)
        gru_acc = accuracy_score(y_test, y_pred)

        print("\n✅ GRU Model Evaluation:")
        print(f"Accuracy: {gru_acc:.4f}")
        print(f"F1 Score: {gru_f1:.4f}")

        # The random forest is scored on the features exactly as passed in.
        rf_pred = self.model.predict(X_test)
        rf_f1 = f1_score(y_test, rf_pred)
        rf_acc = accuracy_score(y_test, rf_pred)

        print("\n📊 Random Forest vs GRU Comparison:")
        print(f"Random Forest - Accuracy: {rf_acc:.4f}, F1: {rf_f1:.4f}")
        print(f"GRU Model     - Accuracy: {gru_acc:.4f}, F1: {gru_f1:.4f}")

        return {
            'random_forest': {'accuracy': rf_acc, 'f1': rf_f1},
            'gru': {'accuracy': gru_acc, 'f1': gru_f1}
        }

    def analyze_feature_importance(self):
        """Plot the fitted model's feature importances, largest first."""
        importances = self.model.feature_importances_
        indices = np.argsort(importances)[::-1]
        positions = range(len(importances))
        plt.figure(figsize=(8, 4))
        plt.title('Feature Importance for Failure Prediction')
        plt.bar(positions, importances[indices])
        plt.xticks(positions, [self.feature_names[i] for i in indices], rotation=45)
        plt.tight_layout()
        plt.show()

    def correlation_heatmap(self, data):
        """Plot an annotated correlation heatmap of the numeric columns.

        Args:
            data: Frame to analyse. Non-numeric columns such as
                ``timestamp`` are left out of the correlation.
        """
        # Without numeric_only=True recent pandas versions try to correlate
        # the datetime column as well and fail.
        correlation_matrix = data.corr(numeric_only=True)
        plt.figure(figsize=(8, 6))
        sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm')
        plt.title('Feature Correlation Heatmap')
        plt.show()

    def feature_distribution(self, data):
        """Plot a histogram of every feature, split by failure label.

        Args:
            data: Frame holding the feature columns and ``is_failure``.
        """
        features = [
            'system_temperature',
            'power_unit_temperature',
            'ambient_temperature',
            'system_vibration',
            'power_unit_vibration',
            'hydraulic_pressure',
            'voltage_fluctuation',
            'current_draw',
            'operational_hours',
            'duty_cycle',
            'load_capacity',
            'days_since_maintenance',
            'previous_failures',
        ]
        plt.figure(figsize=(15, 10))
        # A 4x4 grid has room for all 13 features.
        for position, feature in enumerate(features, start=1):
            plt.subplot(4, 4, position)
            sns.histplot(data, x=feature, hue='is_failure', kde=True,
                         bins=30, palette='coolwarm')
            plt.title(f'Distribution of {feature}')
        plt.tight_layout()
        plt.show()

    def time_series_plot(self, data):
        """Plot the daily mean of ``is_failure`` over time.

        Args:
            data: Frame with a datetime ``timestamp`` column and the
                ``is_failure`` label.
        """
        time_grouped = data.groupby(data['timestamp'].dt.date)['is_failure'].mean()
        plt.figure()
        plt.plot(time_grouped.index, time_grouped.values, marker='o', color='teal')
        plt.title('Average Failure Probability Over Time')
        plt.xlabel('Date')
        plt.ylabel('Failure Probability')
        plt.xticks(rotation=45)
        plt.grid()
        plt.show()

    def predict_maintenance_needs(self, current_data):
        """Score new readings and bucket them into risk levels.

        Args:
            current_data: Frame containing at least the columns listed in
                ``self.feature_names``.

        Returns:
            A ``pandas.DataFrame`` with ``Failure_Probability`` (predicted
            probability of class 1) and ``Risk_Level``: ``Low`` for
            [0, 0.3], ``Medium`` for (0.3, 0.6] and ``High`` for (0.6, 1].
        """
        scaled_data = self.scaler.transform(current_data[self.feature_names])
        failure_prob = self.model.predict_proba(scaled_data)[:, 1]
        # include_lowest keeps a probability of exactly 0 in the 'Low'
        # bucket; without it pd.cut would label such rows NaN.
        risk_level = pd.cut(
            failure_prob,
            bins=[0, 0.3, 0.6, 1],
            labels=['Low', 'Medium', 'High'],
            include_lowest=True,
        )
        return pd.DataFrame({
            'Failure_Probability': failure_prob,
            'Risk_Level': risk_level,
        })