#### Fixed features

In [1]:
import requests

# Smoke test: send one fixed feature vector to every model served by the
# local prediction API and print what each one answers.
url = 'http://127.0.0.1:5000/predict'

# Seconds to wait for the API. Without an explicit timeout `requests` can
# block indefinitely if the server never answers.
REQUEST_TIMEOUT = 10

# Define the models you want to test
models = ['random_forest', 'decision_tree', 'xgboost']

# Define the fixed non-failure features
# NOTE(review): presumably ordered as vibration, temperature, pressure,
# flow rate (the order the simulation cells use) -- confirm against the API.
fixed_features = [0.549671415, 76.99677718, 93.24821725, 40.46096221]

# Loop through each model and test with the fixed features
for i, model in enumerate(models, start=1):
    data = {
        'features': fixed_features,
        'model': model
    }

    try:
        # The request itself is inside the try block so that one unreachable
        # or slow model does not abort the remaining requests.
        response = requests.post(url, json=data, timeout=REQUEST_TIMEOUT)

        if not response.ok:
            # Report HTTP failures as errors instead of labelling the error
            # body as a "Prediction".
            print(
                f"Request {i} - Model: {model} - Features: {fixed_features} - "
                f"Error: HTTP {response.status_code}: {response.text}"
            )
            continue

        print(f"Request {i} - Model: {model} - Features: {fixed_features} - Prediction: {response.json()}")
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout problems; ValueError
        # covers a response body that is not valid JSON.
        print(f"Request {i} - Model: {model} - Features: {fixed_features} - Error: {e}")


Request 1 - Model: random_forest - Features: [0.549671415, 76.99677718, 93.24821725, 40.46096221] - Prediction: {'prediction': [0]}
Request 2 - Model: decision_tree - Features: [0.549671415, 76.99677718, 93.24821725, 40.46096221] - Prediction: {'prediction': [0]}
Request 3 - Model: xgboost - Features: [0.549671415, 76.99677718, 93.24821725, 40.46096221] - Prediction: {'prediction': [0]}


### Simulation Results on 1000 Random Inputs for all three Models

In [2]:
import pandas as pd
import numpy as np
import requests
from collections import defaultdict

# Simulate hourly sensor readings, ask every model for a prediction on each
# reading, store the majority vote per reading in a CSV, and print how often
# each model predicted each label.

# Define the API endpoint
url = 'http://127.0.0.1:5000/predict'

# Seconds to wait for each API call. Without an explicit timeout `requests`
# can block indefinitely if the server never answers.
REQUEST_TIMEOUT = 10

# Fixed seed so that Restart & Run All regenerates the same simulated inputs.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the models you want to test
models = ['random_forest', 'decision_tree', 'xgboost']

# Define the ranges for each feature based on your dataset, as (low, high)
vibration_range = (0.175873, 0.885273)
temperature_range = (55.298057, 85.965538)
pressure_range = (69.804878, 139.262377)
flow_rate_range = (35.352757, 66.215465)

# Generate timestamps: one simulated reading per hour, both ends inclusive
start_time = pd.to_datetime('2024-01-01 00:00')
end_time = pd.to_datetime('2024-02-11 15:00')
timestamps = pd.date_range(start=start_time, end=end_time, freq='h')  # Use 'h' for hourly

# Rows of [timestamp, 4 features, final_failure]; turned into a DataFrame below
data = []

# Per-model tally of how often each label was predicted
prediction_counts = {model: defaultdict(int) for model in models}

# Per-model tally of requests that produced no usable prediction
failed_requests = defaultdict(int)

# Loop through each timestamp to generate random data and predictions
for timestamp in timestamps:
    # Generate random features within the specified ranges and round to 3 decimal places
    random_vibration = round(float(rng.uniform(*vibration_range)), 3)
    random_temperature = round(float(rng.uniform(*temperature_range)), 3)
    random_pressure = round(float(rng.uniform(*pressure_range)), 3)
    random_flow_rate = round(float(rng.uniform(*flow_rate_range)), 3)

    random_features = [random_vibration, random_temperature, random_pressure, random_flow_rate]

    # Collect one prediction per model; failed requests contribute nothing
    # to the vote instead of voting "None".
    failure_predictions = []
    for model in models:
        data_point = {
            'features': random_features,
            'model': model
        }
        try:
            response = requests.post(url, json=data_point, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            payload = response.json()
            predictions = payload.get('prediction') if isinstance(payload, dict) else None
            if not isinstance(predictions, list) or not predictions:
                # A body without a prediction list is an error, not a label.
                raise ValueError(f"response has no usable 'prediction' list: {payload!r}")
            prediction = predictions[0]
        except (requests.RequestException, ValueError) as e:
            print(f"Error for Timestamp {timestamp} - Model: {model} - Features: {random_features} - Error: {e}")
            failed_requests[model] += 1
            continue

        failure_predictions.append(prediction)
        prediction_counts[model][prediction] += 1

    # Final label = majority vote over the predictions that were obtained.
    # Iterating the candidate labels in sorted order makes ties (possible
    # when a model failed to answer) resolve deterministically to the
    # smallest label. With no predictions at all the label is None.
    if failure_predictions:
        final_failure = max(sorted(set(failure_predictions)), key=failure_predictions.count)
    else:
        final_failure = None

    # Append data to the list
    data.append([timestamp, random_vibration, random_temperature, random_pressure, random_flow_rate, final_failure])

# Create DataFrame
df = pd.DataFrame(data, columns=['timestamp', 'vibration_level', 'temperature_C', 'pressure_PSI', 'flow_rate_m3h', 'failure'])

# Save DataFrame to CSV
df.to_csv('simulated_pump_data_for_non_regularized_non_tuned_model.csv', index=False)

# Print prediction statistics
for model, counts in prediction_counts.items():
    print(f"\nModel: {model}")
    for prediction, count in counts.items():
        print(f"Prediction {prediction}: {count} times")
    if failed_requests[model]:
        print(f"Failed requests: {failed_requests[model]}")

print("CSV file has been created and saved.")


Model: random_forest
Prediction 1: 431 times
Prediction 0: 569 times

Model: decision_tree
Prediction 1: 489 times
Prediction 0: 511 times

Model: xgboost
Prediction 1: 404 times
Prediction 0: 596 times
CSV file has been created and saved.


### Results for 1000 Randomly Generated Inputs for the Regularized, Tuned XGBoost Model

In [4]:
import pandas as pd
import numpy as np
import requests
from collections import defaultdict

# Simulate hourly sensor readings, request one prediction per reading from
# the API, save readings + predictions to a CSV, and print the label counts.

# Define the API endpoint
url = 'http://127.0.0.1:5000/predict'

# Seconds to wait for each API call. Without an explicit timeout `requests`
# can block indefinitely if the server never answers.
REQUEST_TIMEOUT = 10

# Fixed seed so that Restart & Run All regenerates the same simulated inputs.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the model you want to test
# NOTE(review): this name is only used in the printed summary; it is NOT sent
# to the API (the payload below carries only 'features'). The results
# therefore describe whichever model the server uses by default -- confirm
# that the server was actually serving this model when the cell was run.
model = 'xgboost_regularized_tuned_model'

# Define the ranges for each feature based on your dataset, as (low, high)
vibration_range = (0.175873, 0.885273)
temperature_range = (55.298057, 85.965538)
pressure_range = (69.804878, 139.262377)
flow_rate_range = (35.352757, 66.215465)

# Generate timestamps: one simulated reading per hour, both ends inclusive
start_time = pd.to_datetime('2024-01-01 00:00')
end_time = pd.to_datetime('2024-02-11 15:00')
timestamps = pd.date_range(start=start_time, end=end_time, freq='h')  # Use 'h' for hourly

# Rows of [timestamp, 4 features, prediction]; turned into a DataFrame below
data = []

# Tally of how often each label was predicted
prediction_counts = defaultdict(int)

# Number of requests that produced no usable prediction
failed_requests = 0

# Loop through each timestamp to generate random data and predictions
for timestamp in timestamps:
    # Generate random features within the specified ranges and round to 3 decimal places
    random_vibration = round(float(rng.uniform(*vibration_range)), 3)
    random_temperature = round(float(rng.uniform(*temperature_range)), 3)
    random_pressure = round(float(rng.uniform(*pressure_range)), 3)
    random_flow_rate = round(float(rng.uniform(*flow_rate_range)), 3)

    random_features = [random_vibration, random_temperature, random_pressure, random_flow_rate]

    # Get a prediction from the API for this reading
    data_point = {
        'features': random_features
    }
    try:
        response = requests.post(url, json=data_point, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        payload = response.json()
        predictions = payload.get('prediction') if isinstance(payload, dict) else None
        if not isinstance(predictions, list) or not predictions:
            # A body without a prediction list is an error, not a label.
            raise ValueError(f"response has no usable 'prediction' list: {payload!r}")
        prediction = predictions[0]
    except (requests.RequestException, ValueError) as e:
        print(f"Error for Timestamp {timestamp} - Features: {random_features} - Error: {e}")
        failed_requests += 1
        prediction = None  # keep the row, but with an empty 'failure' value
    else:
        prediction_counts[prediction] += 1

    data.append([timestamp, random_vibration, random_temperature, random_pressure, random_flow_rate, prediction])

# Create DataFrame
df = pd.DataFrame(data, columns=['timestamp', 'vibration_level', 'temperature_C', 'pressure_PSI', 'flow_rate_m3h', 'failure'])

# Save DataFrame to CSV
df.to_csv('simulated_data_for_regularized_xgboost.csv', index=False)

# Print prediction statistics for XGBoost model
print(f"\nModel: {model}")
for prediction, count in prediction_counts.items():
    print(f"Prediction {prediction}: {count} times")
if failed_requests:
    print(f"Failed requests: {failed_requests}")

print("CSV file has been created and saved.")


Model: xgboost_regularized_tuned_model
Prediction 0: 609 times
Prediction 1: 391 times
CSV file has been created and saved.


## Tweaked Model

In [5]:
import pandas as pd
import numpy as np
import requests
from collections import defaultdict

# Simulate hourly sensor readings, request one prediction per reading from
# the API, save readings + predictions to a CSV, and print the label counts.

# Define the API endpoint
url = 'http://127.0.0.1:5000/predict'

# Seconds to wait for each API call. Without an explicit timeout `requests`
# can block indefinitely if the server never answers.
REQUEST_TIMEOUT = 10

# Fixed seed so that Restart & Run All regenerates the same simulated inputs.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the model you want to test
# NOTE(review): this name is only used in the printed summary; it is NOT sent
# to the API (the payload below carries only 'features'). The results
# therefore describe whichever model the server uses by default -- confirm
# that the server was actually serving this model when the cell was run.
model = 'tweaked_xgboost_regularized_tuned_model'

# Define the ranges for each feature based on your dataset, as (low, high)
vibration_range = (0.175873, 0.885273)
temperature_range = (55.298057, 85.965538)
pressure_range = (69.804878, 139.262377)
flow_rate_range = (35.352757, 66.215465)

# Generate timestamps: one simulated reading per hour, both ends inclusive
start_time = pd.to_datetime('2024-01-01 00:00')
end_time = pd.to_datetime('2024-02-11 15:00')
timestamps = pd.date_range(start=start_time, end=end_time, freq='h')  # Use 'h' for hourly

# Rows of [timestamp, 4 features, prediction]; turned into a DataFrame below
data = []

# Tally of how often each label was predicted
prediction_counts = defaultdict(int)

# Number of requests that produced no usable prediction
failed_requests = 0

# Loop through each timestamp to generate random data and predictions
for timestamp in timestamps:
    # Generate random features within the specified ranges and round to 3 decimal places
    random_vibration = round(float(rng.uniform(*vibration_range)), 3)
    random_temperature = round(float(rng.uniform(*temperature_range)), 3)
    random_pressure = round(float(rng.uniform(*pressure_range)), 3)
    random_flow_rate = round(float(rng.uniform(*flow_rate_range)), 3)

    random_features = [random_vibration, random_temperature, random_pressure, random_flow_rate]

    # Get a prediction from the API for this reading
    data_point = {
        'features': random_features
    }
    try:
        response = requests.post(url, json=data_point, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        payload = response.json()
        predictions = payload.get('prediction') if isinstance(payload, dict) else None
        if not isinstance(predictions, list) or not predictions:
            # A body without a prediction list is an error, not a label.
            raise ValueError(f"response has no usable 'prediction' list: {payload!r}")
        prediction = predictions[0]
    except (requests.RequestException, ValueError) as e:
        print(f"Error for Timestamp {timestamp} - Features: {random_features} - Error: {e}")
        failed_requests += 1
        prediction = None  # keep the row, but with an empty 'failure' value
    else:
        prediction_counts[prediction] += 1

    data.append([timestamp, random_vibration, random_temperature, random_pressure, random_flow_rate, prediction])

# Create DataFrame
df = pd.DataFrame(data, columns=['timestamp', 'vibration_level', 'temperature_C', 'pressure_PSI', 'flow_rate_m3h', 'failure'])

# Save DataFrame to CSV
df.to_csv('simulated_data_for_regularized_tweaked_xgboost.csv', index=False)

# Print prediction statistics for XGBoost model
print(f"\nModel: {model}")
for prediction, count in prediction_counts.items():
    print(f"Prediction {prediction}: {count} times")
if failed_requests:
    print(f"Failed requests: {failed_requests}")

print("CSV file has been created and saved.")


Model: tweaked_xgboost_regularized_tuned_model
Prediction 1: 317 times
Prediction 0: 683 times
CSV file has been created and saved.


## Feature Engineered Model

In [None]:
import pandas as pd
import numpy as np
import requests
from collections import defaultdict

# Simulate hourly sensor readings, request one prediction per reading from
# the API, save readings + predictions to a CSV, and print the label counts.

# Define the API endpoint
url = 'http://127.0.0.1:5000/predict'

# Seconds to wait for each API call. Without an explicit timeout `requests`
# can block indefinitely if the server never answers.
REQUEST_TIMEOUT = 10

# Fixed seed so that Restart & Run All regenerates the same simulated inputs.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the model you want to test
# NOTE(review): this name is only used in the printed summary; it is NOT sent
# to the API (the payload below carries only 'features'). The results
# therefore describe whichever model the server uses by default -- confirm
# that the server was actually serving this model when the cell was run.
model = 'xgboost_feature_engineered_model'

# Define the ranges for each feature based on your dataset, as (low, high)
vibration_range = (0.175873, 0.885273)
temperature_range = (55.298057, 85.965538)
pressure_range = (69.804878, 139.262377)
flow_rate_range = (35.352757, 66.215465)

# Generate timestamps: one simulated reading per hour, both ends inclusive
start_time = pd.to_datetime('2024-01-01 00:00')
end_time = pd.to_datetime('2024-02-11 15:00')
timestamps = pd.date_range(start=start_time, end=end_time, freq='h')  # Use 'h' for hourly

# Rows of [timestamp, 4 features, prediction]; turned into a DataFrame below
data = []

# Tally of how often each label was predicted
prediction_counts = defaultdict(int)

# Number of requests that produced no usable prediction
failed_requests = 0

# Loop through each timestamp to generate random data and predictions
for timestamp in timestamps:
    # Generate random features within the specified ranges and round to 3 decimal places
    random_vibration = round(float(rng.uniform(*vibration_range)), 3)
    random_temperature = round(float(rng.uniform(*temperature_range)), 3)
    random_pressure = round(float(rng.uniform(*pressure_range)), 3)
    random_flow_rate = round(float(rng.uniform(*flow_rate_range)), 3)

    random_features = [random_vibration, random_temperature, random_pressure, random_flow_rate]

    # Get a prediction from the API for this reading
    data_point = {
        'features': random_features
    }
    try:
        response = requests.post(url, json=data_point, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        payload = response.json()
        predictions = payload.get('prediction') if isinstance(payload, dict) else None
        if not isinstance(predictions, list) or not predictions:
            # A body without a prediction list is an error, not a label.
            raise ValueError(f"response has no usable 'prediction' list: {payload!r}")
        prediction = predictions[0]
    except (requests.RequestException, ValueError) as e:
        print(f"Error for Timestamp {timestamp} - Features: {random_features} - Error: {e}")
        failed_requests += 1
        prediction = None  # keep the row, but with an empty 'failure' value
    else:
        prediction_counts[prediction] += 1

    data.append([timestamp, random_vibration, random_temperature, random_pressure, random_flow_rate, prediction])

# Create DataFrame
df = pd.DataFrame(data, columns=['timestamp', 'vibration_level', 'temperature_C', 'pressure_PSI', 'flow_rate_m3h', 'failure'])

# Save DataFrame to CSV
df.to_csv('simulated_data_xgboost_feature_engineered_model.csv', index=False)

# Print prediction statistics for XGBoost model
print(f"\nModel: {model}")
for prediction, count in prediction_counts.items():
    print(f"Prediction {prediction}: {count} times")
if failed_requests:
    print(f"Failed requests: {failed_requests}")

print("CSV file has been created and saved.")


Model: xgboost_feature_engineered_model
Prediction 0: 727 times
Prediction 1: 273 times
CSV file has been created and saved.
