In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from joblib import load
import numpy as np
from google.colab import drive

# Mount Google Drive inside the Colab runtime so that the shared-drive paths
# used below exist under /content/drive.
drive.mount('/content/drive')

# Change directory to the location of the file
# (the saved models are later loaded by bare file name, so they are resolved
# relative to this working directory).
%cd /content/drive/Shareddrives/ACS6403 - Group Project/Deliverable 5 Final Project Report/Prediction Model/

# List the files to confirm
!ls

# Locations of the preprocessed input/output tables on the shared drive.
input_data_1 = '/content/drive/Shareddrives/ACS6403 - Group Project/Preprocessed Data/input_data.txt'  # Update the path
output_data_1 = '/content/drive/Shareddrives/ACS6403 - Group Project/Preprocessed Data/output_data.txt'  # Update the path

# Load the input and output data
# NOTE(review): read_csv is called with its default header handling, so the
# first line of each file is consumed as a header row before the column names
# below replace it. Confirm both files really start with a header line;
# if they do not, the first sample is silently dropped (use header=None).
input_Data = pd.read_csv(input_data_1, sep=',')
input_Data.columns = ['Input 1', 'Input 2', 'Input 3']

# The separator is a regular expression (one or more whitespace characters).
# It is written as a raw string because '\s' inside a normal string literal is
# an invalid escape sequence, which newer Python versions warn about.
output_Data = pd.read_csv(output_data_1, sep=r'\s+')
output_Data.columns = ['Output 1', 'Output 2', 'Output 3', 'Output 4', 'Output 5', 'Output 6', 'Output 7']

# Use one MinMaxScaler per table. Re-fitting a single scaler on the outputs
# would overwrite the parameters learned from the inputs, making it impossible
# to scale new inputs (or invert the input scaling) later on.
input_scaler = MinMaxScaler()
output_scaler = MinMaxScaler()

# Backward-compatible alias: previously the single `scaler` object ended up
# fitted on the output table, so keep that name pointing at the output scaler.
scaler = output_scaler

# Scale input and output data, keeping the original column labels.
# NOTE(review): the scalers are fitted on all rows before the train/test split,
# so the min/max also reflect the rows that end up in the test set. Left as-is
# because the saved models loaded later were presumably trained on data scaled
# this way - confirm before changing.
input_Data_normalized = pd.DataFrame(
    input_scaler.fit_transform(input_Data),
    columns=input_Data.columns,
)
output_Data_normalized = pd.DataFrame(
    output_scaler.fit_transform(output_Data),
    columns=output_Data.columns,
)

# Split the data into training and testing sets
# (80 % / 20 %, fixed random_state so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    input_Data_normalized,
    output_Data_normalized,
    test_size=0.2,
    random_state=42,
)

# List of algorithms
algorithms = ['Poly', 'randomforest', 'SVM']

# Number of saved models per algorithm. The files are named
# 'Copy of <algo>_model_<k>.joblib'; the directory listing in this notebook
# shows files numbered up to 7, matching the 7 output columns.
N_MODELS_PER_ALGO = 7

# Load models and store their predictions (one column per loaded model)
all_predictions = []

for algo_name in algorithms:
    # range() excludes its stop value, so the upper bound must be N + 1 to
    # include model number N (the previous range(1, 7) skipped model 7).
    for index in range(1, N_MODELS_PER_ALGO + 1):
        filename = f'Copy of {algo_name}_model_{index}.joblib'
        # SECURITY NOTE: joblib.load unpickles the file, which can execute
        # arbitrary code - only load model files from a trusted source.
        model = load(filename)

        # Check the model type and predict accordingly
        if hasattr(model, 'predict'):
            predictions = model.predict(X_test)
        elif 'VARResults' in str(type(model)):
            # Handle the VARResults separately
            # NOTE(review): a one-step forecast yields a single vector rather
            # than one value per test row; it will be rejected by the
            # row-count check below unless the sizes happen to match.
            predictions = model.forecast(X_test.values, steps=1)  # Adjust 'steps' as needed
            predictions = predictions[0]  # Select the first step
        else:
            raise TypeError(f'Unsupported model type: {type(model)}')

        # Store every model's output as a single column vector.
        column = np.asarray(predictions).reshape(-1, 1)
        # Fail early with a clear message instead of a cryptic hstack error
        # (or a silently misaligned matrix) when a model does not return
        # exactly one value per test sample.
        if column.shape[0] != len(X_test):
            raise ValueError(
                f'{filename}: expected {len(X_test)} predictions (one per test row), '
                f'got {column.shape[0]}'
            )
        all_predictions.append(column)

# Combine all predictions into a single matrix
# (rows = test samples, columns = loaded models in load order).
combined_predictions = np.hstack(all_predictions)
print(combined_predictions)

# Second-stage split: the stacked base-model predictions become the features
# and the matching rows of y_test the targets, divided 50/50 with a fixed seed.
stacked_split = train_test_split(
    combined_predictions,
    y_test,
    test_size=0.5,
    random_state=42,
)
X_train_preds, X_test_preds, y_train_preds, y_test_preds = stacked_split

# Report the shape of each part, in the same order as before.
for split_name, split_data in (
    ("X_train_preds", X_train_preds),
    ("y_train_preds", y_train_preds),
    ("X_test_preds", X_test_preds),
    ("y_test_preds", y_test_preds),
):
    print(f"{split_name} shape:", split_data.shape)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/Shareddrives/ACS6403 - Group Project/Deliverable 5 Final Project Report/Prediction Model
'Copy of Poly_model_0.joblib'	       'Copy of SVM_model_6.joblib'
'Copy of Poly_model_1.joblib'	       'Copy of SVM_model_7.joblib'
'Copy of Poly_model_2.joblib'	       'Copy of System_model_1.joblib'
'Copy of Poly_model_3.joblib'	       'Copy of System_model_2.joblib'
'Copy of Poly_model_4.joblib'	       'Copy of System_model_3.joblib'
'Copy of Poly_model_5.joblib'	       'Copy of System_model_4.joblib'
'Copy of Poly_model_6.joblib'	       'Copy of System_model_5.joblib'
'Copy of Poly_model_7.joblib'	       'Copy of System_model_6.joblib'
'Copy of randomforest_model_1.joblib'  'Copy of System_model_7.joblib'
'Copy of randomforest_model_2.joblib'  'Deep Feedforward Network'
'Copy of randomforest_model_3.joblib'   input_data_normalized.csv
'Copy of randomfor

ValueError: shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)

In [1]:
from google.colab import drive

# Directory under which Colab exposes the mounted Google Drive.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import numpy as np

# Fit one multi-output decision tree on the stacked predictions; all target
# columns are learned jointly (fit() returns the estimator itself).
regressor = DecisionTreeRegressor(random_state=42).fit(X_train_preds, y_train_preds)

# Predictions for the held-out half of the stacked data.
predictions = regressor.predict(X_test_preds)

# Per-output errors: 'raw_values' yields one score per target column instead
# of a single averaged number.
per_output = dict(multioutput='raw_values')
mse = mean_squared_error(y_test_preds, predictions, **per_output)
mae = mean_absolute_error(y_test_preds, predictions, **per_output)
print(f'Mean Squared Error for each output: {mse}')
print(f'Mean Absolute Error for each output: {mae}')

# Collapse the per-output scores into a single overall figure each.
average_mse = mse.mean()
average_mae = mae.mean()
print(f'Average Mean Squared Error: {average_mse}')
print(f'Average Mean Absolute Error: {average_mae}')

# Accumulators: fitted estimators and their test errors, one entry per target.
models, mse_scores, mae_scores = [], [], []

# Fit an independent decision tree for every target column, score it on the
# held-out rows and draw a predicted-vs-actual scatter plot.
# Note: `model`, `pred`, `mse` and `mae` stay bound to the values of the last
# iteration once the loop has finished.
for i in range(y_train_preds.shape[1]):
    target_train = y_train_preds.iloc[:, i]
    target_test = y_test_preds.iloc[:, i]

    model = DecisionTreeRegressor(random_state=42)
    model.fit(X_train_preds, target_train)
    pred = model.predict(X_test_preds)

    # Scalar errors for this single target.
    mse = mean_squared_error(target_test, pred)
    mae = mean_absolute_error(target_test, pred)
    models.append(model)
    mse_scores.append(mse)
    mae_scores.append(mae)

    # Scatter of predictions against the truth; the dashed red line plots the
    # true values against themselves, i.e. the y = x reference.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(target_test, pred, edgecolor='k', alpha=0.6, label='Predictions')
    ax.plot(target_test, target_test, 'r--', label='Actual')
    ax.set_title(f'Regression Results for Output Column {i+1}')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')
    ax.legend()
    ax.grid(True)
    plt.show()

    print(f'MSE for output column {i+1}: {mse}')
    print(f'MAE for output column {i+1}: {mae}')

# Overall performance: plain arithmetic mean of the per-target scores.
average_mse = sum(mse_scores) / len(mse_scores)
average_mae = sum(mae_scores) / len(mae_scores)
print(f'Average Mean Squared Error: {average_mse}')
print(f'Average Mean Absolute Error: {average_mae}')

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# One-at-a-time sensitivity analysis: sweep a single input feature across its
# observed range, hold all other features at their test-set values, and
# record the mean model prediction at each sweep value.
#
# NOTE(review): `model` and `X_test` are whatever earlier cells left in the
# kernel. When the notebook is run top to bottom, `model` was last rebound by
# the per-target decision-tree loop (fitted on the stacked predictions, not on
# X_test) - bind `model` explicitly to the intended estimator before running.
feature_index = 0  # Index of the feature to analyze

# X_test may be a pandas DataFrame (as produced by train_test_split on
# DataFrames) or a NumPy array. DataFrames do not support `X[:, j]` indexing,
# so select and assign the column positionally via .iloc in that case.
is_frame = hasattr(X_test, 'iloc')
if is_frame:
    feature_column = X_test.iloc[:, feature_index]
else:
    feature_column = X_test[:, feature_index]

values = np.linspace(start=np.min(feature_column),
                     stop=np.max(feature_column),
                     num=100)  # Create 100 points between min and max

outputs = []
for val in values:
    # Work on a copy so the real test set is never modified.
    X_temp = X_test.copy()
    # Set all instances of the selected feature to `val`
    if is_frame:
        X_temp.iloc[:, feature_index] = val
    else:
        X_temp[:, feature_index] = val
    pred = model.predict(X_temp)
    outputs.append(np.mean(pred))  # Store the average prediction (or choose another summary statistic)

# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(values, outputs, 'b-')
plt.xlabel(f'Value of Feature {feature_index}')
plt.ylabel('Predicted Output')
plt.title('Sensitivity Analysis of Feature ' + str(feature_index))
plt.grid(True)
plt.show()