Question 3: Calculate R2 Score <br>

Description:<br>
Calculate the R2 score (coefficient of determination) for a set of predicted values compared against the corresponding actual values.

In [1]:
# Write your code here

import numpy as np
from sklearn.metrics import r2_score

# Notebook banner: title, objective, and a separator rule, one per line.
print(
    "\n".join(
        (
            "--- Question 3: Calculate R2 Score ---",
            "Objective: Calculate the R2 score for a set of predicted values.",
            "------------------------------------------------------------------",
        )
    )
)

def calculate_and_print_r2(y_true, y_pred, scenario_name):
    """
    Calculate the R2 score for one scenario and print it with an interpretation.

    Args:
        y_true: Actual target values; passed unchanged to ``r2_score``.
        y_pred: Predicted values; passed unchanged to ``r2_score``.
        scenario_name: Label shown in the printed header and in error messages.

    Returns:
        None. All results are reported via ``print``. Exceptions raised while
        computing the score are caught and reported instead of propagated.
    """
    # Keep the try body limited to the call that can actually fail.
    try:
        r2 = r2_score(y_true, y_pred)
    except ValueError as e:
        print(f"\nERROR: Could not calculate R2 for scenario '{scenario_name}': {e}. Check if y_true and y_pred have compatible shapes or contain invalid values.")
        return
    except Exception as e:
        print(f"\nERROR: An unexpected error occurred for scenario '{scenario_name}': {e}")
        return

    print(f"\n--- Scenario: {scenario_name} ---")
    print(f"Actual Values (y_true): {y_true}")
    print(f"Predicted Values (y_pred): {y_pred}")
    print(f"Calculated R2 Score: {r2:.4f}")

    # NaN compares False against every threshold, so without this guard an
    # undefined score (e.g. fewer than two samples) would be reported as a
    # "Very poor fit" by the final else branch.
    if np.isnan(r2):
        print("Interpretation: R2 is undefined (NaN) for this input, e.g. when fewer than two samples are given. No conclusion about fit quality can be drawn.")
    elif r2 >= 0.75:
        print("Interpretation: Excellent fit. The model explains a very high proportion of the variance in the target variable.")
    elif r2 >= 0.5:
        print("Interpretation: Good fit. The model explains a significant portion of the variance in the target variable.")
    elif r2 >= 0:
        print("Interpretation: Moderate to poor fit. The model explains some, little, or none of the variance in the target variable compared to simply predicting the mean.")
    else:
        print("Interpretation: Very poor fit. The model performs worse than simply predicting the mean of the target variable. This indicates the model is fundamentally flawed for this data.")

# --- Scenario 1: Perfect Prediction (R2 = 1.0) ---
y_true_perfect = np.array([10, 20, 30, 40, 50])
y_pred_perfect = np.array([10, 20, 30, 40, 50])
calculate_and_print_r2(y_true_perfect, y_pred_perfect, "Perfect Prediction")

# --- Scenario 2: Good Prediction (R2 close to 1) ---
# Errors of only 1-2 units on targets spanning 10-80 leave almost all of the
# variance explained, so the score lands well above the 0.75 threshold.
y_true_good = np.array([10, 20, 30, 40, 50, 60, 70, 80])
y_pred_good = np.array([11, 19, 32, 38, 51, 59, 68, 79]) # Close predictions
calculate_and_print_r2(y_true_good, y_pred_good, "Good Prediction")

# --- Scenario 3: Poor Prediction (negative R2) ---
# Predictions that are unrelated to the targets are not merely "close to 0":
# their squared error exceeds that of predicting the mean, so R2 is negative.
y_true_poor = np.array([10, 20, 30, 40, 50, 60, 70, 80])
y_pred_poor = np.array([25, 5, 60, 15, 70, 30, 10, 45]) # Wildly off, somewhat random
calculate_and_print_r2(y_true_poor, y_pred_poor, "Poor Prediction (Random)")

# --- Scenario 4: Predicting the Mean (R2 = 0) ---
# If your model simply predicts the mean of the true values, R2 will be 0.
y_true_mean = np.array([10, 20, 30, 40, 50])
# One float array of the same shape, filled with the mean of the true values.
y_pred_mean = np.full(y_true_mean.shape, np.mean(y_true_mean))
calculate_and_print_r2(y_true_mean, y_pred_mean, "Predicting the Mean")

# --- Scenario 5: Worse than Predicting the Mean (Negative R2) ---
# This happens when the predictions have a larger squared error than simply
# using the average of the true values.
y_true_worse = np.array([10, 20, 30, 40, 50])
y_pred_worse = np.array([1, 100, 5, 80, 0]) # Very far from true values, worse than mean
calculate_and_print_r2(y_true_worse, y_pred_worse, "Worse than Mean Prediction")

# --- Scenario 6: Edge Case - Constant True Values (R2 is mathematically undefined) ---
# If y_true has no variance, the denominator of the R2 formula is zero.
# Scikit-learn does not raise here: with its default settings r2_score returns
# 1.0 when the predictions are perfect and 0.0 when they are not, instead of a
# non-finite value.
y_true_constant = np.array([50, 50, 50, 50, 50])
y_pred_constant_perfect = np.array([50, 50, 50, 50, 50])
y_pred_constant_imperfect = np.array([51, 49, 50, 51, 49]) # Even slightly off predictions
calculate_and_print_r2(y_true_constant, y_pred_constant_perfect, "Constant True Values (Perfect Pred)")
calculate_and_print_r2(y_true_constant, y_pred_constant_imperfect, "Constant True Values (Imperfect Pred)")


# Closing summary: a separator rule followed by the key points about R2,
# emitted as one newline-joined print.
print(
    "\n".join(
        (
            "\n------------------------------------------------------------------",
            "Key Takeaways on R2 Score:",
            " - R2 ranges from -infinity to 1. A higher R2 indicates a better fit.",
            " - It represents the proportion of variance in the dependent variable predictable from the independent variable(s).",
            " - R2 = 1: Perfect fit (all variance explained).",
            " - R2 = 0: The model explains no variance (as good as predicting the mean).",
            " - R2 < 0: The model is worse than predicting the mean (e.g., due to poor model choice or overfitting/underfitting).",
            " - It's important to consider R2 alongside other metrics (MAE, MSE, RMSE) and the context of your data, as a high R2 doesn't always guarantee a good model (e.g., overfitting or non-linear data fitted with linear model).",
        )
    )
)


--- Question 3: Calculate R2 Score ---
Objective: Calculate the R2 score for a set of predicted values.
------------------------------------------------------------------

--- Scenario: Perfect Prediction ---
Actual Values (y_true): [10 20 30 40 50]
Predicted Values (y_pred): [10 20 30 40 50]
Calculated R2 Score: 1.0000
Interpretation: Excellent fit. The model explains a very high proportion of the variance in the target variable.

--- Scenario: Good Prediction ---
Actual Values (y_true): [10 20 30 40 50 60 70 80]
Predicted Values (y_pred): [11 19 32 38 51 59 68 79]
Calculated R2 Score: 0.9960
Interpretation: Excellent fit. The model explains a very high proportion of the variance in the target variable.

--- Scenario: Poor Prediction (Random) ---
Actual Values (y_true): [10 20 30 40 50 60 70 80]
Predicted Values (y_pred): [25  5 60 15 70 30 10 45]
Calculated R2 Score: -0.9286
Interpretation: Very poor fit. The model performs worse than simply predicting the mean of the target variable