<a href="https://colab.research.google.com/github/MohanBabuc12/6th-sem-ML-Lab/blob/main/ML_Lab%20test%20code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so every run draws the same synthetic sample.
np.random.seed(42)
n_samples = 100

# Two independent uniform predictors: X1 spans [0, 10) and X2 spans [0, 5).
X1 = np.random.rand(n_samples, 1) * 10
X2 = np.random.rand(n_samples, 1) * 5

# The target is a fixed linear mix of both predictors plus standard-normal noise.
noise = np.random.randn(n_samples, 1)
y = 2 * X1 + 3 * X2 + noise

# Collapse each (n_samples, 1) column vector to 1-D and label it as a column.
data = {
    label: column.flatten()
    for label, column in zip(('BMI', 'Age', 'Sugar'), (X1, X2, y))
}
df = pd.DataFrame(data)

# Predictors (X) are the BMI and Age columns; the response (y) is Sugar.
X, y = df[['BMI', 'Age']], df['Sugar']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Linear Regression Model (Multilinear) ---
# fit() returns the estimator itself, so construction and training are chained.
lr_model = LinearRegression().fit(X_train, y_train)

# Predict the held-out targets
lr_y_pred = lr_model.predict(X_test)

# Score the held-out predictions: mean squared error and R-squared
lr_mse, lr_r2 = (
    mean_squared_error(y_test, lr_y_pred),
    r2_score(y_test, lr_y_pred),
)

# Report the Linear Regression metrics, one item per line
print(
    "--- Linear Regression (Multilinear) ---",
    f"Mean Squared Error: {lr_mse}",
    f"R-squared: {lr_r2}",
    sep="\n",
)

# --- Random Forest Regression Model ---
# fit() returns the estimator itself, so construction and training are chained;
# random_state pins the forest's randomness for repeatable results.
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predict the held-out targets
rf_y_pred = rf_model.predict(X_test)

# Score the held-out predictions: mean squared error and R-squared
rf_mse, rf_r2 = (
    mean_squared_error(y_test, rf_y_pred),
    r2_score(y_test, rf_y_pred),
)

# Report the Random Forest metrics, one item per line
print(
    "\n--- Random Forest Regression ---",
    f"Mean Squared Error: {rf_mse}",
    f"R-squared: {rf_r2}",
    sep="\n",
)

# --- Pseudo-accuracy calculation ---
def calculate_pseudo_accuracy(actual, predictions, tolerance):
    """Return the fraction of predictions within ``tolerance`` of the actual value.

    A prediction counts as "correct" when ``abs(actual - prediction)`` is less
    than or equal to ``tolerance``.

    Both inputs are converted to flat NumPy arrays and compared position by
    position. This avoids two silent failure modes of subtracting the raw
    inputs: pandas index alignment (two Series with different indices would
    yield NaNs) and NumPy broadcasting (a 1-D array against an ``(n, 1)``
    column vector would yield an ``(n, n)`` matrix).

    Args:
        actual: Array-like of observed target values (list, ndarray, Series).
        predictions: Array-like of predicted values with the same number of
            elements as ``actual``.
        tolerance: Largest absolute error still counted as correct.

    Returns:
        A float in ``[0.0, 1.0]``: the share of elements within tolerance.
        NaN differences never satisfy the comparison and count as misses.

    Raises:
        ValueError: If the inputs differ in element count or are empty.
    """
    actual_values = np.asarray(actual).ravel()
    predicted_values = np.asarray(predictions).ravel()

    if actual_values.size != predicted_values.size:
        raise ValueError(
            "actual and predictions must have the same number of elements "
            f"(got {actual_values.size} and {predicted_values.size})"
        )
    if actual_values.size == 0:
        raise ValueError("pseudo-accuracy is undefined for empty input")

    within_tolerance = np.abs(actual_values - predicted_values) <= tolerance
    # Integer count over integer size gives a plain Python float.
    return np.count_nonzero(within_tolerance) / within_tolerance.size

# Largest absolute error still counted as a "correct" prediction (adjustable)
tolerance = 2.0

# Score both models against the same held-out targets before reporting
lr_pseudo_accuracy = calculate_pseudo_accuracy(
    actual=y_test, predictions=lr_y_pred, tolerance=tolerance
)
rf_pseudo_accuracy = calculate_pseudo_accuracy(
    actual=y_test, predictions=rf_y_pred, tolerance=tolerance
)

# Report the tolerance first, then each model's pseudo-accuracy to two decimals
print(
    f"\nTolerance for pseudo-accuracy: +/- {tolerance}",
    f"Linear Regression Pseudo-Accuracy: {lr_pseudo_accuracy:.2f}",
    f"Random Forest Regression Pseudo-Accuracy: {rf_pseudo_accuracy:.2f}",
    sep="\n",
)

# --- Plotting ---
def _draw_actual_vs_predicted(position, actual, predicted, y_label, heading, **scatter_style):
    """Draw one actual-vs-predicted panel in a 1-row, 2-column subplot grid."""
    plt.subplot(1, 2, position)
    plt.scatter(actual, predicted, alpha=0.5, **scatter_style)
    # Dashed diagonal marks where predicted equals actual (the ideal line)
    span = [actual.min(), actual.max()]
    plt.plot(span, span, 'k--', lw=2)
    plt.xlabel("Actual Target")
    plt.ylabel(y_label)
    plt.title(heading)


plt.figure(figsize=(10, 5))

# Left panel: Linear Regression, drawn with the default marker colour
_draw_actual_vs_predicted(
    1,
    y_test,
    lr_y_pred,
    "Predicted Target (Linear Regression)",
    "Actual vs. Predicted (Linear Regression)",
)

# Right panel: Random Forest, drawn in green
_draw_actual_vs_predicted(
    2,
    y_test,
    rf_y_pred,
    "Predicted Target (Random Forest)",
    "Actual vs. Predicted (Random Forest Regression)",
    color='green',
)

# Tighten the layout so titles and axis labels do not overlap, then render
plt.tight_layout()
plt.show()