In [4]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Toy dataset: one feature column holding 1..5 and targets equal to twice the feature
x = np.arange(1, 6).reshape(-1, 1)
y = 2 * x.ravel()

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
split = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = split

# Show which samples landed in each partition
for heading, named_arrays in (
    ("Training data:", (("x_train:", x_train), ("y_train:", y_train))),
    ("Testing data:", (("x_test:", x_test), ("y_test:", y_test))),
):
    print(heading)
    for label, values in named_arrays:
        print(label, values)

# Build the linear regression model and fit it on the training partition
# (fit() returns the estimator itself, so construction and training chain)
model = LinearRegression().fit(x_train, y_train)

# Predictions on both partitions, kept for the evaluation metrics further down
y_pred_train, y_pred_test = (model.predict(part) for part in (x_train, x_test))

# Show the predicted values
print("Training predictions:", y_pred_train)
print("Testing predictions:", y_pred_test)

# Predict the target for a feature value outside the sample data
x_new = np.array([[6]])
y_pred = model.predict(x_new)

# Show the prediction for the new input
print("Predicted value:", y_pred)

"""
Here are a few common evaluation metrics for regression models:

Mean Squared Error (MSE): 
It measures the average squared difference between the predicted and actual values. 
Lower values indicate better performance.

Root Mean Squared Error (RMSE): 
It is the square root of the MSE, providing an interpretable metric 
in the same units as the target variable.

Mean Absolute Error (MAE): 
It computes the average absolute difference between the predicted and actual values. 
MAE is more robust to outliers compared to MSE.

R-squared (R²) Score: 
It represents the proportion of the variance in the target variable 
that is predictable from the input features. 
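It is at most 1 (a perfect fit) and can be negative when the model fits
worse than always predicting the mean; higher values indicate better prediction.
It is undefined (NaN) when computed on fewer than two samples.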
"""
# Calculate evaluation metrics on both the training and the testing partition
mse_train = mean_squared_error(y_train, y_pred_train)
mse_test = mean_squared_error(y_test, y_pred_test)

# RMSE is the square root of MSE, so it is in the same units as the target
rmse_train = np.sqrt(mse_train)
rmse_test = np.sqrt(mse_test)

mae_train = mean_absolute_error(y_train, y_pred_train)
mae_test = mean_absolute_error(y_test, y_pred_test)

# R² compares the residuals against the variance of the true targets, so it is
# undefined for fewer than two samples. scikit-learn emits an
# UndefinedMetricWarning and returns NaN in that case; guard explicitly so a
# tiny test partition yields NaN without the warning.
_MIN_R2_SAMPLES = 2
_r2_undefined = float("nan")
_train_r2_defined = len(y_train) >= _MIN_R2_SAMPLES
_test_r2_defined = len(y_test) >= _MIN_R2_SAMPLES

r2_train = r2_score(y_train, y_pred_train) if _train_r2_defined else _r2_undefined
r2_test = r2_score(y_test, y_pred_test) if _test_r2_defined else _r2_undefined

# Calculate R-squared (R²) score using the estimator's score method
# (same metric as r2_score, computed from the features instead of predictions)
r2_train_score = model.score(x_train, y_train) if _train_r2_defined else _r2_undefined
r2_test_score = model.score(x_test, y_test) if _test_r2_defined else _r2_undefined

# Print the evaluation metrics and R-squared score
print("Mean Squared Error (MSE) - Train:", mse_train)
print("Mean Squared Error (MSE) - Test:", mse_test)

print("Root Mean Squared Error (RMSE) - Train:", rmse_train)
print("Root Mean Squared Error (RMSE) - Test:", rmse_test)

print("Mean Absolute Error (MAE) - Train:", mae_train)
print("Mean Absolute Error (MAE) - Test:", mae_test)

print("R-squared (R²) Score - Train (via r2_score):", r2_train)
print("R-squared (R²) Score - Test (via r2_score):", r2_test)

print("R-squared (R²) Score - Train (via score method):", r2_train_score)
print("R-squared (R²) Score - Test (via score method):", r2_test_score)


Training data:
x_train: [[5]
 [3]
 [1]
 [4]]
y_train: [10  6  2  8]
Testing data:
x_test: [[2]]
y_test: [4]
Training predictions: [10.  6.  2.  8.]
Testing predictions: [4.]
Predicted value: [12.]
Mean Squared Error (MSE) - Train: 0.0
Mean Squared Error (MSE) - Test: 0.0
Root Mean Squared Error (RMSE) - Train: 0.0
Root Mean Squared Error (RMSE) - Test: 0.0
Mean Absolute Error (MAE) - Train: 0.0
Mean Absolute Error (MAE) - Test: 0.0
R-squared (R²) Score - Train (via r2_score): 1.0
R-squared (R²) Score - Test (via r2_score): nan
R-squared (R²) Score - Train (via score method): 1.0
R-squared (R²) Score - Test (via score method): nan


