In [3]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split  
from sklearn.linear_model import LinearRegression  

# Creating the dataset
data = {  
    "Horsepower": [150, 120, 200, 130, 180],  
    "Mileage": [50, 80, 30, 70, 40],  
    "Year": [2018, 2016, 2020, 2017, 2019],  
    "Luxury": [1, 0, 1, 0, 1],  
    "Price": [25000, 15000, 40000, 18000, 35000]  
}  
df = pd.DataFrame(data)

# Defining features (X) and target (Y)
X = df[["Horsepower", "Mileage", "Year", "Luxury"]]  
y = df["Price"]  

# Splitting into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = LinearRegression()  
model.fit(X_train, y_train)

# Predict car prices
predictions = model.predict(X_test)  
print("Predicted Prices:", predictions)

feature_importance = pd.Series(model.coef_, index=X.columns)  
print("Feature Importance:\n", feature_importance)

Predicted Prices: [18000.]
Feature Importance:
 Horsepower     500.000000
Mileage        149.253731
Year         -3507.462687
Luxury        3492.537313
dtype: float64


In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score  

# Calculate evaluation metrics
mae = mean_absolute_error(y_test, predictions)  
mse = mean_squared_error(y_test, predictions)  
r2 = r2_score(y_test, predictions)  

print(f"Mean Absolute Error: {mae:.2f}")  
print(f"Mean Squared Error: {mse:.2f}")  
print(f"R² Score: {r2:.2f}")

Mean Absolute Error: 3000.00
Mean Squared Error: 9000000.00
R² Score: nan




The warning occurs because the R² score requires at least two samples in the test set to compute a meaningful value. Since the dataset is small and the test set contains only one sample, the R² score cannot be calculated.

To resolve this, you can either increase the size of the dataset or adjust the test size to ensure that the test set contains at least two samples. Here’s how you can modify the test size:


In [5]:
# Splitting into 67% training and 33% testing to ensure at least 2 samples in
# test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Create and train the model
model = LinearRegression()  
model.fit(X_train, y_train)

# Predict car prices
predictions = model.predict(X_test)  
print("Predicted Prices:", predictions)

# Calculate evaluation metrics
mae = mean_absolute_error(y_test, predictions)  
mse = mean_squared_error(y_test, predictions)  
r2 = r2_score(y_test, predictions)  

print(f"Mean Absolute Error: {mae:.2f}")  
print(f"Mean Squared Error: {mse:.2f}")  
print(f"R² Score: {r2:.2f}")

Predicted Prices: [14521.66656281 33840.55552094]
Mean Absolute Error: 818.89
Mean Squared Error: 786557.19
R² Score: 0.99



This adjustment ensures that the test set has at least two samples, allowing the R² score to be calculated without warnings.



In [6]:
# Print feature importance with two decimal places
formatted_importance = feature_importance.apply(lambda x: f"{x:.2f}")
print("Feature Importance:\n", formatted_importance)

Feature Importance:
 Horsepower      500.00
Mileage         149.25
Year          -3507.46
Luxury         3492.54
dtype: object
