In [1]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Step 2: Load dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
columns = ['Sex','Length','Diameter','Height','WholeWeight','ShuckedWeight','VisceraWeight','ShellWeight','Rings']
abalone = pd.read_csv(url, names=columns)

# Step 3: Preprocessing
# Convert categorical 'Sex' to numeric (M,F,I)
le = LabelEncoder()
abalone['Sex'] = le.fit_transform(abalone['Sex'])

# Define features (X) and target (y)
X = abalone.drop('Rings', axis=1)
y = abalone['Rings']

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Train Linear Regression
model = LinearRegression()
model.fit(X_train, y_train)

# Step 6: Predictions
y_pred = model.predict(X_test)

# Step 7: Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R2 Score:", r2)

# Step 8: Show coefficients
coef_df = pd.DataFrame({"Feature": X.columns, "Coefficient": model.coef_})
print(coef_df)


Mean Squared Error: 5.0625379540952355
R2 Score: 0.5323381317508206
         Feature  Coefficient
0            Sex     0.084190
1         Length    -1.539459
2       Diameter    13.597938
3         Height    11.448400
4    WholeWeight     9.157396
5  ShuckedWeight   -20.681025
6  VisceraWeight    -8.752822
7    ShellWeight     8.648475


In [2]:
# Step 7: Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Adjusted R2
n = X_test.shape[0]   # number of observations in test set
p = X_test.shape[1]   # number of predictors
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

print("Mean Squared Error:", mse)
print("R2 Score:", r2)
print("Adjusted R2 Score:", adj_r2)


Mean Squared Error: 5.0625379540952355
R2 Score: 0.5323381317508206
Adjusted R2 Score: 0.5278141959031865
