In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [5]:
def load_dataset():
    """Load the iris dataset into a single pandas DataFrame.

    Returns:
        A ``(df, feature_names)`` tuple. ``df`` holds one column per entry of
        the dataset's ``feature_names`` followed by a final ``'target'``
        column built from the dataset's ``target`` array. ``feature_names`` is
        the list of feature column names as provided by the dataset.
    """
    bunch = load_iris()
    measurement_names = bunch['feature_names']

    # Append the target array as one extra column to the right of the features.
    table = np.column_stack((bunch['data'], bunch['target']))
    frame = pd.DataFrame(data=table, columns=measurement_names + ['target'])

    return frame, measurement_names

In [6]:
def _print_feature_ranges(frame):
    """Print the min, max, mean and std of every column in ``frame``."""
    for column in frame.columns:
        values = frame[column]
        print(f"{column}:")
        print(f"Min: {values.min():.2f}")
        print(f"Max: {values.max():.2f}")
        print(f"Mean: {values.mean():.2f}")
        print(f"Std: {values.std():.2f}")


def preprocessing(df, target_index=0, scale=True):
    """Split ``df`` into train/test sets for regressing one column on the others.

    Args:
        df: DataFrame whose first four columns are candidate features; any
            further columns are never used as features.
        target_index: Position of the column to predict. It is removed from
            the first four columns to form the input features.
        scale: When true, standardise the features with a ``StandardScaler``
            fitted on the training split only and applied to both splits.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The feature sets are
        DataFrames that keep the row index produced by the split, so they stay
        label-aligned with ``y_train`` / ``y_test``.

    Side effects:
        Prints per-feature statistics of the full feature table before the
        split and, only when ``scale`` is true, of the scaled training split.
    """
    # Only the first four columns are considered as features.
    feature_positions = [i for i in range(4) if i != target_index]
    features = df.iloc[:, feature_positions]

    print("Miền giá trị của các feature trước khi scale:")
    _print_feature_ranges(features)

    target = df.iloc[:, target_index]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    if scale:
        # z = (x - mean) / std. The scaler is fitted on the training split
        # only, so no statistics from the test split leak into training.
        scaler = StandardScaler()
        # Re-wrap the scaled arrays with the original columns AND index;
        # dropping the index would leave X_* misaligned with y_*.
        X_train = pd.DataFrame(
            scaler.fit_transform(X_train),
            columns=X_train.columns,
            index=X_train.index,
        )
        X_test = pd.DataFrame(
            scaler.transform(X_test),
            columns=X_test.columns,
            index=X_test.index,
        )

        # Report the post-scaling ranges only when scaling actually happened.
        print("Miền giá trị của các feature sau khi scale:")
        _print_feature_ranges(X_train)

    return X_train, X_test, y_train, y_test

In [2]:
def train_model(X_train, X_test, y_train, y_test, feature_names, target_name):
    """Fit a ``LinearRegression`` model and print its coefficients and scores.

    Args:
        X_train, y_train: Features and targets the model is fitted on.
        X_test, y_test: Held-out features and targets used for evaluation.
        feature_names: Labels paired, in order, with the fitted coefficients
            when they are printed.
        target_name: Accepted for the caller's convenience; not used here.

    Returns:
        The fitted ``LinearRegression`` instance.

    Side effects:
        Prints each coefficient, the intercept (as ``w0``), and the MSE and
        R2 score on both the training and the test split.
    """
    regressor = LinearRegression().fit(X_train, y_train)

    fitted_train = regressor.predict(X_train)
    fitted_test = regressor.predict(X_test)

    # Evaluate everything up front, in the order it is reported below.
    report = [
        ("Train MSE", mean_squared_error(y_train, fitted_train)),
        ("Test MSE", mean_squared_error(y_test, fitted_test)),
        ("Train R2 Score", r2_score(y_train, fitted_train)),
        ("Test R2 Score", r2_score(y_test, fitted_test)),
    ]

    print("Hệ số hồi quy:")
    for label, weight in zip(feature_names, regressor.coef_):
        print(f"{label}: {weight:.4f}")
    print(f"w0: {regressor.intercept_:.4f}")

    print("\nMetrics đánh giá:")
    for metric_label, metric_value in report:
        print(f"{metric_label}: {metric_value:.4f}")

    return regressor

In [11]:
# Load the iris data as one DataFrame plus the list of feature column names.
df, feature_names = load_dataset()

# Regress the first feature column on the remaining ones.
target_index = 0
target_name = feature_names[target_index]

X_train, X_test, y_train, y_test = preprocessing(
    df, target_index=target_index, scale=True
)

# Names of the input columns, in the same order as the columns of X_train.
input_features = [name for i, name in enumerate(feature_names) if i != target_index]

model = train_model(
    X_train, X_test, y_train, y_test,
    input_features, target_name
)

print("Demo dự đoán:")
# Keep the sample as a one-row DataFrame: the model was fitted on a DataFrame
# with named columns, and predicting on a bare list drops those names and makes
# scikit-learn warn that X does not have valid feature names.
sample_row = X_test.iloc[[0]]
sample_input = sample_row.iloc[0]
# Because scale=True above, these are standardised values, not raw measurements.
# to_dict() yields plain Python floats instead of np.float64(...) reprs.
print("Đầu vào:", sample_input.to_dict())
prediction = model.predict(sample_row)[0]
print(f"Dự đoán {target_name}: {prediction:.2f}")

Miền giá trị của các feature trước khi scale:
sepal width (cm):
Min: 2.00
Max: 4.40
Mean: 3.06
Std: 0.44
petal length (cm):
Min: 1.00
Max: 6.90
Mean: 3.76
Std: 1.77
petal width (cm):
Min: 0.10
Max: 2.50
Mean: 1.20
Std: 0.76
Miền giá trị của các feature sau khi scale:
sepal width (cm):
Min: -2.37
Max: 2.99
Mean: -0.00
Std: 1.00
petal length (cm):
Min: -1.56
Max: 1.70
Mean: -0.00
Std: 1.00
petal width (cm):
Min: -1.45
Max: 1.76
Mean: -0.00
Std: 1.00
Hệ số hồi quy:
sepal width (cm): 0.2967
petal length (cm): 1.3217
petal width (cm): -0.5051
w0: 5.8092

Metrics đánh giá:
Train MSE: 0.0963
Test MSE: 0.1021
Train R2 Score: 0.8569
Test R2 Score: 0.8520
Demo dự đoán:
Đầu vào: {'sepal width (cm)': np.float64(-0.5850597637540184), 'petal length (cm)': np.float64(0.5577752408630077), 'petal width (cm)': np.float64(0.022247505360231082)}
Dự đoán sepal length (cm): 6.36


