In [28]:
# from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

In [32]:
# Read the player table from disk; the file is decoded as GBK.
file_data = pd.read_csv(
    filepath_or_buffer="./player_data.csv",
    encoding="gbk",
)
# Show the leading rows as the cell's rich output.
file_data.head()

Unnamed: 0,Rank,Name,Game,Score
0,1,安赛龙,16,105655
1,2,奈良冈功大,23,83515
2,3,李诗沣,23,83398
3,4,安东尼·西尼苏卡·金廷,20,83331
4,5,乔纳坦·克里斯蒂,18,82111


In [33]:
# Feature table: the Score and Game columns, in that order.
data = file_data.loc[:, ["Score", "Game"]]
# Regression target: the Rank column.
target = file_data.loc[:, "Rank"]

In [38]:
def linear_model_1_Normal_Equation(features=None, labels=None, test_size=0.1, random_state=2):
    """
    Linear regression fitted with ``LinearRegression`` (the method this
    notebook refers to as the normal equation).

    The data is split into train and test parts, the features are
    standardized using statistics learned from the training part only, the
    model is fitted, and the test predictions, coefficients, intercept and
    mean squared error are printed.

    Parameters
    ----------
    features : optional
        Feature table handed to ``train_test_split``. When omitted, the
        notebook-level ``data`` is used.
    labels : optional
        Target values aligned with ``features``. When omitted, the
        notebook-level ``target`` is used.
    test_size : float, default 0.1
        Fraction of the rows held out for evaluation. Note that
        ``linear_model_2_Gradient_Descent`` evaluates on a 0.2 split, so use
        the same value for both when comparing their errors.
    random_state : int, default 2
        Seed for the train/test shuffle.

    Returns
    -------
    None
        All results are reported through ``print``.
    """
    # Fall back to the notebook-level objects so the zero-argument call still works.
    if features is None:
        features = data
    if labels is None:
        labels = target

    # 2. Split the dataset
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, random_state=random_state, test_size=test_size
    )

    # 3. Feature engineering: standardization
    transformer = StandardScaler()

    # fit_transform() learns the mean and standard deviation from the
    # training data and scales the training data with them.
    x_train = transformer.fit_transform(x_train)

    # transform() reuses the training mean and standard deviation, so the
    # train and test features are scaled in exactly the same way.
    x_test = transformer.transform(x_test)

    # 4. Machine learning: linear regression
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)

    # 5. Model evaluation
    ## 5.1 Predictions and fitted parameters
    y_predict = estimator.predict(x_test)
    print(f"预测值为：{y_predict}")
    print(f"模型中的系数为：{estimator.coef_}")
    print(f"模型中的偏置为：{estimator.intercept_}")

    ## 5.2 Metric: mean squared error on the held-out rows
    error_mse = mean_squared_error(y_test, y_predict)
    print(f"均方误差为：{error_mse}")

# Run the LinearRegression experiment; it prints its results and returns None.
linear_model_1_Normal_Equation()


预测值为：[ 75.22269789  64.27183853  71.88746663  25.87548647  70.75067654
  31.45639632  -2.63713284  70.9444515   69.71595124  47.02028477
 -18.73460261  69.38982459  23.98607686  74.37871157  66.75903564
 -19.34885274  73.00202947  64.77894595   6.44180962  78.55933862
  70.28910797  25.88569655 -19.6167909   67.20012978  73.42348046
  76.34644073  70.9444515   39.47256733  78.01701095  14.89692432
  27.31109804  68.90563115  75.22269789  51.43182263  57.80785487
  75.57632851  70.86842695  71.33765753  69.39275206  72.56093532
  58.85244689  37.29382358  69.38615616  62.7531186   64.73306374
  39.86832143  64.73306374  19.62535457  47.25108707  26.87000389
  52.16508809  75.57632851  29.61306773  50.25755332  76.83121238
  11.36020183  55.86206387  75.22269789  55.86206387  75.63984798
  56.71029683  67.10632343  44.8193498   22.61378595  57.80785487
  33.16131948  54.06700278  75.62089154  55.01016247  63.93564635
  67.62588167  28.63736219  61.53267798  79.05319998  33.4014103
  55.8

In [35]:
def linear_model_2_Gradient_Descent(features=None, labels=None, test_size=0.2, random_state=2, max_iter=1000):
    """
    Linear regression fitted by stochastic gradient descent (``SGDRegressor``).

    The data is split into train and test parts, the features are
    standardized using statistics learned from the training part only, the
    model is fitted, and the test predictions, coefficients, intercept and
    mean squared error are printed.

    Parameters
    ----------
    features : optional
        Feature table handed to ``train_test_split``. When omitted, the
        notebook-level ``data`` is used.
    labels : optional
        Target values aligned with ``features``. When omitted, the
        notebook-level ``target`` is used.
    test_size : float, default 0.2
        Fraction of the rows held out for evaluation. Note that
        ``linear_model_1_Normal_Equation`` evaluates on a 0.1 split, so use
        the same value for both when comparing their errors.
    random_state : int, default 2
        Seed used for the train/test shuffle and for the estimator's own
        data shuffling, so repeated runs print the same numbers.
    max_iter : int, default 1000
        Upper bound on the number of passes over the training data (epochs).

    Returns
    -------
    None
        All results are reported through ``print``.
    """
    # Fall back to the notebook-level objects so the zero-argument call still works.
    if features is None:
        features = data
    if labels is None:
        labels = target

    # 2. Split the dataset
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, random_state=random_state, test_size=test_size
    )

    # 3. Feature engineering: standardization
    transformer = StandardScaler()
    # fit_transform() learns the mean and standard deviation from the
    # training data and scales the training data with them.
    x_train = transformer.fit_transform(x_train)

    # transform() reuses the training mean and standard deviation, so the
    # train and test features are scaled in exactly the same way.
    x_test = transformer.transform(x_test)

    # 4. Machine learning: linear regression (gradient descent)
    # max_iter is the maximum number of epochs (full passes over the training
    # data), not a per-epoch iteration count; fitting can stop earlier once
    # the estimator's tolerance-based stopping criterion is met.
    # random_state fixes the estimator's shuffling so results are reproducible.
    estimator = SGDRegressor(max_iter=max_iter, random_state=random_state)
    estimator.fit(X=x_train, y=y_train)

    # 5. Model evaluation
    ## 5.1 Predictions and fitted parameters
    y_predict = estimator.predict(x_test)
    print(f"模型的预测值为：{y_predict}")
    print(f"模型中的系数为：{estimator.coef_}")
    print(f"模型中的偏置为：{estimator.intercept_}")

    ## 5.2 Metric: mean squared error on the held-out rows
    error = mean_squared_error(y_true=y_test, y_pred=y_predict)
    print(f"MSE误差为：{error}")

# Run the SGDRegressor experiment; it prints its results and returns None.
linear_model_2_Gradient_Descent()


模型的预测值为：[ 75.31332518  64.22194221  71.87693785  25.8788532   70.81818656
  31.19490921  -2.9070295   71.13883604  69.82007006  47.04904028
 -19.12966968  69.74400323  23.89576142  74.21323037  67.01569969
 -19.78905267  73.12313504  65.02537431   6.30276005  78.28959553
  70.22835917  25.80520084 -19.93250305  67.41711638  73.3789342
  76.23308893  71.13883604  39.29447591  77.87033724  14.75962424
  27.40580111  68.92164596  75.31332518  51.6931708   57.72454422
  75.2072501   71.02046088  71.07254095  69.49520016  72.72171836
  58.564748    37.39817283  69.48857016  62.61145633  64.72750925
  39.69227578  64.72750925  19.4705396   46.94537478  27.00438442
  52.22043868  75.2072501   29.71967039  50.30304328  76.55253607
  11.20461934  55.76869482  75.31332518  55.76869482  75.48088597
  56.62131256  67.44869794  44.54302471  22.43246149  57.72454422
  33.20234894  54.04826974  75.50378916  54.66064402  64.09380021
  67.76115389  28.69696352  61.17491881  78.7440522   33.44368086
  5