In [1]:
import pandas as pd

In [2]:
# Load the raw records. With encoding_errors='ignore', bytes that are not
# valid UTF-8 are dropped instead of raising a decode error.
df = pd.read_csv("最终的记录.csv", encoding='utf-8', encoding_errors='ignore')

# Assign the canonical column names positionally (the file must have exactly
# 25 columns): 13 product/sales fields followed by 12 numbered indicator columns.
product_columns = [
    '商品名称', '一级种类', '二级种类', '图片地址', 'sku', '文本描述',
    '折扣率', '折扣价', '价格', '星级', '销量', '收入', '评论',
]
indicator_columns = [
    'cc-1', 'cc-2', 'cc-3',
    'DRC-1', 'DRC-2', 'DRC-3',
    'RCV-1', 'RCV-2', 'RCV-3',
    'RSV-1', 'RSV-2', 'RSV-3',
]
df.columns = product_columns + indicator_columns

# Coerce the numeric columns in one pass; values that cannot be parsed
# become NaN (errors='coerce') rather than raising.
numeric_columns = ['星级', '折扣率', '折扣价', '价格', '销量', '收入']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Recompute revenue as discounted price times units sold, replacing
# whatever the file carried in that column.
df['收入'] = df['折扣价'].mul(df['销量'])

# Names of the numerical feature columns.
numerical_features = ['折扣价', '折扣率', '价格']

In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Linear-regression experiment: fit ordinary least squares on several random
# train/test splits and collect the held-out MAE / RMSE / R² for each split.
# Expects `df` from the data-loading cell, containing the columns
# '折扣价', '折扣率', '价格' (features) and '收入' (target).
X_data_kind = df[['折扣价', '折扣率', '价格']]
y = df['收入']

# Seeds for the train/test shuffle; one model is fitted per seed.
random_states = [42, 23, 15, 34, 18, 32, 47, 27, 8, 52]

# Column-oriented accumulator: one list per reported quantity, in the
# column order the summary DataFrame will use.
results = {
    column: []
    for column in ('random_state', 'MAE', 'RMSE', 'R²', 'Coefficients', 'Intercept')
}

for state in random_states:
    print(f"\n=== 使用 random_state = {state} ===")

    # Hold out 10% of the rows for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        X_data_kind, y, test_size=0.1, random_state=state
    )

    # Fit on the training split (fit() returns the estimator itself),
    # then predict the held-out rows.
    ols_model = LinearRegression().fit(X_train, y_train)
    y_pred = ols_model.predict(X_test)

    # Evaluation metrics on the held-out split.
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R²: {r2:.4f}")

    # Fitted parameters of this run.
    print(f"Coefficients: {ols_model.coef_}")
    print(f"Intercept: {ols_model.intercept_}")

    # Append this run to the accumulator. Coefficients are stored as a plain
    # list so the whole vector fits into a single DataFrame cell.
    run_record = {
        'random_state': state,
        'MAE': mae,
        'RMSE': rmse,
        'R²': r2,
        'Coefficients': ols_model.coef_.tolist(),
        'Intercept': ols_model.intercept_,
    }
    for column, value in run_record.items():
        results[column].append(value)

# One row per seed.
results_df = pd.DataFrame(results)

# Show the per-seed summary table.
print("\n=== 所有随机种子的实验结果汇总 ===")
print(results_df)

# Persist the table; 'utf-8-sig' writes a BOM at the start of the file.
results_df.to_csv("linear_regression_results.csv", index=False, encoding='utf-8-sig')
print("\n实验结果已保存到 'linear_regression_results.csv'")

# Descriptive statistics of the three metrics across seeds.
print("\n=== 评估指标统计 ===")
metric_summary = results_df[['MAE', 'RMSE', 'R²']].describe()
print(metric_summary)


=== 使用 random_state = 42 ===
MAE: 1282.3543
RMSE: 1686.2564
R²: 0.0487
Coefficients: [  67.58249848 -511.67755506   49.33208859]
Intercept: 838.463267084033

=== 使用 random_state = 23 ===
MAE: 1329.2604
RMSE: 1773.7382
R²: 0.0851
Coefficients: [  83.44982844 -477.45259328   37.11318448]
Intercept: 802.0739314774338

=== 使用 random_state = 15 ===
MAE: 1495.7221
RMSE: 2825.5920
R²: 0.0330
Coefficients: [  67.90595965 -478.02033623   52.88446174]
Intercept: 754.8889749255175

=== 使用 random_state = 34 ===
MAE: 1287.4613
RMSE: 1724.9413
R²: 0.1024
Coefficients: [  70.76484974 -488.45809031   46.98474815]
Intercept: 809.5110546725255

=== 使用 random_state = 18 ===
MAE: 1974.7258
RMSE: 6853.1556
R²: -0.0092
Coefficients: [  80.17474483 -636.43793959   49.90180505]
Intercept: 665.2390724450993

=== 使用 random_state = 32 ===
MAE: 1347.8273
RMSE: 1918.7745
R²: 0.0980
Coefficients: [  62.27862743 -565.33970989   53.91545721]
Intercept: 827.0207847653586

=== 使用 random_state = 47 ===
MAE: 1706.5095
R

In [4]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Polynomial-regression experiment: expand the three price features into
# polynomial terms, fit ordinary least squares on several random train/test
# splits, and collect the held-out MAE / RMSE / R² for each split.
# Expects `df` from the data-loading cell, containing the columns
# '折扣价', '折扣率', '价格' (features) and '收入' (target).
X_data_kind = df[['折扣价', '折扣率', '价格']]
y = df['收入']

# Seeds for the train/test shuffle; one model is fitted per seed.
random_states = [42, 23, 15, 34, 18, 32, 47, 27, 8, 52]

# Column-oriented accumulator: one list per reported quantity.
results = {
    'random_state': [],
    'MAE': [],
    'RMSE': [],
    'R²': [],
    'Coefficients': [],
    'Intercept': []
}

# Degree of the polynomial expansion; adjust as needed.
degree = 2

# Iterate over each random seed.
for state in random_states:
    print(f"\n=== 使用 random_state = {state} ===")

    # Hold out 10% of the rows for evaluation.
    # The feature matrix is `X_data_kind`, defined at the top of this cell.
    # This cell previously passed `X`, a name that is not defined anywhere in
    # the notebook, so it only ran when a stale `X` was still in the kernel
    # and failed with NameError after Restart & Run All.
    X_train, X_test, y_train, y_test = train_test_split(
        X_data_kind,
        y,
        test_size=0.1,
        random_state=state
    )

    # Polynomial feature generator; include_bias=False leaves the constant
    # term to LinearRegression's own intercept.
    poly_features = PolynomialFeatures(degree=degree, include_bias=False)

    # Fit the expansion on the training split only, then apply the same
    # transformation to the held-out split.
    X_train_poly = poly_features.fit_transform(X_train)
    X_test_poly = poly_features.transform(X_test)

    # Linear regression on the expanded features.
    # NOTE(review): the recorded run printed coefficients around ±8.6e10 that
    # nearly cancel each other, which suggests the expanded features are close
    # to linearly dependent. Consider feature scaling or a regularized model
    # if the coefficients need to be interpreted — confirm against the data.
    poly_model = LinearRegression()
    poly_model.fit(X_train_poly, y_train)

    # Predict the held-out rows.
    y_pred_poly = poly_model.predict(X_test_poly)

    # Evaluation metrics on the held-out split.
    mae_poly = mean_absolute_error(y_test, y_pred_poly)
    rmse_poly = np.sqrt(mean_squared_error(y_test, y_pred_poly))
    r2_poly = r2_score(y_test, y_pred_poly)

    print(f"Polynomial Regression MAE: {mae_poly:.4f}")
    print(f"Polynomial Regression RMSE: {rmse_poly:.4f}")
    print(f"Polynomial Regression R²: {r2_poly:.4f}")

    # Fitted parameters of this run.
    print(f"Coefficients: {poly_model.coef_}")
    print(f"Intercept: {poly_model.intercept_}")

    # Record this run. Coefficients are stored as a plain list so the whole
    # vector fits into a single DataFrame cell.
    results['random_state'].append(state)
    results['MAE'].append(mae_poly)
    results['RMSE'].append(rmse_poly)
    results['R²'].append(r2_poly)
    results['Coefficients'].append(poly_model.coef_.tolist())
    results['Intercept'].append(poly_model.intercept_)

# Convert the accumulated results to a DataFrame (one row per seed).
results_df = pd.DataFrame(results)

# Show the per-seed summary table.
print("\n=== 所有随机种子的实验结果汇总 ===")
print(results_df)

# Optional: persist the table; 'utf-8-sig' writes a BOM at the start of the file.
results_df.to_csv("polynomial_regression_results.csv", index=False, encoding='utf-8-sig')
print("\n实验结果已保存到 'polynomial_regression_results.csv'")

# Optional: descriptive statistics of the three metrics across seeds.
print("\n=== 评估指标统计 ===")
print(results_df[['MAE', 'RMSE', 'R²']].describe())



=== 使用 random_state = 42 ===
Polynomial Regression MAE: 1272.2258
Polynomial Regression RMSE: 1698.6099
Polynomial Regression R²: 0.0347
Coefficients: [-8.59569943e+10 -1.90402896e+03  8.59569946e+10 -2.32959585e+01
 -8.59569943e+10  1.40233266e+01  3.39695471e+03 -3.47279663e+02
  2.20488620e+00]
Intercept: 459.05557881593586

=== 使用 random_state = 23 ===
Polynomial Regression MAE: 1296.7170
Polynomial Regression RMSE: 1756.2063
Polynomial Regression R²: 0.1031
Coefficients: [-8.61116875e+10 -2.57340292e+03  8.61116877e+10  1.02855158e+00
 -8.61116870e+10 -2.16856449e+01  4.00265949e+03 -5.58283127e+02
  1.40867362e+01]
Intercept: 445.4070746310483

=== 使用 random_state = 15 ===
Polynomial Regression MAE: 1478.6664
Polynomial Regression RMSE: 2803.7274
Polynomial Regression R²: 0.0479
Coefficients: [-7.35821073e+10 -2.06506075e+03  7.35821076e+10 -1.26829456e+01
 -7.35821070e+10 -8.54309797e-01  3.55630144e+03 -4.57758100e+02
  7.10269666e+00]
Intercept: 413.34677233442517

=== 使用 ran