In [118]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
%matplotlib inline
import  matplotlib.pyplot as plt
import joblib
from sklearn.metrics import mean_absolute_percentage_error
import random
import torch
import os

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
seed_everything(seed=521)

In [119]:
train=pd.read_csv("../train_com.csv")
train.describe()

Unnamed: 0,L,V,DO,E,pH,TOC,TN,C/N,C/S,SO4,H2S
count,197.0,197.0,197.0,197.0,197.0,197.0,197.0,197.0,197.0,197.0,197.0
mean,5.461929,0.226688,0.945984,962.086294,7.347244,36.995184,16.058581,2.342726,0.616699,61.555513,16.815631
std,2.339724,0.09099,0.645469,78.275336,0.097261,21.433798,5.695426,0.998732,0.384182,13.670856,16.292675
min,1.0,0.097224,0.02,745.0,7.1,9.321598,6.243985,0.941765,0.225891,35.9363,0.01
25%,4.0,0.16,0.32,922.0,7.285419,21.735546,11.7086,1.556026,0.356042,48.994313,2.75
50%,6.0,0.216,0.96,974.0,7.33,32.673277,15.238409,2.109702,0.510619,61.3582,7.28
75%,7.0,0.276,1.42,1021.0,7.38,48.120133,20.050361,3.076531,0.819627,70.593543,32.7808
max,9.0,0.558,2.54,1100.0,7.7,155.442607,40.117917,5.704046,3.945414,98.1795,50.590173


In [120]:
test=pd.read_csv("../test1.csv")
test.describe()

Unnamed: 0,L,V,DO,E,pH,TOC,TN,C/N,C/S,SO4,H2S
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,5.791667,0.195,1.114167,954.0,7.356667,33.082966,16.421136,2.06514,0.538394,63.681004,30.175
std,2.264646,0.074305,0.661999,83.173993,0.107851,17.205562,6.707066,0.855174,0.304127,13.195245,17.598888
min,1.0,0.102,0.12,807.0,7.2,9.727586,5.973685,0.931152,0.166123,36.4819,0.01
25%,6.0,0.13925,0.39,895.0,7.27,21.539393,12.769102,1.383196,0.326058,59.029075,15.7875
50%,7.0,0.166,1.25,960.0,7.32,28.351351,15.669401,2.01273,0.472753,64.4618,33.25
75%,7.0,0.23575,1.65,1006.75,7.44,45.563246,18.390176,2.669494,0.721353,69.482,46.58
max,9.0,0.351,2.25,1074.0,7.61,84.636852,40.117917,4.231449,1.420067,98.1795,50.0


In [121]:
train=np.array(train)
test=np.array(test)
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split


train_x_1=train[:,:10]
train_y_1=train[:,10]
test_x_1=test[:,:10]
test_y_1=test[:,10]

scaler1=MinMaxScaler()
scaler2=MinMaxScaler()
scaler3=MinMaxScaler()
scaler4=MinMaxScaler()

train_x=scaler1.fit_transform(train_x_1)
train_y=scaler2.fit_transform(np.reshape(train_y_1,(-1,1)))
test_x=scaler3.fit_transform(test_x_1)
test_y=scaler4.fit_transform(np.reshape(test_y_1,(-1,1)))

In [122]:
from sklearn.linear_model import TheilSenRegressor

model = TheilSenRegressor(random_state=10) # 线性回归
model.fit(train_x,train_y)

In [123]:
pre=model.predict(test_x)
print(pre)

pre=scaler4.inverse_transform(np.reshape(pre,(-1,1)))

[-0.20237916  1.0899722  -0.27972311  0.06118879  0.27543761  0.34748789
  0.23374965  1.18180119  0.68351132  0.52010493  0.86260423  0.63477585
  0.06600675  0.42993128  0.39482486  0.74809317  0.62746887  0.88449587
  1.14392782  1.55388495  1.6640927   1.12427153  1.18180119  0.52010493]


In [124]:
from math import sqrt
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
MAE=mean_absolute_error(test_y_1, pre)
MSE=mean_squared_error(test_y_1, pre)
RMSE=sqrt(mean_squared_error(test_y_1,pre))
R2=r2_score(test_y_1,pre)
print("mean_absolute_error:", mean_absolute_error(test_y_1, pre))
print("mean_squared_error:", mean_squared_error(test_y_1, pre))
print("rmse:", sqrt(mean_squared_error(test_y_1, pre)))
print("r2 score:", r2_score(test_y_1, pre))

mean_absolute_error: 13.928791213392088
mean_squared_error: 367.1776206382265
rmse: 19.161879360809746
r2 score: -0.23705537037801716


In [125]:
results = {
    'Metric': ['mean_absolute_error', 'mean_squared_error', 'rmse', 'r2 score'],
    'Value': [MAE, MSE, RMSE, R2]
}
# 使用Pandas创建一个数据框
results_df = pd.DataFrame(results)

# 打印数据框
print(results_df)
results_df.to_csv('16.csv', index=False)

                Metric       Value
0  mean_absolute_error   13.928791
1   mean_squared_error  367.177621
2                 rmse   19.161879
3             r2 score   -0.237055


In [126]:
datapre=np.asarray(pre)
datapre.tofile('16-pre.csv',sep=',',format='%10.5f')