In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
data = pd.read_csv('concrete_compressive_strength_preprocessed.csv', delimiter=',', index_col=0)
data.head()
data = data[(np.abs(stats.zscore(data)) < 3).all(axis=1)]

In [3]:
x = data.drop(columns=['Concrete compressive strength'])
x_scaled = StandardScaler().fit(x).transform(x)

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x_scaled, data['Concrete compressive strength'], test_size = 0.20)

# Линейная регрессия

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from math import sqrt

In [6]:
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [7]:
print ('R2:',r2_score(y_test, y_pred))
print ('Mean Square Error(MSE):',mean_squared_error(y_test, y_pred, squared=True))
print ('Root Mean Square Error(RMSE):',mean_squared_error(y_test, y_pred, squared=False))
print ('mean absolute error (MAE):',mean_absolute_error(y_test, y_pred))

R2: 0.6039382343405371
Mean Square Error(MSE): 103.30647367801176
Root Mean Square Error(RMSE): 10.16397922459564
mean absolute error (MAE): 7.70966977943265


# Регрессия дерева решений

In [8]:
from sklearn.tree import DecisionTreeRegressor

In [9]:
model = DecisionTreeRegressor(random_state=0)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [10]:
print ('R2:',r2_score(y_test, y_pred))
print ('Mean Square Error(MSE):',mean_squared_error(y_test, y_pred, squared=True))
print ('Root Mean Square Error(RMSE):',mean_squared_error(y_test, y_pred, squared=False))
print ('mean absolute error (MAE):',mean_absolute_error(y_test, y_pred))

R2: 0.7594702418858628
Mean Square Error(MSE): 62.7383991080861
Root Mean Square Error(RMSE): 7.9207574832263425
mean absolute error (MAE): 4.929401912239952


# LASSO

In [11]:
from sklearn.linear_model import Lasso

In [12]:
model = Lasso(alpha=1.0)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [13]:
print ('R2:',r2_score(y_test, y_pred))
print ('Mean Square Error(MSE):',mean_squared_error(y_test, y_pred, squared=True))
print ('Root Mean Square Error(RMSE):',mean_squared_error(y_test, y_pred, squared=False))
print ('mean absolute error (MAE):',mean_absolute_error(y_test, y_pred))

R2: 0.5985106810242586
Mean Square Error(MSE): 104.72216547767471
Root Mean Square Error(RMSE): 10.233384849485272
mean absolute error (MAE): 7.9375369974890795


# Гребневая регрессия

In [14]:
from sklearn.linear_model import Ridge

In [15]:
model = Ridge(alpha=1.0)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [16]:
print ('R2:',r2_score(y_test, y_pred))
print ('Mean Square Error(MSE):',mean_squared_error(y_test, y_pred, squared=True))
print ('Root Mean Square Error(RMSE):',mean_squared_error(y_test, y_pred, squared=False))
print ('mean absolute error (MAE):',mean_absolute_error(y_test, y_pred))

R2: 0.6042200563576277
Mean Square Error(MSE): 103.23296484349456
Root Mean Square Error(RMSE): 10.16036243662078
mean absolute error (MAE): 7.7119817215525055


# Elastic Net регрессия

In [17]:
from sklearn.linear_model import ElasticNetCV

In [18]:
model = ElasticNetCV(cv=5,random_state=0)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

In [19]:
print ('R2:',r2_score(y_test, y_pred))
print ('Mean Square Error(MSE):',mean_squared_error(y_test, y_pred, squared=True))
print ('Root Mean Square Error(RMSE):',mean_squared_error(y_test, y_pred, squared=False))
print ('mean absolute error (MAE):',mean_absolute_error(y_test, y_pred))

R2: 0.6051432122982077
Mean Square Error(MSE): 102.99217415591723
Root Mean Square Error(RMSE): 10.148506006103421
mean absolute error (MAE): 7.7253837655592354
