In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt 
import seaborn as sns
import pickle 
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

In [8]:
df = pd.read_csv('processd_df.csv', sep = ';')
y = df['CO(GT)']
x = df.loc[:, ['Month', 'Weekday', 'Hour', 'PT08.S1(CO)', 'C6H6(GT)', 'PT08.S2(NMHC)', 'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)', 'PT08.S5(O3)', 'T', 'RH', 'AH']]
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
lin_model = LinearRegression()
lin_model.fit(X_train, y_train)
y_pred = lin_model.predict(X_test)
print('Метрика R2:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))

Метрика R2: 0.9184234442438407
MAE: 0.24967928601306666


In [10]:
lasso_model = Lasso()
parameters = {'alpha': [0.1, 0.5, 1, 3, 5]}
lasso = GridSearchCV(lasso_model, parameters)
lasso.fit(X_train, y_train)
y_pred = lasso.predict(X_test)
print('Лучшие гиперпараметры', lasso.best_params_)
print('Метрика R2:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))

Лучшие гиперпараметры {'alpha': 0.1}
Метрика R2: 0.9096827831592366
MAE: 0.26393723019037146


In [17]:
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 5, 10, 12]}
svr_model = SVR()
svr = GridSearchCV(svr_model, parameters)
svr.fit(X_train, y_train)
print('Лучшие гиперпараметры', svr.best_params_)
y_pred = svr.predict(X_test)
print('Метрика R2:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))

Лучшие гиперпараметры SVR(C=12)
Метрика R2: 0.9582851577546982
MAE: 0.16804714849495195


In [16]:
forest_model = RandomForestRegressor(warm_start = True)
parameters = {'n_estimators':(100, 150, 200), 
                'criterion':['squared_error', 'absolute_error'],
                'max_depth': [10, 15, 20],
                'max_features': ['sqrt', 'log2']}


forest = GridSearchCV(forest_model, parameters)
forest.fit(X_train, y_train)
print('Лучшие гиперпараметры', forest.best_params_)
y_pred = forest.predict(X_test)
print('Метрика R2:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))

Лучшие гиперпараметры RandomForestRegressor(max_depth=20, max_features='log2', n_estimators=150,
                      warm_start=True)
Метрика R2: 0.9381254897286039
MAE: 0.20787898523315865


In [12]:
#K-ближайших соседей
neigh_model = KNeighborsRegressor()
parameters = {'n_neighbors': (3, 5, 7, 9, 11), 
                'weights': ['uniform', 'distance'],
                'algorithm': ['auto', 'ball_tree', 'kd_tree']}


neigh = GridSearchCV(neigh_model, parameters)
neigh.fit(X_train, y_train)
print('Лучшие гиперпараметры', neigh.best_params_)
y_pred = neigh.predict(X_test)
print('Метрика R2:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))

Лучшие гиперпараметры {'algorithm': 'auto', 'n_neighbors': 9, 'weights': 'distance'}
Метрика R2: 0.9128641605892815
MAE: 0.2505828366632603
