In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split

In [None]:
# Read the forest-fires table from the CSV file in the working directory.
csv_path = "forestfires.csv"
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows (five is head()'s default) as a quick check of the load.
data.head(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [None]:
# Swap the text labels in `month` and `day` for integer codes.
# pd.factorize numbers labels by order of first appearance, so the codes
# follow row order in the file rather than calendar order.
for label_col in ("month", "day"):
    codes, _uniques = pd.factorize(data[label_col])
    data[label_col] = codes

In [None]:
# The target is the `area` column; every remaining column is a feature.
target_col = "area"
y = data[target_col]
X = data.drop(columns=[target_col])

In [None]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.25, random_state=3)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR, LinearSVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score

In [None]:
# Candidate regressors to compare.
# Every estimator that accepts a `random_state` gets the same seed, so that
# re-running the notebook reproduces the reported scores. Previously LinearSVR,
# MLPRegressor and the standalone DecisionTreeRegressor were left unseeded and
# could score differently on each run.
SEED = 42

models = [
    # Linear models
    LinearRegression(),
    Ridge(alpha=1.0, random_state=SEED),
    Lasso(alpha=1.0, random_state=SEED),
    # Support-vector regressors
    SVR(kernel='rbf'),
    SVR(kernel='linear'),
    SVR(kernel='sigmoid'),
    LinearSVR(random_state=SEED),
    # Nearest neighbours
    KNeighborsRegressor(n_neighbors=5),
    KNeighborsRegressor(n_neighbors=10),
    # Neural network
    MLPRegressor(hidden_layer_sizes=(100, ), activation='relu', solver='adam',
                 alpha=0.0001, max_iter=700, random_state=SEED),
    # Trees and tree ensembles
    DecisionTreeRegressor(random_state=SEED),
    RandomForestRegressor(n_estimators=100, random_state=SEED),
    # The inner tree is seeded by BaggingRegressor from its own random_state.
    BaggingRegressor(estimator=DecisionTreeRegressor(), n_estimators=100,
                     random_state=SEED),
]

In [None]:
# Score every candidate with 5-fold cross-validation on the full dataset.
#
# Both metrics come from ONE cross_validate call per model, so MSE and MAE are
# measured on the same fitted estimators and each model is cross-validated once
# instead of twice.
#
# The earlier fit on X_train / predict on X_test was removed: its predictions
# were never used, and cross-validation works on clones of the estimator, so
# pre-fitting had no effect on the reported scores. As a consequence the
# objects in `models` are left unfitted after this cell.
CV_SCORERS = ("neg_mean_squared_error", "neg_mean_absolute_error")

for model in models:
    print(model)
    cv_results = cross_validate(model, X, y, cv=5, scoring=CV_SCORERS)
    # scikit-learn reports errors negated ("greater is better"); flip the sign
    # when printing so the usual positive error values are shown.
    scores_sq = cv_results["test_neg_mean_squared_error"]
    scores_abs = cv_results["test_neg_mean_absolute_error"]
    print(f"Mean squared error: {scores_sq.mean() * -1}")
    print(f"Mean absolute error: {scores_abs.mean() * -1}")
    print()

LinearRegression()
Mean squared error: 4178.557210028633
Mean absolute error: 21.155020391979747

Ridge(random_state=42)
Mean squared error: 4175.720419265169
Mean absolute error: 21.125111617118147

Lasso(random_state=42)
Mean squared error: 4164.815708850652
Mean absolute error: 20.861461164864885

SVR()
Mean squared error: 4216.7556376504235
Mean absolute error: 13.270455868478763

SVR(kernel='linear')
Mean squared error: 4202.657015902923
Mean absolute error: 13.343285112810701

SVR(kernel='sigmoid')
Mean squared error: 4188.330625732815
Mean absolute error: 13.122185144474628

LinearSVR()
Mean squared error: 5063.496433285569




Mean absolute error: 17.314926015844897

KNeighborsRegressor()
Mean squared error: 5552.449845469305
Mean absolute error: 25.419768409260637

KNeighborsRegressor(n_neighbors=10)
Mean squared error: 4835.468216988124
Mean absolute error: 23.55045459297984

MLPRegressor(max_iter=700)
Mean squared error: 4082.5952898441
Mean absolute error: 26.038603500714355

DecisionTreeRegressor()
Mean squared error: 5412.29857686333
Mean absolute error: 24.9494068334578

RandomForestRegressor(random_state=42)
Mean squared error: 4717.294716473354
Mean absolute error: 25.02421066808919

BaggingRegressor(estimator=DecisionTreeRegressor(), n_estimators=100,
                 random_state=42)
Mean squared error: 4739.448912983071
Mean absolute error: 25.389148834177956



Выводы по результатам моделей (лучшие и худшие)

Вывод о том, можно ли предсказывать эти значения ML-алгоритмами