In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDRegressor

In [33]:
# Load the data into a pandas DataFrame
data = pd.read_csv('forestfires.csv', delimiter=',')

In [34]:
data.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [35]:
# Split the data into features (X) and target variable (Y)
X = data.drop(['Y'], axis=1)
Y = data['Y']

In [36]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

In [37]:
# Concatenate the training and testing data
combined_data = pd.concat([X_train, X_test])

# Perform one-hot encoding on the combined data
combined_data_encoded = pd.get_dummies(combined_data, columns=['month', 'day'])

In [38]:
# Split the combined data back into training and testing sets
X_train_encoded = combined_data_encoded[:len(X_train)]
X_test_encoded = combined_data_encoded[len(X_train):]

**Linear Regression**

In [39]:
linear_reg = LinearRegression()
linear_reg.fit(X_train_encoded, y_train)
linear_reg_predictions = linear_reg.predict(X_test_encoded)
linear_reg_mse = mean_squared_error(y_test, linear_reg_predictions)
print("Linear Regression Predictions:", linear_reg_predictions)
print("Linear Regression MSE:", linear_reg_mse)

Linear Regression Predictions: [5.12566012 3.89221585 4.90418179 4.29774826 5.36678989 4.64026414
 4.62812646 4.37014564 5.58892324 3.33678084 4.045781   4.80091814
 4.3633906  3.94634091 4.95053532 3.46751406 4.71240876 3.30050185
 3.45603752 4.32987585 4.72617487 3.77270561 4.23598214 3.24977412
 3.56124877 3.80863522 4.34068669 4.83328447 4.56202659 4.11174851
 4.42296934 4.06383202 4.44981434 4.56462392 5.54091391 5.3047592
 4.05312501 4.75271396 5.09158499 5.22460361 3.41473081 3.16620537
 4.7573643  3.90384766 3.36712304 4.88536004 5.27284487 3.53701615
 3.53891078 4.77858088 5.18778777 3.80267034 4.29155258 4.39303505
 4.33637372 4.74781414 5.18113912 4.79407152 3.8177406  5.16714375
 4.37154208 3.71352384 5.61905667 4.09570014 4.90978592 3.6232232
 4.00045611 3.75907635 5.06012172 3.92245326 3.14511229 3.34164072
 4.59767946 5.27137007 4.62980938 4.96449943 4.67520016 3.45611812
 4.83067541 4.20216144 3.34274577 3.97756782 3.71081754 3.40080901
 4.28855944 4.7607562  5.18629486

**Polynomial Regression**

In [40]:
# Create polynomial features
poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit_transform(X_train_encoded)
X_test_poly = poly_features.transform(X_test_encoded)

# Create and fit the Polynomial Regression model
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)

# Make predictions on the testing data
poly_reg_predictions = poly_reg.predict(X_test_poly)

# Evaluate the model's performance
poly_reg_mse = mean_squared_error(y_test, poly_reg_predictions)

print("Polynomial Regression Predictions:", poly_reg_predictions)
print("Polynomial Regression MSE:", poly_reg_mse)

Polynomial Regression Predictions: [ 4.37782076e+00  1.03681144e+00  3.63827154e+00  5.93589604e+00
  5.39299762e+00  4.37865421e+00  5.10053775e+00  2.85564435e+00
  5.37345573e+00 -2.37179165e+04  2.41302788e+00  4.01391780e+00
 -6.95986734e+05  2.41569355e+00  3.60712072e+00  3.74229774e+00
  4.92507216e+00  7.31387851e+00 -8.13124156e+04  7.05359265e+00
  2.83166747e+05  3.79500517e+00  5.21594738e+04  3.39441726e+00
  2.61294290e+00  4.14403138e+00  5.49609077e+00  3.19181888e+05
  5.28354442e+00  4.60069212e+00  4.00480625e+00  4.54139945e+00
  4.31775019e+00  6.15215155e+00  5.07796633e+00  1.28519292e+01
  3.71553949e+00  6.50922975e+00 -3.06382243e+01  7.21557423e+00
  6.07733855e+00  2.77990815e+00  3.96194813e+00  4.83039841e+00
  2.41130885e+00  3.75861582e+00  2.34463903e+00  4.33796892e+00
  2.95895731e+00  4.50957689e+00  5.55755517e+00  4.66247436e+00
  3.70434543e+00  6.52776250e+00  2.66700861e+00  2.58541039e+00
 -3.48874764e+03  5.82856378e+00  3.72512540e+00  6.244

**Ridge Regression**

In [41]:
# Create and fit the Ridge Regression model
ridge_reg = Ridge(alpha=0.5)  # Adjust the regularization strength (alpha) as needed
ridge_reg.fit(X_train_encoded, y_train)

# Make predictions on the testing data
ridge_reg_predictions = ridge_reg.predict(X_test_encoded)

# Evaluate the model's performance
ridge_reg_mse = mean_squared_error(y_test, ridge_reg_predictions)

print("Ridge Regression Predictions:", ridge_reg_predictions)
print("Ridge Regression MSE:", ridge_reg_mse)

Ridge Regression Predictions: [5.13904108 3.84146505 4.95681482 4.2697445  5.35549484 4.61895508
 4.6447805  4.3516241  5.55548181 3.35614473 4.05137392 4.73747305
 4.36562236 3.95853621 4.9393968  3.40660551 4.69596298 3.29279815
 3.47621489 4.33062724 4.69880401 3.79446797 4.27995048 3.30236943
 3.55476001 3.78471845 4.33651023 4.83915371 4.59070785 4.13374773
 4.39328659 4.03281026 4.48313868 4.56509641 5.53771995 5.30344445
 4.03413094 4.79505124 5.10148704 5.21285309 3.46468133 3.20029994
 4.76945328 3.89467298 3.39913867 4.84606008 5.41887193 3.56040815
 3.56280627 4.78994514 5.16320803 3.84044672 4.31298607 4.43502676
 4.39214205 4.70326332 5.20320792 4.78927881 3.83962867 5.187211
 4.38807607 3.6602611  5.6388327  4.09344625 4.92370718 3.59159186
 4.01985765 3.72920869 5.08436599 3.92025889 3.17944061 3.37730514
 4.5976407  5.27455992 4.59297588 4.94419466 4.66058735 3.48272538
 4.83967156 4.21861605 3.37208419 3.90332404 3.75073654 3.64854856
 4.22169869 4.80903368 5.18839055 

**Lasso Regression**

In [42]:
# Create and fit the Lasso Regression model
lasso_reg = Lasso(alpha=0.1)  # Adjust the regularization strength (alpha) as needed
lasso_reg.fit(X_train_encoded, y_train)

# Make predictions on the testing data
lasso_reg_predictions = lasso_reg.predict(X_test_encoded)

# Evaluate the model's performance
lasso_reg_mse = mean_squared_error(y_test, lasso_reg_predictions)

print("Lasso Regression Predictions:", lasso_reg_predictions)
print("Lasso Regression MSE:", lasso_reg_mse)

Lasso Regression Predictions: [5.04614232 3.62816116 5.06571473 4.122518   5.12019705 4.59622607
 4.74654578 4.1150871  5.3003814  3.9514756  3.87248256 4.08763967
 4.21155285 3.94963894 4.8716452  3.33295328 4.66060219 3.37766877
 3.93720358 4.14109271 4.68774309 3.98444076 4.97956444 3.49699926
 3.57658417 3.91750405 4.21929935 4.94809439 4.51773117 4.20968529
 4.33829115 3.96616547 4.52431913 4.67078316 5.28253873 5.31132505
 3.94388485 4.81252362 5.22808058 5.1535184  3.5070633  3.50254439
 4.76678449 3.97938155 3.46018012 4.84059752 5.3725662  3.68606194
 3.52887554 4.76476891 5.21858249 4.01866663 4.22914056 4.54240205
 4.54440533 4.68037444 5.51495125 4.65814477 3.98245064 5.1193739
 4.5722155  3.55474103 5.40969043 4.11524428 4.77170933 3.655531
 4.05319646 3.70426526 5.50837783 4.14265384 3.50845324 3.46018012
 4.76481658 5.31534018 4.33361159 4.71391865 4.59903114 3.53626188
 4.76709467 4.23994726 3.47317834 3.56488034 4.22239034 4.60282823
 4.03546361 4.79066228 4.92348772 3

**Decision Tree Regression**

In [43]:
# Create and fit the Decision Tree Regression model
dt_reg = DecisionTreeRegressor(random_state=0)  # Set the random_state for reproducibility
dt_reg.fit(X_train_encoded, y_train)

# Make predictions on the testing data
dt_reg_predictions = dt_reg.predict(X_test_encoded)

# Evaluate the model's performance
dt_reg_mse = mean_squared_error(y_test, dt_reg_predictions)

print("Decision Tree Regression Predictions:", dt_reg_predictions)
print("Decision Tree Regression MSE:", dt_reg_mse)

Decision Tree Regression Predictions: [5. 5. 4. 5. 6. 4. 5. 5. 6. 4. 4. 4. 6. 4. 4. 3. 4. 5. 4. 4. 5. 3. 3. 4.
 5. 4. 3. 5. 3. 3. 4. 4. 4. 3. 6. 8. 4. 4. 5. 9. 4. 2. 4. 3. 4. 4. 4. 4.
 4. 3. 6. 5. 5. 3. 4. 4. 9. 3. 4. 6. 4. 2. 5. 3. 5. 2. 3. 4. 9. 5. 2. 3.
 4. 6. 5. 5. 5. 2. 4. 4. 5. 3. 5. 5. 5. 4. 6. 4. 4. 4. 4. 4. 3. 4. 2. 6.
 4. 4. 6. 4. 5. 4. 3. 6.]
Decision Tree Regression MSE: 1.4903846153846154


**Random Forest Regression**

In [44]:
# Create and fit the Random Forest Regression model
rf_reg = RandomForestRegressor(random_state=0)  # Set the random_state for reproducibility
rf_reg.fit(X_train_encoded, y_train)

# Make predictions on the testing data
rf_reg_predictions = rf_reg.predict(X_test_encoded)

# Evaluate the model's performance
rf_reg_mse = mean_squared_error(y_test, rf_reg_predictions)

print("Random Forest Regression Predictions:", rf_reg_predictions)
print("Random Forest Regression MSE:", rf_reg_mse)

Random Forest Regression Predictions: [4.02 3.97 5.39 4.85 5.76 3.94 4.63 4.35 5.95 4.07 4.57 4.14 4.72 4.27
 4.03 2.49 4.1  3.83 4.02 4.36 3.85 4.01 4.15 4.18 4.09 3.66 3.58 4.4
 3.97 3.63 4.38 4.67 3.97 4.28 5.23 6.68 4.44 4.48 4.62 6.96 3.74 2.2
 4.07 4.13 3.09 4.28 6.14 3.88 4.08 3.95 5.96 4.36 4.42 4.42 4.04 3.77
 6.39 4.33 4.07 5.92 4.13 2.71 5.13 4.11 4.27 2.65 3.44 3.92 6.67 4.21
 3.03 3.51 4.41 5.95 4.54 4.85 4.22 2.32 4.17 3.99 4.39 2.76 4.27 4.95
 4.34 3.8  4.68 4.04 3.83 4.43 4.42 5.11 4.06 4.03 3.33 6.16 4.15 3.99
 5.09 3.93 4.2  4.   3.93 5.71]
Random Forest Regression MSE: 0.9450673076923077


**Gradient Boosting Regression**

In [45]:
# Create and fit the Gradient Boosting Regression model
gb_reg = GradientBoostingRegressor(random_state=0)  # Set the random_state for reproducibility
gb_reg.fit(X_train_encoded, y_train)

# Make predictions on the testing data
gb_reg_predictions = gb_reg.predict(X_test_encoded)

# Evaluate the model's performance
gb_reg_mse = mean_squared_error(y_test, gb_reg_predictions)

print("Gradient Boosting Regression Predictions:", gb_reg_predictions)
print("Gradient Boosting Regression MSE:", gb_reg_mse)

Gradient Boosting Regression Predictions: [4.14322733 3.37771785 5.18670315 4.83217128 6.11913961 3.88707861
 4.57886021 3.99917424 6.34531337 3.96074432 4.25895604 4.06589423
 4.59659535 4.24185836 4.29712026 2.90649033 4.19250669 3.84474587
 3.82814509 4.37105991 4.01834958 4.21407476 4.21286569 4.2963412
 3.9408454  4.16809914 3.88440337 4.24055576 4.06584326 4.06031375
 4.53229116 4.92181486 4.34919333 4.28660298 5.77400335 5.94138161
 4.26872679 4.25529513 3.56057099 7.6251681  3.67260114 2.94434715
 4.10623869 4.18575249 2.97276844 4.02989135 5.16610675 4.03595225
 3.6891819  4.00474302 5.65697075 4.46641584 4.13802686 4.11066868
 4.19349995 3.88599766 7.11600644 4.24226005 4.07827126 5.71214416
 4.05824931 3.24345766 5.2824823  4.02075342 4.14090338 2.89193515
 3.70414555 4.42372494 6.78993357 3.73121715 2.65532452 3.42269708
 4.07306651 6.2214396  4.44429241 4.67050579 4.55038454 2.83623462
 4.16864734 4.12058738 3.93701399 3.25198386 4.29606708 4.68013181
 4.57584    4.1927687

**Support Vector Regression**

In [46]:
# Create and fit the SVR model
svr = SVR()
svr.fit(X_train_encoded, y_train)

# Make predictions on the testing data
svr_predictions = svr.predict(X_test_encoded)

# Evaluate the model's performance
svr_mse = mean_squared_error(y_test, svr_predictions)

print("Support Vector Regression Predictions:", svr_predictions)
print("Support Vector Regression MSE:", svr_mse)

Support Vector Regression Predictions: [4.21948497 4.24869232 4.09793769 4.11333206 4.09343612 4.07250583
 4.20880475 4.10400486 4.11898739 4.13864096 4.08038705 4.19792685
 4.27902098 4.18760651 4.12949692 4.14526738 4.09161109 4.09806028
 4.10442593 4.21529647 4.07416722 4.07472266 4.15486651 4.09349684
 4.15100515 4.07257384 4.06289548 4.09338089 4.0839457  4.06961977
 4.07622575 4.11274225 4.08746222 4.09141896 4.10225296 4.2142835
 4.17177977 4.07910981 5.00937711 4.10313251 4.09054548 4.06931995
 4.07206042 4.06575773 4.07298327 4.07363961 4.11523769 4.06696778
 4.07110114 4.07643167 4.11251967 4.09387373 4.23420951 4.10254656
 4.09622753 4.06950306 4.18524017 4.08310601 4.07483725 4.09666658
 4.08691097 4.13232855 4.25023195 4.09462896 4.09190752 4.2079545
 4.07491852 4.10934678 4.15057619 4.1189082  4.06791714 4.07351029
 4.06973657 4.10283735 4.07187344 4.15085256 4.12301038 4.0695113
 4.07861798 4.07554897 4.07664668 4.16232111 4.1622007  4.24068005
 4.09145047 4.08077504 4.2

**Gradient Descent**

In [47]:
gd_reg = make_pipeline(StandardScaler(), SGDRegressor())
gd_reg.fit(X_train_encoded, y_train)
gd_reg_predictions = gd_reg.predict(X_test_encoded)
gd_reg_mse = mean_squared_error(y_test, gd_reg_predictions)
print("Gradient Descent Regression Predictions:", gd_reg_predictions)
print("Gradient Descent Regression MSE:", gd_reg_mse)

Gradient Descent Regression Predictions: [5.08844512 3.82418363 4.9933786  4.24488824 5.38423102 4.57795498
 4.62853385 4.37178321 5.5579461  3.21872147 4.10433987 4.67676876
 4.46117155 3.89470836 4.87035727 3.3371641  4.70037533 3.3247822
 3.32537213 4.37908236 4.71096858 3.83068424 4.15657002 3.33273337
 3.50328636 3.81814437 4.34343679 4.91041369 4.63992583 4.13500745
 4.298572   4.00133767 4.5247521  4.62081745 5.58280568 5.36045069
 3.98915295 4.89155838 5.36522151 5.23838245 3.59382796 3.30234952
 4.76958711 3.88417096 3.43877606 4.76813989 5.67565177 3.5588489
 3.6368725  4.8110902  5.16329041 3.89274003 4.28658697 4.51593339
 4.42573068 4.72465552 5.09607867 4.8157718  3.8560464  5.25586713
 4.44620314 3.61761264 5.75956921 4.07496174 4.95420524 3.57800565
 4.07759376 3.6955056  4.96395813 3.96994739 3.28796498 3.40726321
 4.59103326 5.31670383 4.53745513 4.89622318 4.58947866 3.5790485
 4.85942602 4.22814073 3.37863587 3.84233343 3.64339127 3.8356175
 4.0814132  4.92983002 5.

**Performance** (A lower MSE indicates better performance)

In [48]:
print("Linear Regression MSE:", linear_reg_mse)
print("Polynomial Regression MSE:", poly_reg_mse)
print("Ridge Regression MSE:", ridge_reg_mse)
print("Lasso Regression MSE:", lasso_reg_mse)
print("Decision Tree Regression MSE:", dt_reg_mse)
print("Random Forest Regression MSE:", rf_reg_mse)
print("Gradient Boosting Regression MSE:", gb_reg_mse)
print("Support Vector Regression MSE:", svr_mse)
print("Gradient Descent Regression MSE:", gd_reg_mse)

Linear Regression MSE: 1.1687054659355256
Polynomial Regression MSE: 91815629753.16556
Ridge Regression MSE: 1.1687607684573598
Lasso Regression MSE: 1.1240039921687754
Decision Tree Regression MSE: 1.4903846153846154
Random Forest Regression MSE: 0.9450673076923077
Gradient Boosting Regression MSE: 0.9345229618597842
Support Vector Regression MSE: 1.673610338926847
Gradient Descent Regression MSE: 1.2112240775425176


In [49]:
# Initialize the best MSE variable
best_mse = float('inf') # It should be an initial value that is higher than the maximum possible MSE to ensure that the first model's MSE is assigned correctly.
best_model = None

# Compare the MSE values and find the best model
if linear_reg_mse < best_mse:
    best_mse = linear_reg_mse
    best_model = "Linear Regression"

if poly_reg_mse < best_mse:
    best_mse = poly_reg_mse
    best_model = "Polynomial Regression"

if ridge_reg_mse < best_mse:
    best_mse = ridge_reg_mse
    best_model = "Ridge Regression"

if lasso_reg_mse < best_mse:
    best_mse = lasso_reg_mse
    best_model = "Lasso Regression"

if dt_reg_mse < best_mse:
    best_mse = dt_reg_mse
    best_model = "Decision Tree Regression"

if rf_reg_mse < best_mse:
    best_mse = rf_reg_mse
    best_model = "Random Forest Regression"

if gb_reg_mse < best_mse:
    best_mse = gb_reg_mse
    best_model = "Gradient Boosting Regression"

if svr_mse < best_mse:
    best_mse = svr_mse
    best_model = "Support Vector Regression"

if gd_reg_mse < best_mse:
    best_mse = gd_reg_mse
    best_model = "Gradient Descent Regression"

print("The best model is:", best_model)


The best model is: Gradient Boosting Regression
