In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.feature_selection import VarianceThreshold
import numpy as np

# Preprocessing: load the CSV, drop incomplete rows, label-encode text columns,
# remove low-variance features, standardize the rest and encode the target.

# load in dataset; '?' placeholders are converted to NaN so they count as missing
data = pd.read_csv('forestfires.csv').replace('?', np.nan)

# total number of missing cells across the whole frame
missvalues = data.isnull().sum().sum()

# handle missing values if any are found
if missvalues > 0:
    data = data.dropna()  # removes rows with missing values

# encodes the non-numeric columns in the data set
nonnumeric_col = data.select_dtypes(include=['object']).columns
labelen = {}
for column in nonnumeric_col:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    labelen[column] = le  # stores encoder so the codes can be mapped back later

# last column is the target
target = data.columns[-1]
x = data.drop(columns=[target])
y = data[target]

# applies variance thresholding to remove low-variance features
threshold = 0.05
selector = VarianceThreshold(threshold=threshold)
xreduced = selector.fit_transform(x)

# lists the removed columns and updates x to keep only the retained ones
support = selector.get_support()  # boolean mask, True for retained columns
removed = x.columns[~support].tolist()
print("Columns removed:", removed)
x = x.loc[:, support]

# standardizes features
# Scale the lowercase `x` built above; the previous `X` is not defined anywhere
# in this notebook and only resolved when a stale variable was left in the kernel.
# NOTE(review): the scaler is fit on every row, i.e. before any train/test split,
# so held-out rows influence the scaling statistics.
scaler = StandardScaler()
xscaled = scaler.fit_transform(x)

# encodes the target labels as integers (0/1 when the target has two classes)
yencoder = LabelEncoder()
y = yencoder.fit_transform(y)


Columns removed: ['monthapr', 'monthdec', 'monthfeb', 'monthjan', 'monthjun', 'monthmay', 'monthnov', 'monthoct']


In [52]:


# Model selection: for each SVM kernel, grid-search the hyper-parameters with
# 5-fold cross-validation on the training split, then score the best model on
# the held-out test split.

# splits data into training and testing sets
xtrain, xtest, ytrain, ytest = train_test_split(xscaled, y, test_size=0.25, random_state=0)

kernels = ['rbf', 'linear', 'poly', 'sigmoid']
paramgrid = {
    'C': [1000, 100, 10, 1, 0.1, 0.001],
    'gamma': [1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001]
}

results = []

for kernel in kernels:
    model = SVC(kernel=kernel, random_state=0)

    # builds a per-kernel copy of the grid so the shared paramgrid is never
    # mutated between iterations; only the polynomial kernel searches degree
    kernelgrid = dict(paramgrid)
    if kernel == 'poly':
        kernelgrid['degree'] = [1, 2, 3, 4]

    # uses GridSearchCV to find best parameters for the model
    grid = GridSearchCV(model, kernelgrid, cv=5, n_jobs=-1, scoring='accuracy')
    grid.fit(xtrain, ytrain)

    # best model (refit on the full training split) predicts the test split
    bestmodel = grid.best_estimator_
    ypred = bestmodel.predict(xtest)

    # gets the metrics against `ytest`, the labels produced by the split above
    # (the previous `y_test` is not defined anywhere in this notebook)
    acc = accuracy_score(ytest, ypred)
    prec = precision_score(ytest, ypred, average='binary', pos_label=1)
    rec = recall_score(ytest, ypred, average='binary', pos_label=1)
    f1 = f1_score(ytest, ypred, average='binary', pos_label=1)

    # add results: one row per kernel -> [accuracy, precision, recall, f1]
    results.append([acc, prec, rec, f1])

# converts to a 4x4 array (rows follow the order of `kernels`)
resultsarr = np.array(results)
print("Results:\n", resultsarr)



Results:
 [[0.94615385 0.93       1.         0.96373057]
 [0.96923077 0.97849462 0.97849462 0.97849462]
 [0.96923077 0.97849462 0.97849462 0.97849462]
 [0.91538462 0.91       0.97849462 0.94300518]]


In [53]:
"""
1. Did your code produce any warnings? What were they? 
   Why do you think this may have happened for some models but not for others?

   The code did not produce any warnings for me. This suggests that all kernels and parameter ranges were compatible with 
   the dataset and my device, allowing the models to converge without any visible issues. Warnings might typically occur if a model struggles 
   with complex boundaries (like higher-degree polynomial kernels), but here, the chosen parameters 
   most likely prevented these convergence issues.

2. Which kernel(s) led to the best performance? Would you expect this always to be the case?

   The linear kernel and the polynomial kernel with degree = 1 showed the best performance, both achieving an accuracy of 0.9692 along with
   high and balanced precision, recall, and F1 scores (around 0.9785). As answered in the next question, these two types of models
   actually behave the same. These kernels were best because the data's decision boundary aligns well with a linear separator. However, for
   more complex, non-linear data patterns, the RBF or sigmoid kernels may perform better. So, while the linear and polynomial kernels worked
   best here, this may not always be the case for all data.

3. Does the polynomial kernel with degree=1 behave the same as the linear kernel? Why or why not?

   Yes, the polynomial kernel with degree = 1 behaves the same as the linear kernel. With degree = 1, the polynomial kernel
   reduces to gamma * <x, x'> + coef0, which is just a scaled (and shifted) dot product, so it creates a linear decision boundary
   that is essentially the same as that of the linear kernel. Therefore, both kernels produced the same performance results,
   as they both model linear relationships.
"""


