In [2]:
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.preprocessing import scale
from sklearn import metrics

#Read data from the spreadsheet
data = pd.read_csv("cleaned_mlready_combinedData1.csv")

In [3]:
# check out fire and no fire ratio
data["Fire"].value_counts()
data.dropna()
data.shape

(7271, 15)

In [4]:
## Set up the label variable as y
y = data['Fire']

## Dropping feature 'label'
X = data.drop(columns = 'Fire')

In [5]:
# Scale the set
X_scaled = scale(X)

In [6]:
# Split the set into randomized training and test feature/label sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size = 0.6, train_size = 0.4 ,random_state = 4)

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(2908, 14)
(4363, 14)
(2908,)
(4363,)


In [7]:
# RBF Kernel, C=1, default value of gamma

# Create Model
rbfKernelModel = SVC(C=100, gamma=50, kernel='rbf')
# Train Model
rbfKernelModel.fit(X_train, y_train)
# Create label prediction
labelPrediction = rbfKernelModel.predict(X_test)

#Print accuracy and confusion matrix
print("Accuracy RBF:", metrics.accuracy_score(y_true=y_test, y_pred=labelPrediction), "\n")

Accuracy RBF: 0.9841851936740774 



In [8]:
#baseline predictions, amd confusion matrix of the baseline model
prediction_base = rbfKernelModel.predict(X_test)
confusion_matrix(y_test, prediction_base, labels=[0,1])

#Get report on base model
print(classification_report(y_test,prediction_base))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      4294
           1       0.00      0.00      0.00        69

    accuracy                           0.98      4363
   macro avg       0.49      0.50      0.50      4363
weighted avg       0.97      0.98      0.98      4363



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [11]:
from imblearn.over_sampling import SMOTE, ADASYN
X_resampled, y_resampled = SMOTE().fit_resample(X_scaled, y)

# Split the set into randomized training and test feature/label sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size = 0.3, train_size = 0.7 ,random_state = 10)

# Create Model
rbfKernelResampledModel = SVC(kernel='rbf')
# Train Model
rbfKernelResampledModel.fit(X_train, y_train)
# Create label prediction
labelPredictionResampled = rbfKernelResampledModel.predict(X_test)

In [12]:
#Print accuracy and confusion matrix
print("Accuracy RBF Resampled:", metrics.accuracy_score(y_true=y_test, y_pred=labelPredictionResampled), "\n")

#baseline predictions, amd confusion matrix of the baseline model
prediction_base_rbf = rbfKernelResampledModel.predict(X_test)
confusion_matrix(y_test, prediction_base_rbf, labels=[0,1])

#Get report on base model
print(classification_report(y_test,prediction_base_rbf))

Accuracy RBF Resampled: 0.8595714951094551 

              precision    recall  f1-score   support

           0       0.98      0.74      0.84      2169
           1       0.79      0.98      0.87      2125

    accuracy                           0.86      4294
   macro avg       0.88      0.86      0.86      4294
weighted avg       0.88      0.86      0.86      4294

