# Support Vector Machine Cloud Detection
1. Data Preprocessing

In [1]:
import numpy as np
# Load the raw dataset from a comma-separated file into a NumPy array.
dataset_path = '../data.csv'
data = np.genfromtxt(dataset_path, delimiter=',')

In [2]:
# Column 0 holds the label; columns 1 through 21 hold the features.
y = data[:, 0]
# Slice out the feature columns and replace any NaN entries with 0.0.
x = np.nan_to_num(data[:, 1:22], nan=0.0)

In [3]:
# Collapse every label of 1 or greater (multi-layer cloud) to 1 so the task
# becomes binary classification: cloud = 1, no cloud = 0.
# Labels below 1 are left unchanged.
cloud_mask = y >= 1
y = np.where(cloud_mask, 1, y)
print(y[0])

0.0


In [4]:
# Draw a reproducible random subsample of the rows (without replacement).
SAMPLE_SEED = 7
N_SAMPLES = 1250

np.random.seed(SAMPLE_SEED)
# Clamp the request to the number of available rows: np.random.choice raises
# ValueError when size exceeds the population and replace=False.
sample_size = min(N_SAMPLES, len(x))
random_indices = np.random.choice(len(x), size=sample_size, replace=False)

# Apply the same indices to features and labels so the rows stay aligned.
x = x[random_indices, :]
y = y[random_indices]

In [5]:
# Reduce the feature matrix to two columns.
# Columns 0 and 6 were chosen from Random Forest's Feature Importance.
selected_features = [0, 6]
x = x[:, selected_features]

In [6]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set; the other 80% is used for training.
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
from sklearn.preprocessing import StandardScaler
# Standardize the features. The scaler is fitted on the training split only,
# and the fitted scaler is then applied to both the training and test splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

2. Build Model
3. Train

In [8]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Linear kernel: fit on the training split, then evaluate on the test split.
svm_model = SVC(kernel='linear', C=1).fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.748


In [9]:
# RBF kernel: fit on the training split, then evaluate on the test split.
svm_model = SVC(kernel='rbf', C=1).fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.772


In [10]:
# Polynomial kernel of degree 3: fit on the training split, then evaluate on
# the test split. degree=3 is SVC's default; it is spelled out here so the
# code matches the description.
svm_model = SVC(kernel='poly', degree=3, C=1).fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.76
