## SVC Classifier
The model predicts the severity of the landslide (or if there will even be one) within the next 2 days, based on weather data from the past 5 days.

In [58]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn
from sklearn.utils import shuffle
import pickle

In [59]:
df = pd.read_csv("full_dataset_v1.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [60]:
df['severity'].value_counts()

medium          3300
na              1321
small            613
large            427
unknown           75
very_large        56
...                5
catastrophic       1
Name: severity, dtype: int64

In [61]:
# filter by severity. na is for non-landslide data
df = df[df['severity'].isin(["medium", "small", "large", "very_large", "na"])]

In [62]:
df['severity'].value_counts()
df = shuffle(df)
df.reset_index(inplace=True, drop=True)
print(len(df))

5717


In [69]:
X = df.copy()

X.drop(X.columns[[i for i in range(0, 151)]], axis = 1, inplace = True)
# X.drop(X.columns[[i for i in range(20, 35)]], axis = 1, inplace = True)
X["severity"] = df["severity"]
X.drop(X.columns[[0]], axis = 1, inplace = True)

X = X.dropna()
for i in range(0, 8):
    del X['air' + str(i)]
    del X['temp' + str(i)]
    del X['humidity' + str(i)]
    
X.drop(X.columns[[i for i in range(len(X.columns)-8, len(X.columns)-2)]], axis = 1, inplace = True)
X

Unnamed: 0,precip7,wind7,precip6,wind6,precip5,wind5,precip4,wind4,precip3,wind3,slope,severity
0,2.9,12.0,1.0,9.0,15.6,7.0,0.3,6.0,7.3,7.0,8.0,small
1,0.0,12.0,0.3,11.0,0.0,11.0,0.0,12.0,0.0,12.0,125.0,medium
2,0.7,35.0,0.0,25.0,0.0,12.0,0.1,23.0,0.0,28.0,42.0,medium
3,0.1,3.0,0.5,3.0,0.0,5.0,0.1,4.0,0.4,7.0,170.0,na
4,1.7,10.0,3.6,10.0,8.1,16.0,3.3,11.0,0.1,10.0,26.0,na
...,...,...,...,...,...,...,...,...,...,...,...,...
5712,0.0,12.0,0.4,11.0,0.0,10.0,0.0,12.0,0.0,10.0,99.0,large
5713,20.9,29.0,4.1,18.0,6.2,20.0,3.2,26.0,3.6,23.0,47.0,medium
5714,0.5,17.0,0.0,15.0,0.0,17.0,0.0,18.0,0.0,18.0,97.0,na
5715,2.4,17.0,1.8,14.0,3.5,15.0,3.0,15.0,6.9,17.0,68.0,medium


In [70]:
def generate_labels(binary = False):
    y = []
    idx_to_severity = ["na", "small", "medium", "large", "very_large"]
    for severity in X.severity:
        if binary:
            y.append(idx_to_severity.index(severity))
        elif severity == "na":
            y.append(0)
        else:
            y.append(1)
    X.drop(X.columns[[-1]], axis = 1, inplace = True)
    print(y.count(1))
    return y

In [71]:
y = generate_labels(True)

613


## Scaling

In [72]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## PCA

In [73]:
from sklearn.decomposition import PCA
pca = PCA(0.9)

principalComponents = pca.fit_transform(X_train)
pca.explained_variance_ratio_

array([0.36520976, 0.22117808, 0.09102161, 0.0785012 , 0.06158014,
       0.05319635, 0.04622439])

In [74]:
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

In [75]:
from sklearn.svm import SVC
svclassifier = SVC(kernel='rbf', verbose = True)

In [76]:
svclassifier.fit(X_train, y_train)

[LibSVM]

SVC(verbose=True)

In [77]:
from sklearn.metrics import accuracy_score
pred = svclassifier.predict(X_test)
print("ACCURACY:" ,accuracy_score(pred, y_test))

ACCURACY: 0.5812937062937062


## Testing gamma and c

In [78]:
from sklearn.metrics import accuracy_score
C_range =[1, 10, 100]
gamma_range = [0.1, 1, 10, 100]
for c in C_range:
    for g in gamma_range:
        svc2 = SVC(kernel='rbf', gamma=g,C=c, verbose = True)
        svc2.fit(X_train, y_train)
        print(c, g, ":",accuracy_score(y_test, svc2.predict(X_test)))

[LibSVM]1 0.1 : 0.5812937062937062
[LibSVM]1 1 : 0.5865384615384616
[LibSVM]1 10 : 0.6031468531468531
[LibSVM]1 100 : 0.6153846153846154
[LibSVM]10 0.1 : 0.583041958041958
[LibSVM]10 1 : 0.5629370629370629
[LibSVM]10 10 : 0.5821678321678322
[LibSVM]10 100 : 0.6092657342657343
[LibSVM]100 0.1 : 0.5655594405594405
[LibSVM]100 1 : 0.5218531468531469
[LibSVM]100 10 : 0.5865384615384616
[LibSVM]100 100 : 0.6092657342657343


In [79]:
accuracy_score(y_test, svc2.predict(X_test))

0.6092657342657343