## SVC Classifier
The model predicts the severity of the landslide (or if there will even be one) within the next 2 days, based on weather data from the past 5 days.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn
from sklearn.utils import shuffle
import pickle

In [2]:
df = pd.read_csv("full_dataset_v1.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
df['severity'].value_counts()

medium          3300
na              1321
small            613
large            427
unknown           75
very_large        56
...                5
catastrophic       1
Name: severity, dtype: int64

In [4]:
# filter by severity. na is for non-landslide data
df = df[df['severity'].isin(["medium", "small", "large", "very_large", "na"])]

In [5]:
df['severity'].value_counts()
df = shuffle(df)
df.reset_index(inplace=True, drop=True)
print(len(df))

5717


In [49]:
X = df.copy()

X.drop(X.columns[[i for i in range(0, 151)]], axis = 1, inplace = True)
# X.drop(X.columns[[i for i in range(20, 35)]], axis = 1, inplace = True)
X["severity"] = df["severity"]
X.drop(X.columns[[0]], axis = 1, inplace = True)

X = X.dropna()
for i in range(0, 8):
    del X['air' + str(i)]
    del X['temp' + str(i)]
    del X['humidity' + str(i)]
    
X.drop(X.columns[[i for i in range(len(X.columns)-8, len(X.columns)-2)]], axis = 1, inplace = True)
X

Unnamed: 0,precip7,wind7,precip6,wind6,precip5,wind5,precip4,wind4,precip3,wind3,slope,severity
0,2.0,14.0,23.4,12.0,11.2,5.0,11.5,5.0,0.0,5.0,41.0,medium
1,6.4,10.0,3.0,8.0,0.3,10.0,0.8,9.0,5.9,9.0,106.0,large
2,1.3,12.0,6.4,13.0,0.1,13.0,5.1,14.0,0.4,13.0,38.0,medium
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,57.0,medium
4,1.5,22.0,0.6,19.0,0.8,18.0,0.2,20.0,5.1,27.0,48.0,small
...,...,...,...,...,...,...,...,...,...,...,...,...
5712,0.6,13.0,0.1,13.0,0.0,6.0,1.0,7.0,1.4,9.0,62.0,na
5713,12.1,4.0,9.6,3.0,6.6,3.0,2.7,4.0,3.8,3.0,-1.0,medium
5714,1.6,11.0,1.4,12.0,1.1,14.0,0.3,11.0,1.5,11.0,5.0,medium
5715,1.4,5.0,0.6,6.0,0.5,10.0,2.1,8.0,1.5,7.0,110.0,medium


In [50]:
y = []
idx_to_severity = ["na", "small", "medium", "large", "very_large"]
for severity in X.severity:
    y.append(idx_to_severity.index(severity))
X.drop(X.columns[[-1]], axis = 1, inplace = True)
print(y.count(1))

613


## Scaling

In [51]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## PCA

In [52]:
from sklearn.decomposition import PCA
pca = PCA(0.9)

principalComponents = pca.fit_transform(X_train)
pca.explained_variance_ratio_

array([0.36275043, 0.22088221, 0.09103755, 0.08110706, 0.06188443,
       0.05209437, 0.04527199])

In [53]:
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

In [54]:
from sklearn.svm import SVC
svclassifier = SVC(kernel='rbf', verbose = True)

In [55]:
svclassifier.fit(X_train, y_train)

[LibSVM]

SVC(verbose=True)

In [56]:
from sklearn.metrics import accuracy_score
pred = svclassifier.predict(X_test)
print("ACCURACY:" ,accuracy_score(pred, y_test))

ACCURACY: 0.5638111888111889


## Testing gamma and c

In [26]:
from sklearn.metrics import accuracy_score
C_range =[1, 10, 100]
gamma_range = [0.1, 1, 10, 100]
for c in C_range:
    for g in gamma_range:
        svc2 = SVC(kernel='rbf', gamma=g,C=c, verbose = True)
        svc2.fit(X_train, y_train)
        print(c, g, ":",accuracy_score(y_test, svc2.predict(X_test)))

[LibSVM]1 0.1 : 0.5777972027972028
[LibSVM]1 1 : 0.5769230769230769
[LibSVM]1 10 : 0.576048951048951
[LibSVM]1 100 : 0.576048951048951
[LibSVM]10 0.1 : 0.5646853146853147
[LibSVM]10 1 : 0.5786713286713286
[LibSVM]10 10 : 0.576048951048951
[LibSVM]10 100 : 0.576048951048951
[LibSVM]100 0.1 : 0.5638111888111889
[LibSVM]100 1 : 0.5786713286713286
[LibSVM]100 10 : 0.576048951048951
[LibSVM]100 100 : 0.576048951048951


In [22]:
accuracy_score(y_test, svc2.predict(X_test))

0.576048951048951