# Landslides

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# Load the training table; the path is relative to the notebook's working directory.
train = pd.read_csv('data/Train.csv')

In [None]:
# Peek at the first rows to see how the columns are laid out.
train.head()

In [None]:
# Column dtypes and non-null counts.
train.info()

In [None]:
# Total number of missing values in the whole table
# (per-column NaN counts, summed again across columns).
train.isna().sum().sum()

In [None]:
# Number of rows per distinct value of `Label`.
train['Label'].value_counts()

## Overview plots

In [None]:
def plot4positions(feature, position, title, xlim, ylim, xlabel, ylabel, bin_number=40, data=None):
    """Draw a 2x2 grid of histograms of one feature at four positions.

    Panel ``k`` (row-major order) shows the column named
    ``f"{position[k]}_{feature}"``.

    Parameters
    ----------
    feature : str
        Column-name suffix, e.g. ``'elevation'``.
    position : sequence
        Column-name prefixes. The first four entries are plotted; if fewer
        than four are given the remaining panels stay empty.
    title : str
        Figure-level title.
    xlim, ylim : pair of numbers
        Axis limits applied identically to every panel so that the four
        histograms are directly comparable.
    xlabel, ylabel : str
        Axis labels. ``xlabel`` is only written on the bottom row and
        ``ylabel`` only on the left column.
    bin_number : int, default 40
        Number of histogram bins.
    data : DataFrame, optional
        Table to read the columns from. Defaults to the notebook-level
        ``train`` frame, so existing calls keep working unchanged.

    Returns
    -------
    None
        Nothing is returned on purpose so that a call at the end of a cell
        does not echo a repr below the figure.
    """
    frame = train if data is None else data
    fig, axs = plt.subplots(2, 2)
    # axs.flat walks the grid row by row: (0,0), (0,1), (1,0), (1,1)
    for k, (ax, pos) in enumerate(zip(axs.flat, position)):
        row, col = divmod(k, 2)
        ax.hist(x=frame[str(pos) + '_' + feature], bins=bin_number)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        # say which position a panel shows; otherwise the four are indistinguishable
        ax.set_title(f'position {pos}')
        if row == 1:
            ax.set_xlabel(xlabel)
        if col == 0:
            ax.set_ylabel(ylabel)
    fig.suptitle(title)

In [None]:
# Column-name prefixes of the four positions shown in the overview histograms
# (e.g. 1 -> '1_elevation').
position = [1, 8, 15, 23]

In [None]:
# Elevation histograms at the four sampled positions.
title, xlabel, ylabel = 'Digital elevation of the terrain surface in meter', 'elevation in m', 'count'
xlim, ylim = [0, 1000], [0, 900]
plot4positions(
    feature='elevation', position=position, title=title,
    xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel,
)

In [None]:
# Slope histograms at the four sampled positions.
title, xlabel, ylabel = 'Angle of the slope inclination in degree', 'slope in degree', 'count'
xlim, ylim = [0, 80], [0, 1500]
plot4positions(
    feature='slope', position=position, title=title,
    xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel,
)

In [None]:
# Aspect histograms at the four sampled positions.
title='Exposition of the slope in degree'
# an angle in degrees spans 0-360; the previous upper limit of 365 left a
# meaningless strip at the right edge of every panel
xlim = [0,360]
ylim = [0,600]
xlabel='aspect in degree'
ylabel='count'
plot4positions(feature='aspect',position=position, title=title,
               xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel)

In [None]:
# Planform-curvature histograms at the four sampled positions.
title='Planform curvature'
xlim = [-0.15,0.15]
ylim = [0,2000]
# replaces the '...' placeholder so the x axis is actually labelled
# (unit not stated in this notebook, so none is given)
xlabel='planform curvature'
ylabel='count'
plot4positions(feature='placurv',position=position, title=title,
               xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel)

In [None]:
# Profile-curvature histograms at the four sampled positions.
title='Profile curvature'
xlim = [-0.15,0.15]
ylim = [0,2500]
# replaces the '...' placeholder so the x axis is actually labelled
# (unit not stated in this notebook, so none is given)
xlabel='profile curvature'
ylabel='count'
plot4positions(feature='procurv',position=position, title=title,
               xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel)

In [None]:
# Length-slope-factor histograms at the four sampled positions.
title='Length-slope factor'
xlim = [0,40]
ylim = [0,2000]
# replaces the '...' placeholder so the x axis is actually labelled
xlabel='length-slope factor'
ylabel='count'
plot4positions(feature='lsfactor',position=position, title=title,
               xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel)

In [None]:
# Topographic-wetness-index histograms at the four sampled positions.
title='Topographic wetness index'
xlim = [0,20]
ylim = [0,2000]
# replaces the '...' placeholder so the x axis is actually labelled
xlabel='topographic wetness index'
ylabel='count'
plot4positions(feature='twi',position=position, title=title,
               xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel)

In [None]:
# Lithology histograms at the four sampled positions.
title, xlabel, ylabel = 'Lithology of the surface material', 'surface material', 'count'
xlim, ylim = [0, 8], [0, 6500]
plot4positions(
    feature='geology', position=position, title=title,
    xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel,
)

In [None]:
# sdoif histograms at the four sampled positions.
title, xlabel, ylabel = 'Step duration orographic intensification factor', 'step duration', 'count'
xlim, ylim = [1, 1.4], [0, 1100]
plot4positions(
    feature='sdoif', position=position, title=title,
    xlim=xlim, ylim=ylim, xlabel=xlabel, ylabel=ylabel,
)

In [None]:
# Count how often each geology value occurs in each of the 25 `<n>_geology` columns.
#
# The frame is built from a dict in one step so that the rows are the UNION of
# all values seen in any column. Assigning column-by-column to an empty
# DataFrame would fix the index to the values present in `1_geology` and
# silently drop every value that first appears in a later column.
geology_columns = [f'{n}_geology' for n in range(1, 26)]
geology = pd.DataFrame({col: train[col].value_counts() for col in geology_columns})
# a value that is absent from a column occurred zero times there
geology = geology.fillna(0).astype(int)
# one row per column after transposing -> summary statistics per geology value
geology.T.describe()

In [None]:
# Apply the seaborn theme with a wide default figure size for the histogram grid below.
sns.set_theme(rc={'figure.figsize': (11, 8)})

In [None]:
# Per-label step histograms of every feature at position 1, on a 3x3 grid.
fig, axs = plt.subplots(3, 3)

# options shared by all nine panels
step_style = dict(hue="Label", element="step", fill=False)
# (column, panel-specific options) in row-major order; only the first panel keeps its legend
panels = [
    ("1_elevation", {}),
    ("1_slope", {"legend": False}),
    ("1_aspect", {"legend": False}),
    ("1_placurv", {"legend": False}),
    ("1_procurv", {"legend": False}),
    ("1_lsfactor", {"legend": False}),
    ("1_twi", {"legend": False, "log_scale": True}),
    ("1_geology", {"legend": False, "multiple": "dodge"}),
    ("1_sdoif", {"legend": False}),
]
for ax, (column, extra) in zip(axs.flat, panels):
    sns.histplot(data=train, x=column, ax=ax, **step_style, **extra)

fig.tight_layout()
plt.show()

In [None]:
# Switch to a tall default figure size for the KDE grids that follow.
sns.set_theme(rc={'figure.figsize': (7, 10)})

In [None]:
# Bivariate KDEs of elevation (y) against every other position-1 feature (x), split by label.
fig, axs = plt.subplots(4, 2)
x_columns = ["1_slope", "1_aspect", "1_placurv", "1_procurv",
             "1_lsfactor", "1_twi", "1_geology", "1_sdoif"]
for k, (ax, x_col) in enumerate(zip(axs.flat, x_columns)):
    # only the first panel carries the legend
    sns.kdeplot(data=train, x=x_col, y="1_elevation", hue="Label", ax=ax, legend=(k == 0))

fig.tight_layout()
plt.show()

In [None]:
# Bivariate KDEs of slope (y) against the remaining position-1 features (x), split by label.
fig, axs = plt.subplots(4, 2)
# seven pairings on a 4x2 grid: the top-left panel is never drawn on,
# so hide it instead of showing an empty frame
axs[0,0].set_visible(False)
sns.kdeplot(data=train, x="1_aspect", y="1_slope", hue="Label", ax=axs[0,1])
sns.kdeplot(data=train, x="1_placurv", y="1_slope", hue="Label", ax=axs[1,0], legend=False)
sns.kdeplot(data=train, x="1_procurv", y="1_slope", hue="Label", ax=axs[1,1], legend=False)
sns.kdeplot(data=train, x="1_lsfactor", y="1_slope", hue="Label", ax=axs[2,0], legend=False)
sns.kdeplot(data=train, x="1_twi", y="1_slope", hue="Label", ax=axs[2,1], legend=False)
sns.kdeplot(data=train, x="1_geology", y="1_slope", hue="Label", ax=axs[3,0], legend=False)
sns.kdeplot(data=train, x="1_sdoif", y="1_slope", hue="Label", ax=axs[3,1], legend=False)

fig.tight_layout()
plt.show()

In [None]:
# Bivariate KDEs of aspect (y) against the remaining position-1 features (x), split by label.
fig, axs = plt.subplots(3, 2)
x_columns = ["1_placurv", "1_procurv", "1_lsfactor", "1_twi", "1_geology", "1_sdoif"]
for k, (ax, x_col) in enumerate(zip(axs.flat, x_columns)):
    # only the first panel carries the legend
    sns.kdeplot(data=train, x=x_col, y="1_aspect", hue="Label", ax=ax, legend=(k == 0))

fig.tight_layout()
plt.show()

In [None]:
# Bivariate KDEs of planform curvature (y) against the remaining position-1 features (x), split by label.
fig, axs = plt.subplots(3, 2)
# five pairings on a 3x2 grid: the top-left panel is never drawn on,
# so hide it instead of showing an empty frame
axs[0,0].set_visible(False)
sns.kdeplot(data=train, x="1_procurv", y="1_placurv", hue="Label", ax=axs[0,1])
sns.kdeplot(data=train, x="1_lsfactor", y="1_placurv", hue="Label", ax=axs[1,0], legend=False)
sns.kdeplot(data=train, x="1_twi", y="1_placurv", hue="Label", ax=axs[1,1], legend=False)
sns.kdeplot(data=train, x="1_geology", y="1_placurv", hue="Label", ax=axs[2,0], legend=False)
sns.kdeplot(data=train, x="1_sdoif", y="1_placurv", hue="Label", ax=axs[2,1], legend=False)

fig.tight_layout()
plt.show()

In [None]:
# Bivariate KDEs of profile curvature (y) against the remaining position-1 features (x), split by label.
fig, axs = plt.subplots(2, 2)
x_columns = ["1_lsfactor", "1_twi", "1_geology", "1_sdoif"]
for k, (ax, x_col) in enumerate(zip(axs.flat, x_columns)):
    # only the first panel carries the legend
    sns.kdeplot(data=train, x=x_col, y="1_procurv", hue="Label", ax=ax, legend=(k == 0))

fig.tight_layout()
plt.show()

In [None]:
# Bivariate KDEs of the length-slope factor (y) against the remaining position-1 features (x), split by label.
fig, axs = plt.subplots(2, 2)
# three pairings on a 2x2 grid: the top-left panel is never drawn on,
# so hide it instead of showing an empty frame
axs[0,0].set_visible(False)
sns.kdeplot(data=train, x="1_twi", y="1_lsfactor", hue="Label", ax=axs[0,1])
sns.kdeplot(data=train, x="1_geology", y="1_lsfactor", hue="Label", ax=axs[1,0], legend=False)
sns.kdeplot(data=train, x="1_sdoif", y="1_lsfactor", hue="Label", ax=axs[1,1], legend=False)

fig.tight_layout()
plt.show()

In [None]:
# Last three pairings: twi vs geology, twi vs sdoif, and geology vs sdoif, split by label.
fig, axs = plt.subplots(2, 2)
# the bottom-left panel is never drawn on, so hide it instead of showing an empty frame
axs[1,0].set_visible(False)
sns.kdeplot(data=train, x="1_geology", y="1_twi", hue="Label", ax=axs[0,0])
sns.kdeplot(data=train, x="1_sdoif", y="1_twi", hue="Label", ax=axs[0,1], legend=False)
sns.kdeplot(data=train, x="1_sdoif", y="1_geology", hue="Label", ax=axs[1,1], legend=False)
fig.tight_layout()
plt.show()