In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

%matplotlib notebook

print("Tensorflow version: {0}".format(tf.__version__))

Tensorflow version: 2.0.0-alpha0


In [2]:
train_vals = pd.read_csv('train_values.csv')
train_labs = pd.read_csv('train_labels.csv')
test_vals = pd.read_csv('test_values.csv')

In [3]:
print("Training values nans: {0}".format(train_vals.isnull().values.sum()))
print("Training labels nans: {0}".format(train_labs.isnull().values.sum()))
print("Testing values nans: {0}".format(test_vals.isnull().values.sum()))

Training values nans: 0
Training labels nans: 0
Testing values nans: 0


In [4]:
train_vals.head()

Unnamed: 0,building_id,geo_level_1_id,geo_level_2_id,geo_level_3_id,count_floors_pre_eq,age,area_percentage,height_percentage,land_surface_condition,foundation_type,...,has_secondary_use_agriculture,has_secondary_use_hotel,has_secondary_use_rental,has_secondary_use_institution,has_secondary_use_school,has_secondary_use_industry,has_secondary_use_health_post,has_secondary_use_gov_office,has_secondary_use_use_police,has_secondary_use_other
0,802906,6,487,12198,2,30,6,5,t,r,...,0,0,0,0,0,0,0,0,0,0
1,28830,8,900,2812,2,10,8,7,o,r,...,0,0,0,0,0,0,0,0,0,0
2,94947,21,363,8973,2,10,5,5,t,r,...,0,0,0,0,0,0,0,0,0,0
3,590882,22,418,10694,2,10,6,5,t,r,...,0,0,0,0,0,0,0,0,0,0
4,201944,11,131,1488,3,30,8,9,t,r,...,0,0,0,0,0,0,0,0,0,0


In [5]:
train_labs.head()

Unnamed: 0,building_id,damage_grade
0,802906,3
1,28830,2
2,94947,3
3,590882,2
4,201944,3


In [6]:
def make_one_hot(df, col_name, new_col_prefix):
    dist_vals = df[col_name].unique()
    for i in dist_vals:
        df["{0}_{1}".format(new_col_prefix,i)] = df[col_name] == i
    df.drop(columns=[col_name,], inplace=True)

In [7]:
make_one_hot(train_vals, 'land_surface_condition', 'lsc')
make_one_hot(train_vals, 'foundation_type', 'ftyp')
make_one_hot(train_vals, 'roof_type', 'rtyp')
make_one_hot(train_vals, 'ground_floor_type', 'gft')
make_one_hot(train_vals, 'other_floor_type', 'oft')
make_one_hot(train_vals, 'position', 'pos')
make_one_hot(train_vals, 'plan_configuration', 'pconf')
make_one_hot(train_vals, 'legal_ownership_status', 'los')

In [8]:
make_one_hot(test_vals, 'land_surface_condition', 'lsc')
make_one_hot(test_vals, 'foundation_type', 'ftyp')
make_one_hot(test_vals, 'roof_type', 'rtyp')
make_one_hot(test_vals, 'ground_floor_type', 'gft')
make_one_hot(test_vals, 'other_floor_type', 'oft')
make_one_hot(test_vals, 'position', 'pos')
make_one_hot(test_vals, 'plan_configuration', 'pconf')
make_one_hot(test_vals, 'legal_ownership_status', 'los')

In [9]:
print('Train dataset shape: {0}'.format(train_vals.shape))
print('Test dataset shape: {0}'.format(test_vals.shape))

Train dataset shape: (260601, 69)
Test dataset shape: (86868, 69)


In [10]:
train_vals.head()

Unnamed: 0,building_id,geo_level_1_id,geo_level_2_id,geo_level_3_id,count_floors_pre_eq,age,area_percentage,height_percentage,has_superstructure_adobe_mud,has_superstructure_mud_mortar_stone,...,pconf_m,pconf_c,pconf_a,pconf_n,pconf_f,pconf_o,los_v,los_a,los_r,los_w
0,802906,6,487,12198,2,30,6,5,1,1,...,False,False,False,False,False,False,True,False,False,False
1,28830,8,900,2812,2,10,8,7,0,1,...,False,False,False,False,False,False,True,False,False,False
2,94947,21,363,8973,2,10,5,5,0,1,...,False,False,False,False,False,False,True,False,False,False
3,590882,22,418,10694,2,10,6,5,0,1,...,False,False,False,False,False,False,True,False,False,False
4,201944,11,131,1488,3,30,8,9,1,0,...,False,False,False,False,False,False,True,False,False,False


In [11]:
test_vals.head()

Unnamed: 0,building_id,geo_level_1_id,geo_level_2_id,geo_level_3_id,count_floors_pre_eq,age,area_percentage,height_percentage,has_superstructure_adobe_mud,has_superstructure_mud_mortar_stone,...,pconf_c,pconf_m,pconf_s,pconf_o,pconf_f,pconf_n,los_v,los_a,los_w,los_r
0,300051,17,596,11307,3,20,7,6,0,1,...,False,False,False,False,False,False,True,False,False,False
1,99355,6,141,11987,2,25,13,5,0,1,...,False,False,False,False,False,False,True,False,False,False
2,890251,22,19,10044,2,5,4,5,0,1,...,False,False,False,False,False,False,True,False,False,False
3,745817,26,39,633,1,0,19,3,0,0,...,False,False,False,False,False,False,True,False,False,False
4,421793,17,289,7970,3,15,8,7,0,1,...,False,False,False,False,False,False,True,False,False,False


In [12]:
X_train, X_val, y_train, y_val = train_test_split(train_vals.iloc[:, 1:].values, train_labs.iloc[:, 1].values, test_size=0.2, random_state=123)
X_test = test_vals.iloc[:, 1:].values

In [13]:
print("Train shapes: {0} {1}".format(X_train.shape, y_train.shape))
print("Val shapes: {0} {1}".format(X_val.shape, y_val.shape))
print("Test shapes: {0}".format(X_test.shape))

Train shapes: (208480, 68) (208480,)
Val shapes: (52121, 68) (52121,)
Test shapes: (86868, 68)


In [14]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(68,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dropout(0.1))
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [15]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [16]:
model.fit(X_train, y_train, epochs=500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500

KeyboardInterrupt: 

In [None]:
model.evaluate(X_val, y_val)