In [1]:
import pandas as pd
import tensorflow as tf
import numpy
# Training labels: one damage_grade per building, indexed by building_id.
label_dtypes = {
    'damage_grade':'uint8'
}
df_train_labels_original = pd.read_csv(
    'train_labels.csv',
    low_memory=False,
    dtype=label_dtypes,
).set_index('building_id')

# Training features: explicit dtypes keep flags/counters in small unsigned
# integers and read the coded string columns as pandas categoricals.
feature_dtypes = {
    'geo_level_1_id':'category',
    'geo_level_2_id':'category',
    'geo_level_3_id':'category',
    'count_floors_pre_eq':'uint8',
    'age':'uint16',
    'area_percentage':'uint16',
    'height_percentage':'uint16',
    'land_surface_condition':'category',
    'foundation_type':'category',
    'roof_type':'category',
    'ground_floor_type':'category',
    'other_floor_type':'category',
    'position':'category',
    'plan_configuration':'category',
    'has_superstructure_adobe_mud':'uint8',
    'has_superstructure_mud_mortar_stone':'uint8',
    'has_superstructure_stone_flag':'uint8',
    'has_superstructure_cement_mortar_stone':'uint8',
    'has_superstructure_mud_mortar_brick':'uint8',
    'has_superstructure_cement_mortar_brick':'uint8',
    'has_superstructure_timber':'uint8',
    'has_superstructure_bamboo':'uint8',
    'has_superstructure_rc_non_engineered':'uint8',
    'has_superstructure_rc_engineered':'uint8',
    'has_superstructure_other':'uint8',
    'legal_ownership_status':'category',
    'count_families':'uint16',
    'has_secondary_use':'uint8',
    'has_secondary_use_agriculture':'uint8',
    'has_secondary_use_hotel':'uint8',
    'has_secondary_use_rental':'uint8',
    'has_secondary_use_institution':'uint8',
    'has_secondary_use_school':'uint8',
    'has_secondary_use_industry':'uint8',
    'has_secondary_use_health_post':'uint8',
    'has_secondary_use_gov_office':'uint8',
    'has_secondary_use_use_police':'uint8',
    'has_secondary_use_other':'uint8',
}
df_train_values_original = pd.read_csv(
    'train_values.csv',
    low_memory=False,
    dtype=feature_dtypes,
).set_index('building_id')

# Render floats with thousands separators and two decimals in displayed frames.
pd.set_option('display.float_format', '{:20,.2f}'.format)

KeyboardInterrupt: 

In [None]:
# Build a new feature frame without the three geo-level id columns;
# the originally loaded frame is left untouched.
geo_level_columns = ['geo_level_1_id', 'geo_level_2_id', 'geo_level_3_id']
df_train_values = df_train_values_original.drop(columns=geo_level_columns)

In [None]:
# Attach the label column to the features. Both frames are indexed by
# building_id, and the inner join keeps only buildings present in both.
df = df_train_values.join(
    other=df_train_labels_original,
    how="inner",
)
df

In [None]:
# Zero-based class labels for the classifier: damage_grade shifted down by one.
#
# The labels are read from the untouched labels frame (aligned to df's
# building_id index) instead of being popped out of `df`, so this cell can be
# re-run without raising KeyError. The cast to int64 happens before the
# subtraction so the arithmetic is not done in uint8.
target = df_train_labels_original.loc[df.index, 'damage_grade'].astype('int64') - 1

# Remove the label from the feature frame; errors='ignore' keeps the cell
# idempotent when the column has already been dropped by a previous run.
df = df.drop(columns=['damage_grade'], errors='ignore')
target

In [None]:
# One-hot encode land_surface_condition: drop the categorical column, append
# its indicator columns, then rename the listed single-letter codes to lsc_*.
lsc_indicators = pd.get_dummies(df['land_surface_condition'])
df_lsc = df.drop(columns=['land_surface_condition']).join(lsc_indicators)
df_lsc = df_lsc.rename(columns={
    "n": "lsc_n",
    "o": "lsc_o",
    "t": "lsc_t",
})
df_lsc

In [None]:
# One-hot encode foundation_type: drop the categorical column, append its
# indicator columns, then rename the listed single-letter codes to ft_*.
ft_indicators = pd.get_dummies(df_lsc['foundation_type'])
df_ft = df_lsc.drop(columns=['foundation_type']).join(ft_indicators)
df_ft = df_ft.rename(columns={
    "h": "ft_h",
    "i": "ft_i",
    "r": "ft_r",
    "u": "ft_u",
    "w": "ft_w",
})
df_ft

In [None]:
# One-hot encode roof_type: drop the categorical column, append its
# indicator columns, then rename the listed single-letter codes to rt_*.
rt_indicators = pd.get_dummies(df_ft['roof_type'])
df_rt = df_ft.drop(columns=['roof_type']).join(rt_indicators)
df_rt = df_rt.rename(columns={
    "n": "rt_n",
    "q": "rt_q",
    "x": "rt_x",
})
df_rt

In [None]:
# One-hot encode ground_floor_type: drop the categorical column, append its
# indicator columns, then rename the listed single-letter codes to gft_*.
gft_indicators = pd.get_dummies(df_rt['ground_floor_type'])
df_gft = df_rt.drop(columns=['ground_floor_type']).join(gft_indicators)
df_gft = df_gft.rename(columns={
    "f": "gft_f",
    "m": "gft_m",
    "v": "gft_v",
    "x": "gft_x",
    "z": "gft_z",
})
df_gft

In [None]:
# One-hot encode other_floor_type: drop the categorical column, append its
# indicator columns, then rename the listed single-letter codes to oft_*.
oft_indicators = pd.get_dummies(df_gft['other_floor_type'])
df_oft = df_gft.drop(columns=['other_floor_type']).join(oft_indicators)
df_oft = df_oft.rename(columns={
    "j": "oft_j",
    "q": "oft_q",
    "s": "oft_s",
    "x": "oft_x",
})
df_oft

In [None]:
# One-hot encode position: drop the categorical column, append its
# indicator columns, then rename the listed single-letter codes to p_*.
position_indicators = pd.get_dummies(df_oft['position'])
df_p = df_oft.drop(columns=['position']).join(position_indicators)
df_p = df_p.rename(columns={
    "j": "p_j",
    "o": "p_o",
    "s": "p_s",
    "t": "p_t",
})
df_p

In [None]:
# One-hot encode plan_configuration: drop the categorical column, append its
# indicator columns, then rename the listed single-letter codes to pc_*.
pc_indicators = pd.get_dummies(df_p['plan_configuration'])
df_pc = df_p.drop(columns=['plan_configuration']).join(pc_indicators)
df_pc = df_pc.rename(columns={
    "a": "pc_a",
    "c": "pc_c",
    "d": "pc_d",
    "f": "pc_f",
    "m": "pc_m",
    "n": "pc_n",
    "o": "pc_o",
    "q": "pc_q",
    "s": "pc_s",
    "u": "pc_u",
})
df_pc

In [None]:
# One-hot encode legal_ownership_status: drop the categorical column, append
# its indicator columns, then rename the listed single-letter codes to los_*.
los_indicators = pd.get_dummies(df_pc['legal_ownership_status'])
df_los = df_pc.drop(columns=['legal_ownership_status']).join(los_indicators)
df_los = df_los.rename(columns={
    "a": "los_a",
    "r": "los_r",
    "v": "los_v",
    "w": "los_w",
})
df_los

In [None]:
# Dense int64 matrix of the fully encoded training features
# (one row per building, one column per feature of df_los).
numpy_database = df_los.to_numpy(
    dtype=numpy.int64,
    copy=False,
)
numpy_database

In [None]:
# First feature row, kept two-dimensional (shape 1 x n_features).
numpy_database[0:1]

In [None]:
# Pair every feature row with its zero-based label.
#
# The features come from the int64 matrix rather than from the DataFrame:
# df_los mixes integer columns with get_dummies indicator columns (bool in
# recent pandas), and such a frame converts to an object array that TensorFlow
# cannot turn into a tensor. numpy_database is also what the model is called
# with elsewhere in this notebook, so training and inference share one dtype.
dataset = tf.data.Dataset.from_tensor_slices((numpy_database, target.values))
dataset

In [None]:
# Sanity check: print the first five (features, label) pairs of the dataset.
for features, label in dataset.take(5):
    message = 'Features: {}, Target: {}'.format(features, label)
    print(message)


In [None]:
# Shuffle with a buffer as large as the training frame, then emit
# single-example batches.
# NOTE(review): batch_size=1 means one optimizer step per building, which is
# likely very slow on the full training set; confirm this is intentional.
shuffle_buffer = len(df)
train_dataset = dataset.shuffle(buffer_size=shuffle_buffer).batch(batch_size=1)
train_dataset

In [None]:
# Feed-forward classifier: two 120-unit ReLU layers with 20% dropout each,
# followed by a 3-unit output layer (one unit per damage grade).
with tf.device('/CPU:0'):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(120, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(120, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        # softmax (not sigmoid): the three grades are mutually exclusive and
        # the loss used below (SparseCategoricalCrossentropy with
        # from_logits=False) expects the outputs to be a probability
        # distribution over the classes.
        tf.keras.layers.Dense(units=3, activation='softmax')
      ])

# Untrained forward pass on the first row; calling the model also builds its weights.
predictions = model(numpy_database[:1]).numpy()
predictions

In [None]:
# Compile with sparse categorical cross-entropy on probabilities (labels are
# integer class ids) and run a first training epoch on the CPU.
with tf.device('/CPU:0'):
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(
        optimizer='Adagrad',
        loss=loss_fn,
        metrics=['accuracy'],
    )
    model.fit(x=train_dataset, epochs=1)

In [None]:
# Model output for the first training row after the first training epoch.
first_training_row = numpy_database[:1]
predictions = model(first_training_row).numpy()
predictions

In [None]:
# Continue training the same model for four more epochs on the CPU.
with tf.device('/CPU:0'):
    model.fit(x=train_dataset, epochs=4)

In [None]:
# Model output for the first training row after the additional epochs.
first_training_row = numpy_database[:1]
predictions = model(first_training_row).numpy()
predictions

In [None]:
# Persist the current model under modelos/NNModel.
model.save(filepath='modelos/NNModel')

In [None]:
# Long training run: 50 more epochs on the CPU, then overwrite the saved model.
with tf.device('/CPU:0'):
    model.fit(x=train_dataset, epochs=50)
model.save(filepath='modelos/NNModel')

In [None]:
# Load the test features with the same column dtypes used for the training
# features, index by building_id and drop the three geo-level id columns.
# (The variable keeps its original name `train_f` although it holds test data.)
train_f = pd.read_csv('test_values.csv',low_memory=False, dtype= {
    'geo_level_1_id':'category',
    'geo_level_2_id':'category',
    'geo_level_3_id':'category',
    'count_floors_pre_eq':'uint8',
    'age':'uint16',
    'area_percentage':'uint16',
    'height_percentage':'uint16',
    'land_surface_condition':'category',
    'foundation_type':'category',
    'roof_type':'category',
    'ground_floor_type':'category',
    'other_floor_type':'category',
    'position':'category',
    'plan_configuration':'category',
    'has_superstructure_adobe_mud':'uint8',
    'has_superstructure_mud_mortar_stone':'uint8',
    'has_superstructure_stone_flag':'uint8',
    'has_superstructure_cement_mortar_stone':'uint8',
    'has_superstructure_mud_mortar_brick':'uint8',
    'has_superstructure_cement_mortar_brick':'uint8',
    'has_superstructure_timber':'uint8',
    'has_superstructure_bamboo':'uint8',
    'has_superstructure_rc_non_engineered':'uint8',
    'has_superstructure_rc_engineered':'uint8',
    'has_superstructure_other':'uint8',
    'legal_ownership_status':'category',
    'count_families':'uint16',
    'has_secondary_use':'uint8',
    'has_secondary_use_agriculture':'uint8',
    'has_secondary_use_hotel':'uint8',
    'has_secondary_use_rental':'uint8',
    'has_secondary_use_institution':'uint8',
    'has_secondary_use_school':'uint8',
    'has_secondary_use_industry':'uint8',
    'has_secondary_use_health_post':'uint8',
    'has_secondary_use_gov_office':'uint8',
    'has_secondary_use_use_police':'uint8',
    'has_secondary_use_other':'uint8',
}).set_index('building_id').drop(columns=['geo_level_1_id','geo_level_2_id','geo_level_3_id'])

# Categorical column -> {category code: indicator column name}. The entries
# are processed in this order, which mirrors the training-side encoding cells.
one_hot_renames = {
    'land_surface_condition': {
        "n": "lsc_n",
        "o": "lsc_o",
        "t": "lsc_t",
    },
    'foundation_type': {
        "h": "ft_h",
        "i": "ft_i",
        "r": "ft_r",
        "u": "ft_u",
        "w": "ft_w",
    },
    'roof_type': {
        "n": "rt_n",
        "q": "rt_q",
        "x": "rt_x",
    },
    'ground_floor_type': {
        "f": "gft_f",
        "m": "gft_m",
        "v": "gft_v",
        "x": "gft_x",
        "z": "gft_z",
    },
    'other_floor_type': {
        "j": "oft_j",
        "q": "oft_q",
        "s": "oft_s",
        "x": "oft_x",
    },
    'position': {
        "j": "p_j",
        "o": "p_o",
        "s": "p_s",
        "t": "p_t",
    },
    'plan_configuration': {
        "a": "pc_a",
        "c": "pc_c",
        "d": "pc_d",
        "f": "pc_f",
        "m": "pc_m",
        "n": "pc_n",
        "o": "pc_o",
        "q": "pc_q",
        "s": "pc_s",
        "u": "pc_u",
    },
    'legal_ownership_status': {
        "a": "los_a",
        "r": "los_r",
        "v": "los_v",
        "w": "los_w",
    },
}

# Replace each categorical column by its indicator columns. The rename runs
# right after each join so the single-letter names of one column never
# collide with those of the next.
tf_los = train_f
for categorical_column, indicator_names in one_hot_renames.items():
    tf_los = (
        tf_los
        .drop(columns=[categorical_column])
        .join(pd.get_dummies(train_f[categorical_column]))
        .rename(columns=indicator_names)
    )

# Force the exact column set and order of the training matrix: a category
# that never occurs in the test file would otherwise yield fewer (or shifted)
# inputs than the model was trained on. Missing indicators are filled with 0.
tf_los = tf_los.reindex(columns=df_los.columns, fill_value=0)

test_database = tf_los.to_numpy(dtype=numpy.int64)
test_database

In [None]:
# Score every test row in a single forward pass; one row of three class
# scores per building.
test_scores = model(test_database)
predictions = test_scores.numpy()
predictions

In [None]:
# building_id of every test row, in the same row order as test_database.
# Fixed: the original called reset_index() on `tf`, which is the TensorFlow
# module, not a DataFrame; the encoded test frame is `tf_los`.
building_ids = tf_los.reset_index()['building_id']
building_ids

In [None]:
# Convert each row of three class scores into a 1-based damage grade.
# Grade 1 needs a strictly larger score than both others; otherwise grade 2
# wins only if strictly larger than grade 3; every remaining case is grade 3.
results = [
    1 if (scores[0] > scores[1] and scores[0] > scores[2])
    else 2 if scores[1] > scores[2]
    else 3
    for scores in predictions
]
result_series = pd.Series(results, name="damage_grade")
result_series

In [None]:
# Pair each building_id with its predicted grade by positional index,
# then index the submission table by building_id.
ids_with_grades = pd.merge(
    building_ids,
    result_series,
    left_index=True,
    right_index=True,
)
my_submission = ids_with_grades.set_index('building_id')
my_submission

In [None]:
# Write the submission (building_id index + damage_grade column) to disk.
my_submission.to_csv(path_or_buf='submission_feda_1.csv')