In [None]:
# !pip install tensorflow

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.model_selection import GroupShuffleSplit
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from sklearn.metrics import mean_squared_error

In [None]:
import matplotlib.pyplot as plt
# Apply a white-grid look. Matplotlib 3.6 renamed the bundled seaborn styles
# to 'seaborn-v0_8-*' and later releases dropped the old names, so pick the
# first name this installation actually provides (falling back to 'default')
# instead of hard-coding one that may raise.
plt.style.use(next(
    (style for style in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if style in plt.style.available),
    'default',
))
# Set Matplotlib defaults
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)
plt.rc('animation', html='html5')

In [None]:
# Load the raw dataset from the CSV file next to the notebook.
df = pd.read_csv('./datasciencegroup.csv')

In [None]:
# Display the column labels of the loaded frame.
df.columns

In [None]:
# Keep only the columns used for modelling; .copy() detaches the result from
# `df` so later changes to df_choose do not touch the raw frame.
df_choose = df.loc[:, [
    # subordinate columns
    'sub_age', 'sub_sex', 'sub_shift', 'sub_team', 'sub_role',
    'sub_colls_same_sex_prtn',
    'sub_health_h', 'sub_commitment_h', 'sub_perceptiveness_h',
    'sub_dexterity_h', 'sub_sociality_h', 'sub_goodness_h',
    'sub_strength_h', 'sub_openmindedness_h', 'sub_workstyle_h',
    # supervisor columns
    'sup_ID', 'sup_sub_age_diff', 'sup_sex', 'sup_role',
    'sup_commitment_h', 'sup_perceptiveness_h', 'sup_goodness_h',
    # event column and the target
    'event_weekday_name', 'actual_efficacy_h',
]].copy()

In [None]:
# Preview the first rows of the selected columns.
df_choose.head()

In [None]:
# Show each column's dtype; the preprocessing cell below routes columns to a
# transformer based on these dtypes.
df_choose.dtypes

In [None]:
# Separate the feature matrix from the regression target.
X = df_choose.copy()
y = X.pop('actual_efficacy_h')

# Route columns to a transformer by dtype.
# NOTE(review): only float64 and object columns are picked up here. Columns of
# any other dtype (e.g. integers) are left out, and the column transformer's
# default remainder='drop' discards them -- confirm this is intended.
quantitative_var=X.select_dtypes('float64')
quantitative_var_col_names=quantitative_var.columns
categori_var=X.select_dtypes('object')
categori_var_col_names=categori_var.columns


preprocessor = make_column_transformer(
    (StandardScaler(), quantitative_var_col_names),
    # handle_unknown='ignore': a category that only occurs in the validation
    # split is encoded as all zeros instead of making transform() raise.
    (OneHotEncoder(handle_unknown='ignore'), categori_var_col_names),
)

# Hold out 33% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.33, random_state=42)


# Fit the preprocessing on the training rows only, then apply the same fitted
# transformation to the validation rows (avoids leaking validation statistics).
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

# Number of features after scaling / one-hot encoding; used as the network's
# input shape.
input_shape = [X_train.shape[1]]
print("Input shape: {}".format(input_shape))

In [None]:
# Early-stopping callback: an improvement smaller than 0.001 does not count,
# training stops after 5 epochs without improvement, and the weights from the
# best epoch are restored afterwards.
early_stopping = callbacks.EarlyStopping(
    patience=5,
    min_delta=0.001,
    restore_best_weights=True,
)

In [None]:
# Fully connected regression network: two ReLU hidden layers (128 and 64
# units) followed by a single linear output unit.
model = keras.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=input_shape))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

# Train with Adam on mean absolute error.
model.compile(loss='mae', optimizer='adam')

# Fit for at most 100 epochs in batches of 512 rows, evaluating on the
# validation split each epoch; the early-stopping callback may end training
# sooner.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=512,
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

In [None]:
# Report mean squared error on both splits. The labels say MSE because that is
# what mean_squared_error computes; note the model itself was trained with
# loss='mae', so these numbers are not on the same scale as the training loss.
print('MSE of training data', mean_squared_error(y_train, model.predict(X_train)))
print('MSE of validation data', mean_squared_error(y_valid, model.predict(X_valid)))

In [None]:
# Turn the per-epoch metrics recorded by fit() into a DataFrame.
history_df = pd.DataFrame(history.history)
# Plot training loss against validation loss over the epochs.
history_df[['loss', 'val_loss']].plot()
# Report the lowest validation loss reached during training.
print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))