In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout

# Load the data

In [12]:
# Read the labelled dataset and keep only the two metric columns used as
# features plus the binary target column.
csv_path = 'feature-envy-2020+2019+2018.csv'
selected_columns = ['ATFD_method', 'LAA_method', 'is_feature_envy']
data = pd.read_csv(csv_path)[selected_columns]

# Normalize the data

In [13]:
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data.drop(columns='is_feature_envy'))
X = data_scaled.reshape(data_scaled.shape[0], data_scaled.shape[1], 1)
y = data['is_feature_envy'].values

# Split data

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Building

In [15]:
# Number of steps per sample, taken from the training tensor's second axis.
n_steps = X_train.shape[1]

# Small 1D-CNN binary classifier: one convolution, then a dense head with
# dropout, ending in a single sigmoid unit.
model = Sequential([
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(n_steps, 1)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 1, 32)             96        
                                                                 
 flatten_1 (Flatten)         (None, 32)                0         
                                                                 
 dense_3 (Dense)             (None, 128)               4224      
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                                 
Total params: 12,641
Trainable params: 12,641
Non-trainable params: 0

# Train the model

In [16]:
# Train for 10 epochs in mini-batches of 32; the held-out split is passed as
# validation data so its loss/accuracy are reported after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a3996a47c0>

# Evaluation

In [17]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Model outputs are thresholded at 0.5 to obtain hard 0/1 labels as a 1-D array.
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs.flatten() > 0.5).astype(int)

# Score the hard predictions against the held-out labels.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"F1 Score: {f1}")

Recall: 1.0
Precision: 0.6363636363636364
F1 Score: 0.7777777777777778


In [18]:
import joblib

# Persist the trained network and the fitted scaler side by side so the same
# preprocessing can be reapplied when the model is loaded later.
model_path = 'FeatureEnvy.h5'
scaler_path = 'FeScaler.pkl'

model.save(model_path)
joblib.dump(scaler, scaler_path)

['FeScaler.pkl']