In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout
from sklearn.metrics import recall_score, precision_score

# Load the data

In [3]:
data = pd.read_csv('god-class-2020+2019+2018.csv')
data = data[['WMCNAMM_type', 'LOC_type','is_god_class']]

# Normalize the data

In [4]:
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data.drop(columns='is_god_class'))
X = data_scaled.reshape(data_scaled.shape[0], data_scaled.shape[1], 1)
y = data['is_god_class'].values

# Split data

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Building

In [6]:
model = Sequential()

model.add(Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(X_train.shape[1], 1)))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Train the model

In [7]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.6007 - loss: 0.6874 - val_accuracy: 0.5690 - val_loss: 0.6768
Epoch 2/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5677 - loss: 0.6750 - val_accuracy: 0.5690 - val_loss: 0.6648
Epoch 3/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5635 - loss: 0.6638 - val_accuracy: 0.5690 - val_loss: 0.6521
Epoch 4/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5963 - loss: 0.6482 - val_accuracy: 0.5690 - val_loss: 0.6404
Epoch 5/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5859 - loss: 0.6420 - val_accuracy: 0.5690 - val_loss: 0.6288
Epoch 6/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5997 - loss: 0.6337 - val_accuracy: 0.5690 - val_loss: 0.6150
Epoch 7/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x17e3145d0>

# Evaluation

In [8]:
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print("\nTest accuracy:", test_acc)

2/2 - 0s - 7ms/step - accuracy: 0.8103 - loss: 0.5764

Test accuracy: 0.8103448152542114


In [9]:
from sklearn.metrics import recall_score, precision_score
from sklearn.metrics import f1_score
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"F1 Score: {f1}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Recall: 0.8484848484848485
Precision: 0.8235294117647058
F1 Score: 0.8358208955223881


In [10]:
import joblib
model.save('gc_model.h5')
joblib.dump(scaler, 'gc_scalar.pkl')



['gc_scalar.pkl']