In [9]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [10]:
train_data = pd.read_csv('../data/raw_data.csv')
validation_data = pd.read_csv('../data/validation.csv')

le_status = LabelEncoder()

In [11]:
def preprocess_data(data, is_train=False):
   
    data.ffill(inplace=True)

    
    categorical_features = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Dependents']
    for feature in categorical_features:
        le = LabelEncoder()
        data[feature] = le.fit_transform(data[feature])

   
    scaler = StandardScaler()
    continuous_features = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']
    data[continuous_features] = scaler.fit_transform(data[continuous_features])
    
    if is_train:
        
        data['Loan_Status'] = le_status.fit_transform(data['Loan_Status'])
    
    return data

In [12]:
train_data = preprocess_data(train_data, is_train=True)
validation_data = preprocess_data(validation_data)

In [13]:
X_train = train_data.drop(['Loan_ID', 'Loan_Status'], axis=1)
y_train = to_categorical(train_data['Loan_Status'])
X_val = validation_data.drop(['Loan_ID'], axis=1)

In [14]:
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=1)

Epoch 1/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 736us/step - accuracy: 0.6870 - loss: 0.6331 
Epoch 2/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 597us/step - accuracy: 0.7070 - loss: 0.6697
Epoch 3/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589us/step - accuracy: 0.7063 - loss: 0.6264
Epoch 4/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 623us/step - accuracy: 0.6799 - loss: 0.6289
Epoch 5/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 661us/step - accuracy: 0.6612 - loss: 0.6402
Epoch 6/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 719us/step - accuracy: 0.6713 - loss: 0.6342
Epoch 7/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 678us/step - accuracy: 0.6809 - loss: 0.6266
Epoch 8/10
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 605us/step - accuracy: 0.6926 - loss: 0.6171
Epoch 9/10
[1m62/62[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1f7a5460e30>

In [22]:
predictions = model.predict(X_val)
predictions

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 813us/step


array([[0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.3137794 , 0.68622065],
       [0.

In [21]:
predicted_status = le_status.inverse_transform(predictions.argmax(axis=1))
predicted_status

array(['Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y',
       'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y', 'Y

In [19]:
results = pd.DataFrame({
    'Loan_ID': validation_data['Loan_ID'],
    'Predicted_Loan_Status': ['Y' if status == 1 else 'N' for status in predicted_status]
})
results

Unnamed: 0,Loan_ID,Predicted_Loan_Status
0,LP001015,N
1,LP001022,N
2,LP001031,N
3,LP001035,N
4,LP001051,N
...,...,...
362,LP002971,N
363,LP002975,N
364,LP002980,N
365,LP002986,N
