# In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the data
train_df = pd.read_csv("/content/train_ctrUa4K.csv")
test_df = pd.read_csv("/content/test_lAUu6dG.csv")

# Remember where the training rows end so the two sets can be separated again
# after the shared preprocessing below.
n_train = len(train_df)

# Combine train and test data so both get identical preprocessing
# (same fill values, same dummy columns).
# NOTE: the fill values below are computed over train AND test rows together.
combined_df = pd.concat([train_df, test_df], axis=0, ignore_index=True)

# Fill missing values.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on a selected column: that chained in-place form
# acts on a temporary object, emits a FutureWarning in recent pandas releases
# and does not update the DataFrame when Copy-on-Write is enabled.
mode_filled_cols = [
    'Gender',
    'Married',
    'Dependents',
    'Self_Employed',
    'Credit_History',
    'Loan_Amount_Term',
]
for col in mode_filled_cols:
    # Most frequent value of the column is used as the replacement.
    combined_df[col] = combined_df[col].fillna(combined_df[col].mode()[0])

# LoanAmount is filled with the column median instead of the mode.
combined_df['LoanAmount'] = combined_df['LoanAmount'].fillna(combined_df['LoanAmount'].median())

# Encode categorical variables as dummy columns; drop_first=True drops the
# first level of each column to avoid a redundant indicator.
cat_cols = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Credit_History', 'Property_Area']
combined_df = pd.get_dummies(combined_df, columns=cat_cols, drop_first=True)

# Splitting back into train and test.
# Position-based slicing with the row count captured before the concat; the
# copies keep the two frames independent of combined_df.
train_df = combined_df.iloc[:n_train].copy()
test_df = combined_df.iloc[n_train:].copy()

# Selecting features and target variable
non_feature_cols = ['Loan_ID', 'Loan_Status']
X = train_df.drop(columns=non_feature_cols)
y = train_df['Loan_Status']

# Splitting data into train and validation sets (80% / 20%, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Model training
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions on validation set (kept available for further inspection)
y_pred = model.predict(X_val)

# Calculate accuracy on the validation set
accuracy = model.score(X_val, y_val)
print("Accuracy:", accuracy)

# Predictions on test set; the same non-feature columns are dropped so the
# feature columns match the ones used for training.
test_X = test_df.drop(columns=non_feature_cols)
test_predictions = model.predict(test_X)

# Create submission file
submission_df = pd.DataFrame({'Loan_ID': test_df['Loan_ID'], 'Loan_Status': test_predictions})
submission_df.to_csv("submission.csv", index=False)

# Output: Accuracy: 0.7804878048780488
