In [105]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Load the loan dataset (assuming it's in a CSV file)
loan_data = pd.read_csv('/content/trainloan.csv')

# Preprocess the data
# Drop irrelevant columns or those with missing values
loan_data = loan_data.drop(['Loan_ID'], axis=1)
loan_data = loan_data.dropna()
loan_data.to_csv('trained.csv', index=False)


In [107]:
loan_data = pd.read_csv('/content/trained.csv')
# Convert categorical variables to numerical using label encoding
label_encoder = LabelEncoder()
loan_data['Gender'] = label_encoder.fit_transform(loan_data['Gender'])
loan_data['Married'] = label_encoder.fit_transform(loan_data['Married'])
loan_data['Education'] = label_encoder.fit_transform(loan_data['Education'])
loan_data['Self_Employed'] = label_encoder.fit_transform(loan_data['Self_Employed'])
loan_data['Property_Area'] = label_encoder.fit_transform(loan_data['Property_Area'])

# One-hot encode categorical variables
loan_data = pd.get_dummies(loan_data, columns=['Dependents', 'Credit_History'])
loan_data['ApplicantIncome'] = (loan_data['ApplicantIncome'] - loan_data['ApplicantIncome'].mean()) / loan_data['ApplicantIncome'].std()
loan_data['CoapplicantIncome'] = (loan_data['CoapplicantIncome'] - loan_data['CoapplicantIncome'].mean()) / loan_data['CoapplicantIncome'].std()
loan_data['LoanAmount'] = (loan_data['LoanAmount'] - loan_data['LoanAmount'].mean()) / loan_data['LoanAmount'].std()



In [108]:


# Split the dataset into training and testing sets
X = loan_data.drop('Loan_Status', axis=1)
y = loan_data['Loan_Status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = knn_classifier.predict(X_test)
print(y_pred)



['Y' 'Y' 'Y' 'N' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'N'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'N' 'N' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'Y'
 'N' 'Y' 'Y' 'Y' 'Y' 'Y']


In [109]:
test_data = pd.read_csv('/content/testloan.csv')

# Preprocess the data
# Drop irrelevant columns or those with missing values
test_data = test_data.drop(['Loan_ID'], axis=1)
test_data = test_data.dropna()
test_data.to_csv('tested.csv', index=False)

In [110]:
test_data = pd.read_csv('/content/tested.csv')
# Convert categorical variables to numerical using label encoding
label_encoder = LabelEncoder()
test_data['Gender'] = label_encoder.fit_transform(test_data['Gender'])
test_data['Married'] = label_encoder.fit_transform(test_data['Married'])
test_data['Education'] = label_encoder.fit_transform(test_data['Education'])
test_data['Self_Employed'] = label_encoder.fit_transform(test_data['Self_Employed'])
test_data['Property_Area'] = label_encoder.fit_transform(test_data['Property_Area'])
#test_data['ApplicantIncome'] = label_encoder.fit_transform(test_data['ApplicantIncome'])
# One-hot encode categorical variables
test_data = pd.get_dummies(test_data, columns=['Dependents', 'Credit_History'])




In [111]:
missing_cols = set(X_train.columns) - set(test_data.columns)
for col in missing_cols:
    test_data[col] = 0

# Reorder the columns to match the training dataset
test_data = test_data[X_train.columns]


In [112]:
predictions = knn_classifier.predict(test_data)
test_data['Loan_Status'] = predictions

# Save the test data with the predicted values to a new CSV file
test_data.to_csv('output.csv', index=False)
# Print the predictions
print(predictions)

['Y' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'Y' 'Y' 'N' 'N' 'N' 'Y' 'N' 'Y' 'N'
 'Y' 'N' 'N' 'N' 'N' 'Y' 'N' 'N' 'N' 'N' 'N' 'Y' 'N' 'Y' 'N' 'N' 'N' 'N'
 'Y' 'Y' 'N' 'N' 'Y' 'Y' 'N' 'N' 'Y' 'N' 'Y' 'N' 'Y' 'Y' 'N' 'N' 'N' 'Y'
 'N' 'Y' 'N' 'N' 'N' 'Y' 'N' 'Y' 'Y' 'Y' 'N' 'Y' 'N' 'Y' 'Y' 'N' 'N' 'N'
 'N' 'Y' 'N' 'N' 'N' 'Y' 'N' 'N' 'Y' 'N' 'N' 'Y' 'N' 'N' 'N' 'N' 'Y' 'N'
 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'N' 'N' 'N'
 'Y' 'N' 'N' 'N' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'N' 'Y' 'N' 'N' 'Y' 'N' 'Y' 'N'
 'N' 'N' 'N' 'Y' 'N' 'Y' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'Y' 'N' 'Y' 'N' 'Y'
 'N' 'N' 'N' 'N' 'N' 'Y' 'N' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'N' 'N'
 'Y' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'Y' 'Y' 'N' 'Y' 'Y' 'N' 'N' 'N' 'N' 'Y'
 'Y' 'N' 'N' 'N' 'N' 'Y' 'N' 'N' 'Y' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'N' 'Y' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'N' 'Y' 'N' 'N' 'N' 'Y'
 'N' 'N' 'N' 'Y' 'Y' 'N' 'N' 'Y' 'N' 'Y' 'Y' 'N' 'Y' 'N' 'N' 'N' 'N' 'N'
 'N' 'N' 'Y' 'N' 'Y' 'N' 'Y' 'N' 'Y' 'N' 'Y' 'Y' 'Y