In [1]:
pip install pandas sentence-transformers faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-

In [9]:
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
import faiss
from collections import Counter

# Load the labelled training applications and the test applications to be scored.
train_df = pd.read_csv("/content/Training Dataset.csv")
test_df = pd.read_csv("/content/Test Dataset.csv")

# Fail fast on schema problems: the label is needed for voting and the ID for
# the submission file.
assert 'Loan_Status' in train_df.columns, "Training data must have 'Loan_Status'"
assert 'Loan_ID' in test_df.columns, "Test data must have 'Loan_ID'"

# row_to_text() reads every one of these columns from both frames; checking them
# here gives a readable message instead of a KeyError raised inside DataFrame.apply.
FEATURE_COLUMNS = [
    'Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
    'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
    'Loan_Amount_Term', 'Credit_History', 'Property_Area',
]
for frame_name, frame in (("Training", train_df), ("Test", test_df)):
    missing_columns = [col for col in FEATURE_COLUMNS if col not in frame.columns]
    assert not missing_columns, (
        f"{frame_name} data is missing feature columns: {missing_columns}"
    )

def row_to_text(row):
    """Render one loan-application record as a single descriptive sentence.

    The sentence is what gets embedded, so its wording directly shapes the
    nearest-neighbour search.

    Args:
        row: A mapping (e.g. a DataFrame row passed by ``DataFrame.apply`` with
            ``axis=1``, or a plain dict) containing the keys 'Gender',
            'Married', 'Dependents', 'Education', 'Self_Employed',
            'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
            'Loan_Amount_Term', 'Credit_History' and 'Property_Area'.

    Returns:
        str: The formatted description. Missing values (NaN/None/NA) are
        written as "unknown" rather than leaking the literal "nan"/"None"
        into the text; rows without missing values are formatted exactly as
        their raw values.

    Raises:
        KeyError: If ``row`` lacks one of the required keys.
    """
    def shown(column):
        # An f-string would otherwise stringify a missing cell as "nan".
        value = row[column]
        return "unknown" if pd.isna(value) else value

    return (
        f"{shown('Gender')} {shown('Married')} with {shown('Dependents')} dependents, "
        f"{shown('Education')} education, {shown('Self_Employed')} status, "
        f"income: {shown('ApplicantIncome')} + coapplicant: {shown('CoapplicantIncome')}, "
        f"loan amount: {shown('LoanAmount')} over {shown('Loan_Amount_Term')} months, "
        f"credit history: {shown('Credit_History')}, property: {shown('Property_Area')}"
    )

# Turn every application into a sentence the embedding model can read.
train_df['text'] = train_df.apply(row_to_text, axis=1)
test_df['text'] = test_df.apply(row_to_text, axis=1)

# Only applications with a known outcome may act as voting neighbours.
# Imputing a placeholder label for unlabelled rows would invent outcomes and
# could leak a label value that is not one of the real classes into the
# predictions, so those rows are left out of the index instead.
labelled_df = train_df.dropna(subset=['Loan_Status'])
assert not labelled_df.empty, "Training data has no labelled rows to index"
train_texts = labelled_df['text'].tolist()
train_labels = labelled_df['Loan_Status'].tolist()

# Use the GPU for encoding when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

# Row i of train_embeddings corresponds to train_labels[i]; classify_query
# relies on this positional alignment.
train_embeddings = model.encode(train_texts, convert_to_tensor=True).cpu().numpy()

# FAISS flat L2 index over the training embeddings.
dim = train_embeddings.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(train_embeddings)

def classify_query(query, top_k=5):
    """Predict a label for ``query`` by majority vote of its nearest neighbours.

    The query text is embedded with the module-level ``model``, the ``top_k``
    closest training embeddings are looked up in the module-level FAISS
    ``index``, and the most frequent of their ``train_labels`` is returned.

    Args:
        query: Text description of one application (as built by row_to_text).
        top_k: Number of neighbours that vote. Defaults to 5.

    Returns:
        The most common label among the retrieved neighbours. On a tie,
        ``Counter.most_common`` keeps the label encountered first in the
        neighbour list.

    Raises:
        ValueError: If the index returns no valid neighbour (e.g. it is empty).
    """
    query_emb = model.encode([query], convert_to_tensor=True).cpu().numpy()
    _, indices = index.search(query_emb, top_k)
    # FAISS pads the result with -1 when fewer than top_k vectors are indexed.
    # Without this filter, train_labels[-1] would silently count the *last*
    # training label as a vote for every padded slot.
    top_labels = [train_labels[i] for i in indices[0] if i >= 0]
    if not top_labels:
        raise ValueError("Cannot classify: the FAISS index returned no neighbours")
    return Counter(top_labels).most_common(1)[0][0]

# Label each test application with the majority vote of its nearest neighbours.
test_df['Answer'] = [classify_query(text) for text in test_df['text']]

# The submission file holds only the application ID and its predicted label.
submission_df = test_df.loc[:, ['Loan_ID', 'Answer']]
submission_df.to_csv("Final_Submission.csv", index=False)
print("Final submission file saved to Final_Submission.csv")


Final submission file saved to Final_Submission.csv


In [4]:
# Quick schema check: list the column labels of the training frame.
# NOTE(review): this cell's execution count (4) is lower than that of the cell
# above (9), so it was run out of order; the recorded output has no 'text'
# column and will differ after a fresh Restart & Run All.
print(train_df.columns)


Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')
