In [5]:
import pandas as pd
import pickle

# Load model, scaler, and choices
with open('training/model.pkl', 'rb') as f: 
    model = pickle.load(f)

with open('training/scaler.pkl', 'rb') as f: 
    scaler = pickle.load(f)

with open('training/choices.pkl', 'rb') as f: 
    choices = pickle.load(f)

# Extract columns from choices
columns = list(choices.keys())
print("Columns from choices:", columns)

# Create a DataFrame with these columns
dummy_data = {col: [0] * len(columns) for col in columns}

# Create a DataFrame
X_train_sample = pd.DataFrame(dummy_data)

# Inspect columns in sample data
print("Sample DataFrame columns:", X_train_sample.columns)

# Sample input data (replace with actual form inputs)
input_data = {
    "Combined_income": 50000,
    "Requested_amount": 20000,
    "Credit_history": "Yes",
    "Community_type": "Urban",
    "Dependents": "2",
    "College_degree": "Yes"
}

# Create input DataFrame with appropriate column names
input_df = pd.DataFrame([input_data], columns=columns)

# Ensure column consistency
def ensure_column_consistency(input_df, expected_columns):
    for col in expected_columns:
        if col not in input_df.columns:
            input_df[col] = 0  # Add missing columns with default values
    input_df = input_df[expected_columns]  # Reorder columns to match expected
    return input_df

input_df = ensure_column_consistency(input_df, columns)

# Verify columns presence
def verify_columns(input_df, expected_columns):
    missing_columns = set(expected_columns) - set(input_df.columns)
    extra_columns = set(input_df.columns) - set(expected_columns)
    return missing_columns, extra_columns

missing_cols, extra_cols = verify_columns(input_df, columns)
print("Missing columns:", missing_cols)
print("Extra columns:", extra_cols)

# Prepare data for scaling (includes both categorical and continuous)
X_prepared = input_df.copy()

# Convert categorical features to numeric (0/1 encoding)
for col in columns:
    if choices[col] is not None:  # Categorical columns
        X_prepared[col] = X_prepared[col].apply(lambda x: 1 if x in choices[col] else 0)
        
# Prepare continuous features (convert to float if needed)
X_prepared = X_prepared.astype('float')

# Scale the features
X_scaled = scaler.transform(X_prepared)

# Make prediction
output = model.predict(X_scaled)

# Interpret the output
prediction = 'Yes' if output[0] == 1 else 'No'
print("Prediction:", prediction)


Columns from choices: ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Gender_Female', 'Gender_Male', 'Married_No', 'Married_Yes', 'Dependents_0', 'Dependents_1', 'Dependents_2', 'Dependents_3+', 'Education_Graduate', 'Education_Not Graduate', 'Self_Employed_No', 'Self_Employed_Yes', 'Property_Area_Rural', 'Property_Area_Semiurban', 'Property_Area_Urban']
Sample DataFrame columns: Index(['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Gender_Female', 'Gender_Male',
       'Married_No', 'Married_Yes', 'Dependents_0', 'Dependents_1',
       'Dependents_2', 'Dependents_3+', 'Education_Graduate',
       'Education_Not Graduate', 'Self_Employed_No', 'Self_Employed_Yes',
       'Property_Area_Rural', 'Property_Area_Semiurban',
       'Property_Area_Urban'],
      dtype='object')
Missing columns: set()
Extra columns: set()
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Pre