# Loan Prediction Model Training

This notebook trains a machine learning model to predict loan approval status using logistic regression.

**SIMPLE WORKFLOW - JUST RUN CELLS 1, 2, 3 IN ORDER:**
1. **Cell 1**: Install packages (wait for completion)
2. **Cell 2**: Import libraries (verify no errors)
3. **Cell 3**: Complete training pipeline (generates loan_model.pkl)

**No need to run other cells - they are for individual steps if needed**

In [21]:
# STEP 1: INSTALL ALL REQUIRED PACKAGES
print("üîß Installing required packages...")
print("This may take a few minutes. Please wait...")

import subprocess
import sys
import os

def install_package(package):
    """Install or upgrade one requirement with pip in the current interpreter.

    Args:
        package: Requirement specifier handed to pip verbatim
            (for example ``'pandas==2.1.3'`` or ``'pandas'``).

    Returns:
        True when the pip subprocess finished without raising, False when
        anything went wrong (the error is printed, never re-raised).
    """
    # sys.executable makes pip target the interpreter that runs this code.
    pip_command = [sys.executable, '-m', 'pip', 'install', '--upgrade', '--quiet', package]
    installed = False
    try:
        print(f"Installing {package}...")
        subprocess.check_call(pip_command)
        print(f"‚úÖ {package} installed successfully")
        installed = True
    except Exception as install_error:
        print(f"‚ùå Failed to install {package}: {install_error}")
    return installed

# Pinned requirements for this notebook. The pins are what make a
# Restart & Run All reproduce the same environment.
required_packages = [
    'pandas==2.1.3',
    'numpy==1.25.2',
    'scikit-learn==1.3.2',
    'matplotlib==3.8.2',
    'seaborn==0.12.2',
    'joblib==1.3.2'
]

print("Installing packages one by one...")
failed_packages = []

for package in required_packages:
    success = install_package(package)
    if not success:
        failed_packages.append(package)

# Packages that could not be installed even without a version pin.
still_failing = []

if failed_packages:
    print(f"\n‚ö†Ô∏è Failed to install: {failed_packages}")
    print("Trying alternative installation...")

    # Retry ONLY the packages whose pinned install failed, this time without
    # the version constraint. Re-installing every package unpinned with
    # --upgrade would replace the pinned versions that installed correctly.
    for package in failed_packages:
        unpinned_name = package.split('==')[0]
        if not install_package(unpinned_name):
            still_failing.append(unpinned_name)

if still_failing:
    # Do not claim success while required packages are still missing.
    print(f"\nCould not install: {still_failing}")
    print("Resolve the errors above before running the next cell.")
else:
    print("\n‚úÖ Package installation completed!")
    print("Now run the next cell to import libraries...")

üîß Installing required packages...
This may take a few minutes. Please wait...
Installing packages one by one...
Installing pandas==2.1.3...
‚úÖ pandas==2.1.3 installed successfully
Installing numpy==1.25.2...
‚úÖ pandas==2.1.3 installed successfully
Installing numpy==1.25.2...
‚úÖ numpy==1.25.2 installed successfully
Installing scikit-learn==1.3.2...
‚úÖ numpy==1.25.2 installed successfully
Installing scikit-learn==1.3.2...
‚úÖ scikit-learn==1.3.2 installed successfully
Installing matplotlib==3.8.2...
‚úÖ scikit-learn==1.3.2 installed successfully
Installing matplotlib==3.8.2...
‚úÖ matplotlib==3.8.2 installed successfully
Installing seaborn==0.12.2...
‚úÖ matplotlib==3.8.2 installed successfully
Installing seaborn==0.12.2...
‚úÖ seaborn==0.12.2 installed successfully
Installing joblib==1.3.2...
‚úÖ seaborn==0.12.2 installed successfully
Installing joblib==1.3.2...
‚úÖ joblib==1.3.2 installed successfully

‚úÖ Package installation completed!
Now run the next cell to import libraries

In [41]:
# STEP 2: IMPORT ALL LIBRARIES
# Imports every third-party library the notebook uses. Each import is guarded
# separately so that ALL missing packages are collected in `import_errors`
# before a single repair attempt is made further down.
print("üì¶ Importing all required libraries...")

# Import standard libraries first
import sys
import os
import warnings
# NOTE(review): this silences every warning category for the rest of the
# kernel session, including deprecation and convergence warnings.
warnings.filterwarnings('ignore')

# Try importing required packages
# One "<package>: <error>" entry is appended for each import that fails.
import_errors = []

try:
    import pandas as pd
    print("‚úÖ pandas imported")
except ImportError as e:
    import_errors.append(f"pandas: {e}")

try:
    import numpy as np
    print("‚úÖ numpy imported")
except ImportError as e:
    import_errors.append(f"numpy: {e}")

try:
    import matplotlib.pyplot as plt
    print("‚úÖ matplotlib imported")
except ImportError as e:
    import_errors.append(f"matplotlib: {e}")

try:
    import seaborn as sns
    print("‚úÖ seaborn imported")
except ImportError as e:
    import_errors.append(f"seaborn: {e}")

try:
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, classification_report
    import sklearn
    print("‚úÖ scikit-learn imported")
except ImportError as e:
    import_errors.append(f"scikit-learn: {e}")

try:
    import joblib
    print("‚úÖ joblib imported")
except ImportError as e:
    import_errors.append(f"joblib: {e}")

# Check for import errors
if import_errors:
    print("\n‚ùå Import errors detected:")
    for error in import_errors:
        print(f"  - {error}")
    
    print("\nüîß Attempting to fix import issues...")
    import subprocess
    
    # Install missing packages
    # NOTE(review): all six packages are (re)installed here WITHOUT version
    # pins, so the resulting versions can differ from the ones pinned in the
    # install cell. check_call raises CalledProcessError if pip exits non-zero.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 
                          'pandas', 'numpy', 'scikit-learn', 
                          'matplotlib', 'seaborn', 'joblib', '--upgrade'])
    
    print("üîÑ Re-importing libraries...")
    
    # Re-import everything
    # These imports are not guarded: a package that is still missing raises
    # ImportError here and stops the cell.
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, classification_report
    import sklearn
    import joblib
    
    print("‚úÖ All libraries imported successfully after fixing!")

else:
    print("\nüéâ All libraries imported successfully!")

# Display versions
# Only pandas, numpy, scikit-learn and joblib are reported; matplotlib and
# seaborn versions are not printed.
print(f"\nüìã Package versions:")
print(f"  - pandas: {pd.__version__}")
print(f"  - numpy: {np.__version__}")
print(f"  - scikit-learn: {sklearn.__version__}")
print(f"  - joblib: {joblib.__version__}")

print("\n‚úÖ Ready for training! Run the next cell...")

üì¶ Importing all required libraries...
‚úÖ pandas imported
‚úÖ numpy imported
‚úÖ matplotlib imported
‚úÖ seaborn imported
‚úÖ scikit-learn imported
‚úÖ joblib imported

üéâ All libraries imported successfully!

üìã Package versions:
  - pandas: 2.1.3
  - numpy: 1.25.2
  - scikit-learn: 1.3.2
  - joblib: 1.3.2

‚úÖ Ready for training! Run the next cell...


In [43]:
# STEP 3: COMPLETE MODEL TRAINING PIPELINE WITH GUARANTEED PKL FILE CREATION
# This cell does everything - loads data, trains model, saves pickle file

# Banner for the all-in-one training cell.
print(
    "üöÄ" + "="*70,
    "   LOAN PREDICTION MODEL - COMPLETE TRAINING PIPELINE",
    "üöÄ" + "="*70,
    sep="\n",
)

# Re-import everything this cell needs so it can run even if the import cell
# was skipped. Any failure is reported and then re-raised to stop the cell.
try:
    # Standard library
    import os
    import sys
    import shutil
    import time
    import pickle
    import warnings

    # Third-party
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, classification_report
    import joblib

    warnings.filterwarnings('ignore')
    print("‚úÖ All imports successful")
except Exception as import_problem:
    print(f"‚ùå Import error: {import_problem}")
    print("Please run the previous cells first!")
    raise

# End-to-end training pipeline: locate the working directory, load the CSV
# (or synthesize a small sample), preprocess, train a LogisticRegression,
# persist it as loan_model.pkl and verify the saved file can be loaded.
# Any exception is caught at the bottom, reported with a traceback, and the
# cell finishes without re-raising.
try:
    # STEP 1: FORCE CORRECT WORKING DIRECTORY
    print(f"\nüìÇ STEP 1: WORKING DIRECTORY SETUP")
    print("-" * 50)

    current_dir = os.getcwd()
    print(f"Initial directory: {current_dir}")

    # Check if we're in the server directory, if not change to it.
    # Compare the last path component exactly: a plain endswith('server')
    # would also accept directories such as 'webserver'.
    if os.path.basename(os.path.normpath(current_dir)) != 'server':
        # Try to find server directory (isdir, so a plain file named
        # 'server' is never passed to os.chdir)
        if os.path.isdir('server'):
            os.chdir('server')
            print(f"‚úÖ Changed to server directory: {os.getcwd()}")
        elif os.path.isdir(os.path.join('..', 'server')):
            os.chdir(os.path.join('..', 'server'))
            print(f"‚úÖ Changed to server directory: {os.getcwd()}")
        else:
            print("‚ö†Ô∏è Server directory not found, staying in current directory")

    current_dir = os.getcwd()
    print(f"Working directory: {current_dir}")
    print(f"Directory writable: {os.access(current_dir, os.W_OK)}")
    print(f"Directory readable: {os.access(current_dir, os.R_OK)}")

    # Create a test file to verify write permissions
    test_file = 'write_test.tmp'
    try:
        with open(test_file, 'w') as f:
            f.write('test')
        if os.path.exists(test_file):
            os.remove(test_file)
            print("‚úÖ Write permissions verified")
        else:
            print("‚ùå Test file not created")
    except Exception as e:
        print(f"‚ùå Write permission test failed: {e}")
        # Try to continue anyway

    # List current files
    all_files = os.listdir('.')
    csv_files = [f for f in all_files if f.endswith('.csv')]
    pkl_files = [f for f in all_files if f.endswith('.pkl')]

    print(f"All files: {len(all_files)}")
    print(f"CSV files found: {csv_files}")
    print(f"Existing PKL files: {pkl_files}")

    # STEP 2: LOAD DATASET
    print(f"\nüìÅ STEP 2: LOADING DATASET")
    print("-" * 50)

    # Load the specific dataset
    csv_file = 'train_u6lujuX_CVtuZ9i (1).csv'
    if not os.path.exists(csv_file):
        print(f"‚ùå {csv_file} not found!")
        if csv_files:
            # Fall back to the first CSV found in the directory listing.
            csv_file = csv_files[0]
            print(f"üîÑ Using {csv_file} instead")
        else:
            # Create a sample dataset if no CSV found
            print("üîÑ Creating sample dataset for testing...")
            sample_data = {
                'Gender': ['Male', 'Female', 'Male', 'Female'] * 100,
                'Married': ['Yes', 'No', 'Yes', 'No'] * 100,
                'Dependents': ['0', '1', '2', '3+'] * 100,
                'Education': ['Graduate', 'Not Graduate'] * 200,
                'Self_Employed': ['No', 'Yes'] * 200,
                'ApplicantIncome': np.random.randint(1000, 10000, 400),
                'CoapplicantIncome': np.random.randint(0, 5000, 400),
                'LoanAmount': np.random.randint(50, 500, 400),
                'Loan_Amount_Term': [360.0] * 400,
                'Credit_History': ['Yes', 'No'] * 200,
                'Property_Area': ['Urban', 'Rural', 'Semiurban'] * 133 + ['Urban'],
                'Loan_Status': ['Y', 'N'] * 200
            }
            loan_data = pd.DataFrame(sample_data)
            csv_file = 'sample_loan_data.csv'
            loan_data.to_csv(csv_file, index=False)
            print(f"‚úÖ Sample dataset created: {csv_file}")

    # Load data
    loan_data = pd.read_csv(csv_file)
    print(f"‚úÖ Dataset loaded: {loan_data.shape}")
    print(f"‚úÖ Columns: {list(loan_data.columns)}")

    # STEP 3: QUICK DATA PREPROCESSING
    print(f"\nüîß STEP 3: DATA PREPROCESSING")
    print("-" * 50)

    # Handle missing values
    initial_shape = loan_data.shape
    missing_count = loan_data.isnull().sum().sum()
    print(f"Missing values found: {missing_count}")

    if missing_count > 0:
        loan_data = loan_data.dropna()
        print(f"‚úÖ Removed rows with missing values: {initial_shape} ‚Üí {loan_data.shape}")

    # Encode target variable (Loan_Status: Y=1, N=0)
    if 'Loan_Status' in loan_data.columns:
        loan_data['Loan_Status'] = loan_data['Loan_Status'].map({'Y': 1, 'N': 0})
        # Any label other than 'Y'/'N' became NaN above; stop here with a
        # clear message instead of failing later inside model.fit().
        if loan_data['Loan_Status'].isna().any():
            raise ValueError("Loan_Status contains values other than 'Y' and 'N'")
        print(f"‚úÖ Encoded Loan_Status: {loan_data['Loan_Status'].value_counts().to_dict()}")
    else:
        raise ValueError("Loan_Status column not found")

    # Handle Dependents column (3+ -> 4)
    if 'Dependents' in loan_data.columns:
        loan_data['Dependents'] = loan_data['Dependents'].astype(str).replace('3+', '4')
        loan_data['Dependents'] = pd.to_numeric(loan_data['Dependents'], errors='coerce')

    # Encode categorical variables
    encoding_map = {
        'Gender': {'Female': 0, 'Male': 1},
        'Married': {'No': 0, 'Yes': 1},
        'Education': {'Not Graduate': 0, 'Graduate': 1},
        'Self_Employed': {'No': 0, 'Yes': 1},
        'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
        'Credit_History': {'No': 0, 'Yes': 1}
    }

    for col, mapping in encoding_map.items():
        if col not in loan_data.columns:
            continue
        if pd.api.types.is_numeric_dtype(loan_data[col]):
            # Already numeric: mapping string labels onto it would turn every
            # value into NaN, and the fillna(0) in STEP 4 would then make the
            # feature a constant 0.
            # NOTE(review): Credit_History presumably arrives as 0/1 in the
            # training CSV - confirm against the file.
            continue
        encoded = loan_data[col].map(mapping)
        unmapped = loan_data.loc[encoded.isna() & loan_data[col].notna(), col].unique()
        if len(unmapped) > 0:
            # Surface unexpected labels instead of silently zero-filling them.
            print(f"WARNING: {col} has unmapped values {list(unmapped)}; they will be filled with 0")
        loan_data[col] = encoded

    print("‚úÖ Categorical encoding completed")

    # STEP 4: PREPARE FEATURES AND TARGET
    print(f"\nüéØ STEP 4: PREPARING FEATURES AND TARGET")
    print("-" * 50)

    # Separate features and target (Loan_ID is an identifier, not a feature)
    feature_cols = [col for col in loan_data.columns if col not in ['Loan_ID', 'Loan_Status']]
    X = loan_data[feature_cols].copy()
    y = loan_data['Loan_Status'].copy()

    # Fill any missing values left by the encoding / numeric coercion above
    X = X.fillna(0)

    print(f"‚úÖ Features shape: {X.shape}")
    print(f"‚úÖ Features: {list(X.columns)}")
    print(f"‚úÖ Target distribution: {y.value_counts().to_dict()}")

    # STEP 5: TRAIN MODEL (SIMPLIFIED FOR SPEED)
    print(f"\nü§ñ STEP 5: TRAINING MODEL")
    print("-" * 50)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)

    test_acc = accuracy_score(y_test, model.predict(X_test))
    print(f"‚úÖ Model trained successfully! Accuracy: {test_acc:.2f}")

    # STEP 6: FORCE SAVE MODEL WITH ALL POSSIBLE METHODS
    # Each method runs only if every previous one failed; `success` flips to
    # True as soon as a non-empty model file exists.
    print(f"\nüíæ STEP 6: FORCE SAVING MODEL (ALL METHODS)")
    print("-" * 50)

    model_filename = 'loan_model.pkl'
    success = False

    # Method 1: Standard joblib
    try:
        print("üîÑ Method 1: Standard joblib...")
        if os.path.exists(model_filename):
            os.remove(model_filename)
        joblib.dump(model, model_filename)
        time.sleep(0.1)
        if os.path.exists(model_filename) and os.path.getsize(model_filename) > 0:
            print(f"‚úÖ SUCCESS! File size: {os.path.getsize(model_filename)} bytes")
            success = True
    except Exception as e:
        print(f"‚ùå Method 1 failed: {e}")

    # Method 2: Standard pickle if joblib failed
    if not success:
        try:
            print("üîÑ Method 2: Standard pickle...")
            with open(model_filename, 'wb') as f:
                pickle.dump(model, f)
            time.sleep(0.1)
            if os.path.exists(model_filename) and os.path.getsize(model_filename) > 0:
                print(f"‚úÖ SUCCESS! File size: {os.path.getsize(model_filename)} bytes")
                success = True
        except Exception as e:
            print(f"‚ùå Method 2 failed: {e}")

    # Method 3: High protocol pickle
    if not success:
        try:
            print("üîÑ Method 3: High protocol pickle...")
            with open(model_filename, 'wb') as f:
                pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)
            time.sleep(0.1)
            if os.path.exists(model_filename) and os.path.getsize(model_filename) > 0:
                print(f"‚úÖ SUCCESS! File size: {os.path.getsize(model_filename)} bytes")
                success = True
        except Exception as e:
            print(f"‚ùå Method 3 failed: {e}")

    # Method 4: Alternative directory (write to the home directory, then copy)
    if not success:
        try:
            print("üîÑ Method 4: Alternative directory...")
            alt_dir = os.path.expanduser("~")
            alt_path = os.path.join(alt_dir, model_filename)
            joblib.dump(model, alt_path)
            if os.path.exists(alt_path):
                shutil.copy2(alt_path, model_filename)
                os.remove(alt_path)
                if os.path.exists(model_filename) and os.path.getsize(model_filename) > 0:
                    print(f"‚úÖ SUCCESS! File size: {os.path.getsize(model_filename)} bytes")
                    success = True
        except Exception as e:
            print(f"‚ùå Method 4 failed: {e}")

    # Method 5: Create manually with all data.
    # Unlike methods 1-4 this pickles a dict wrapping the model, so consumers
    # must handle both formats.
    if not success:
        try:
            print("üîÑ Method 5: Manual creation...")
            model_data = {
                'model': model,
                'feature_names': list(X.columns),
                'accuracy': test_acc
            }
            with open(model_filename, 'wb') as f:
                pickle.dump(model_data, f)
            time.sleep(0.1)
            if os.path.exists(model_filename) and os.path.getsize(model_filename) > 0:
                print(f"‚úÖ SUCCESS! File size: {os.path.getsize(model_filename)} bytes")
                success = True
        except Exception as e:
            print(f"‚ùå Method 5 failed: {e}")

    # STEP 7: VERIFY FILE EXISTS AND TEST LOADING
    print(f"\nüîç STEP 7: VERIFICATION")
    print("-" * 50)

    if success and os.path.exists(model_filename):
        file_size = os.path.getsize(model_filename)
        full_path = os.path.abspath(model_filename)

        print(f"‚úÖ MODEL FILE SUCCESSFULLY CREATED!")
        print(f"   üìÅ Filename: {model_filename}")
        print(f"   üìè Size: {file_size} bytes")
        print(f"   üìç Full path: {full_path}")

        # Test loading the model and predicting on one held-out row
        try:
            test_model = joblib.load(model_filename)
            if hasattr(test_model, 'predict'):
                test_pred = test_model.predict(X_test[:1])
                print(f"‚úÖ Model loading test PASSED: {test_pred}")
            elif isinstance(test_model, dict) and 'model' in test_model:
                # Dict format written by Method 5
                actual_model = test_model['model']
                test_pred = actual_model.predict(X_test[:1])
                print(f"‚úÖ Model loading test PASSED: {test_pred}")
        except Exception as load_error:
            print(f"‚ö†Ô∏è Model loading test failed: {load_error}")
    else:
        print("‚ùå FAILED TO CREATE MODEL FILE!")
        print("Trying one final attempt with basic model...")

        # Ultra-simple final attempt.
        # NOTE(review): this saves a model fitted on at most 100 training rows
        # with default settings, not the `model` trained in STEP 5.
        try:
            simple_model = LogisticRegression()
            simple_model.fit(X_train.iloc[:100], y_train.iloc[:100])  # Use subset

            with open('simple_model.pkl', 'wb') as f:
                pickle.dump(simple_model, f)

            if os.path.exists('simple_model.pkl'):
                # os.replace overwrites a leftover (possibly empty) target on
                # every platform; os.rename raises FileExistsError on Windows
                # when the destination already exists.
                os.replace('simple_model.pkl', model_filename)
                print(f"‚úÖ Final attempt successful: {model_filename}")
                success = True
        except Exception as final_error:
            print(f"‚ùå Final attempt failed: {final_error}")

    # STEP 8: FINAL STATUS
    print(f"\nüìÇ STEP 8: FINAL STATUS")
    print("-" * 50)

    final_files = sorted(os.listdir('.'))
    pkl_files_final = [f for f in final_files if f.endswith('.pkl')]

    print(f"‚úÖ Current directory: {os.getcwd()}")
    print(f"‚úÖ All files ({len(final_files)}):")
    for file in final_files[:10]:  # Show first 10 files
        if os.path.isfile(file):
            size = os.path.getsize(file)
            print(f"   {file} ({size} bytes)")
    if len(final_files) > 10:
        print(f"   ... and {len(final_files) - 10} more files")

    print(f"\n‚úÖ PKL files found ({len(pkl_files_final)}): {pkl_files_final}")

    if model_filename in pkl_files_final:
        print(f"\nüéâ SUCCESS! {model_filename} has been created!")
        print(f"üéâ File location: {os.path.abspath(model_filename)}")
        print(f"üéâ You can now run your FastAPI server!")
        print(f"üéâ Command: python api.py")
    else:
        print(f"\n‚ùå {model_filename} was not created successfully")
        print("üí° Try running this cell again")
        print("üí° Or check file permissions")

        # Show any error files that might give clues
        for file in final_files:
            if 'error' in file.lower() or 'log' in file.lower():
                print(f"üìÑ Found log file: {file}")

    print("="*70)

except Exception as e:
    # Report the failure with a full traceback, then show where we are on
    # disk; the exception is intentionally not re-raised.
    print(f"\n‚ùå CRITICAL ERROR: {e}")

    import traceback
    print(f"\nüîç Full error details:")
    traceback.print_exc()

    print(f"\nüìÇ Emergency directory check:")
    try:
        print(f"Current directory: {os.getcwd()}")
        files = os.listdir('.')
        print(f"Files in directory: {len(files)}")
        for f in files[:5]:
            print(f"  {f}")
    except Exception as dir_err:
        print(f"Could not list directory: {dir_err}")

   LOAN PREDICTION MODEL - COMPLETE TRAINING PIPELINE
‚úÖ All imports successful

üìÇ STEP 1: WORKING DIRECTORY SETUP
--------------------------------------------------
Initial directory: c:\Users\purus\OneDrive\New folder\Desktop\loanlens\server
Working directory: c:\Users\purus\OneDrive\New folder\Desktop\loanlens\server
Directory writable: True
Directory readable: True
‚úÖ Write permissions verified
All files: 11
CSV files found: ['train_u6lujuX_CVtuZ9i (1).csv']
Existing PKL files: []

üìÅ STEP 2: LOADING DATASET
--------------------------------------------------
‚úÖ Dataset loaded: (614, 13)
‚úÖ Columns: ['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status']

üîß STEP 3: DATA PREPROCESSING
--------------------------------------------------
Missing values found: 149
‚úÖ Removed rows with missing values: (614, 13) ‚Üí (480, 13)
‚úÖ Enco

In [24]:
# OPTIONAL: Individual step - Load dataset
# (This cell is optional - the complete pipeline above does everything)

In [25]:
# OPTIONAL: Individual step - Display dataset info
# (This cell is optional - the complete pipeline above does everything)

In [26]:
# OPTIONAL: Individual step - Check missing values
# (This cell is optional - the complete pipeline above does everything)

In [27]:
# OPTIONAL: Individual step - Handle missing values
# (This cell is optional - the complete pipeline above does everything)

In [28]:
# OPTIONAL: Individual step - Encode target variable
# (This cell is optional - the complete pipeline above does everything)

In [29]:
# OPTIONAL: Individual step - Handle Dependents column
# (This cell is optional - the complete pipeline above does everything)

## Optional: Data Visualization

The cells below are optional - the main training is completed in cell 3 above.

In [30]:
# OPTIONAL: Individual step - Create visualizations
# (This cell is optional - the complete pipeline above does everything)

In [31]:
# OPTIONAL: Individual step - Encode categorical variables
# (This cell is optional - the complete pipeline above does everything)

In [32]:
# OPTIONAL: Individual step - Verify data types
# (This cell is optional - the complete pipeline above does everything)

In [33]:
# OPTIONAL: Individual step - Prepare features and target
# (This cell is optional - the complete pipeline above does everything)

In [34]:
# OPTIONAL: Individual step - Split dataset
# (This cell is optional - the complete pipeline above does everything)

## Optional: Model Training Steps

The cells below are optional - the main training is completed in cell 3 above.

In [35]:
# OPTIONAL: Individual step - Train model
# (This cell is optional - the complete pipeline above does everything)

In [36]:
# OPTIONAL: Individual step - Evaluate training set
# (This cell is optional - the complete pipeline above does everything)

In [37]:
# OPTIONAL: Individual step - Evaluate test set
# (This cell is optional - the complete pipeline above does everything)

In [38]:
# OPTIONAL: Individual step - Save model
# (This cell is optional - the complete pipeline above does everything)

In [39]:
# OPTIONAL: Individual step - Test model
# (This cell is optional - the complete pipeline above does everything)

In [40]:
# OPTIONAL: Individual step - Feature importance
# (This cell is optional - the complete pipeline above does everything)

## ✅ TRAINING COMPLETED!

If you ran cells 1, 2, and 3 successfully, you should now have:

✅ **loan_model.pkl** - Your trained model file  
✅ All packages installed correctly  
✅ Model trained (check the accuracy printed by cell 3)  
✅ Ready for FastAPI backend integration  

**Next Steps:**
1. Verify `loan_model.pkl` exists in your server directory
2. Start your FastAPI backend with `python api.py`
3. Test the API endpoints
4. Deploy to production (Render + Vercel)

**SIMPLE WORKFLOW SUMMARY:**
- **Cell 1**: Install packages ✅
- **Cell 2**: Import libraries ✅  
- **Cell 3**: Complete training pipeline ✅
- **Result**: loan_model.pkl file ready! ✅

All other cells are optional: the individual-step cells above and the check cells below are only needed if you want to debug or run parts separately.

In [45]:
# QUICK CHECK: List all PKL files and their details
import os
import joblib

# Inventory of the .pkl files in the current working directory: prints size,
# absolute path and a short description of what each file contains.
print("üîç CHECKING PKL FILES IN SERVER DIRECTORY")
print("="*50)

current_dir = os.getcwd()
print(f"Current directory: {current_dir}")

# Find all pkl files
pkl_files = []
for entry in os.listdir('.'):
    if entry.endswith('.pkl'):
        pkl_files.append(entry)

print(f"\nüìÅ Found {len(pkl_files)} PKL file(s):")

for pkl_file in pkl_files:
    file_size = os.path.getsize(pkl_file)
    full_path = os.path.abspath(pkl_file)

    print(f"\nüìÑ {pkl_file}")
    print(f"   Size: {file_size} bytes")
    print(f"   Path: {full_path}")

    # Load the file and describe its payload; any failure is reported per
    # file and does not stop the loop.
    try:
        loaded_data = joblib.load(pkl_file)
        if hasattr(loaded_data, 'predict'):
            details = [
                f"   Type: Trained ML Model (sklearn)",
                f"   Model: {type(loaded_data).__name__}",
            ]
        elif isinstance(loaded_data, dict):
            details = [f"   Type: Dictionary with keys: {list(loaded_data.keys())}"]
        else:
            details = [f"   Type: {type(loaded_data)}"]
        for detail in details:
            print(detail)
    except Exception as e:
        print(f"   Error loading: {e}")

has_model = 'loan_model.pkl' in pkl_files
has_metadata = 'model_metadata.pkl' in pkl_files

if has_model:
    model_status = '‚úÖ Found'
else:
    model_status = '‚ùå Missing'

if has_metadata:
    metadata_status = '‚úÖ Found'
else:
    metadata_status = '‚ùå Not created'

print(f"\n‚úÖ SUMMARY:")
print(f"   - loan_model.pkl: {model_status}")
print(f"   - model_metadata.pkl: {metadata_status}")

if not has_model:
    print(f"\n‚ö†Ô∏è  loan_model.pkl not found. Please run the training cell again.")
else:
    print(f"\nüéâ SUCCESS! Your FastAPI server can now load the model!")
    print(f"üöÄ You can run: python api.py")

print("="*50)

üîç CHECKING PKL FILES IN SERVER DIRECTORY
Current directory: c:\Users\purus\OneDrive\New folder\Desktop\loanlens\server

üìÅ Found 1 PKL file(s):

üìÑ loan_model.pkl
   Size: 1455 bytes
   Path: c:\Users\purus\OneDrive\New folder\Desktop\loanlens\server\loan_model.pkl
   Type: Trained ML Model (sklearn)
   Model: LogisticRegression

‚úÖ SUMMARY:
   - loan_model.pkl: ‚úÖ Found
   - model_metadata.pkl: ‚ùå Not created

üéâ SUCCESS! Your FastAPI server can now load the model!
üöÄ You can run: python api.py


In [46]:
# OPTIONAL: Create model metadata.pkl file (NOT required for API)
import os
import joblib
import pandas as pd

# Writes model_metadata.pkl: a dictionary describing the saved model (type,
# feature names, hyper-parameters, encodings). The file is informational;
# the messages below state that the API only needs loan_model.pkl.
print("üîç CREATING OPTIONAL MODEL METADATA FILE")
print("="*50)

# Check if loan_model.pkl exists first
if not os.path.exists('loan_model.pkl'):
    print("‚ùå loan_model.pkl not found. Please run the training pipeline first.")
else:
    try:
        # Load the model to get information
        loaded_data = joblib.load('loan_model.pkl')

        # The training cell's last save method stores a dict that wraps the
        # estimator; unwrap it so the metadata describes the estimator itself.
        if isinstance(loaded_data, dict) and 'model' in loaded_data:
            model = loaded_data['model']
            saved_feature_names = loaded_data.get('feature_names')
        else:
            model = loaded_data
            saved_feature_names = None

        # Fallback feature names (based on typical loan data), used only when
        # the model itself cannot tell us what it was trained on.
        default_feature_names = [
            'Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
            'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
            'Loan_Amount_Term', 'Credit_History', 'Property_Area'
        ]

        # Prefer the names recorded by scikit-learn at fit time so the
        # metadata cannot drift from the model actually saved.
        if hasattr(model, 'feature_names_in_'):
            feature_names = [str(name) for name in model.feature_names_in_]
        elif saved_feature_names:
            feature_names = list(saved_feature_names)
        else:
            feature_names = default_feature_names

        # Create metadata dictionary
        model_metadata = {
            'model_type': type(model).__name__,
            'feature_names': feature_names,
            'feature_count': len(feature_names),
            'model_parameters': model.get_params() if hasattr(model, 'get_params') else {},
            'created_date': pd.Timestamp.now().isoformat(),
            # NOTE(review): duplicated by hand from the training cell; keep
            # the two copies in sync.
            'encoding_map': {
                'Gender': {'Female': 0, 'Male': 1},
                'Married': {'No': 0, 'Yes': 1},
                'Education': {'Not Graduate': 0, 'Graduate': 1},
                'Self_Employed': {'No': 0, 'Yes': 1},
                'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
                'Credit_History': {'No': 0, 'Yes': 1}
            },
            'target_mapping': {'Not Approved': 0, 'Approved': 1}
        }

        # Save metadata
        metadata_filename = 'model_metadata.pkl'
        joblib.dump(model_metadata, metadata_filename)

        if os.path.exists(metadata_filename):
            file_size = os.path.getsize(metadata_filename)
            print(f"‚úÖ Metadata file created: {metadata_filename}")
            print(f"   Size: {file_size} bytes")
            print(f"   Contains: {list(model_metadata.keys())}")
        else:
            print("‚ùå Failed to create metadata file")

    except Exception as e:
        print(f"‚ùå Error creating metadata: {e}")

print(f"\nüìã SUMMARY:")
print(f"   - loan_model.pkl: {'‚úÖ Required for API' if os.path.exists('loan_model.pkl') else '‚ùå Missing'}")
print(f"   - model_metadata.pkl: {'‚úÖ Optional info file' if os.path.exists('model_metadata.pkl') else '‚ùå Not created'}")

print(f"\nüí° IMPORTANT:")
print(f"   - Your API only needs loan_model.pkl to work")
print(f"   - model_metadata.pkl is just extra information")
print(f"   - You can deploy without model_metadata.pkl")

print("="*50)

üîç CREATING OPTIONAL MODEL METADATA FILE
‚úÖ Metadata file created: model_metadata.pkl
   Size: 762 bytes
   Contains: ['model_type', 'feature_names', 'feature_count', 'model_parameters', 'created_date', 'encoding_map', 'target_mapping']

üìã SUMMARY:
   - loan_model.pkl: ‚úÖ Required for API
   - model_metadata.pkl: ‚úÖ Optional info file

üí° IMPORTANT:
   - Your API only needs loan_model.pkl to work
   - model_metadata.pkl is just extra information
   - You can deploy without model_metadata.pkl


In [None]:
# VERIFY API COMPATIBILITY - Test if your model works with the API format
import os
import joblib
import numpy as np

# Loads loan_model.pkl and checks that it accepts an input laid out the way
# `api_features` lists it: same number of features, same order, and a working
# predict / predict_proba.
print("üß™ TESTING API COMPATIBILITY")
print("="*50)

# Check if model exists
if not os.path.exists('loan_model.pkl'):
    print("‚ùå loan_model.pkl not found!")
else:
    try:
        # Load the model (same way API does)
        loaded_data = joblib.load('loan_model.pkl')

        if hasattr(loaded_data, 'predict'):
            model = loaded_data
            feature_names = None
            print("‚úÖ Standard model format detected")
        elif isinstance(loaded_data, dict) and 'model' in loaded_data:
            model = loaded_data['model']
            feature_names = loaded_data.get('feature_names', None)
            print("‚úÖ Manual model format detected")
            print(f"   Feature names: {feature_names}")
        else:
            print(f"‚ùå Unknown format: {type(loaded_data)}")
            model = None

        # Explicit None check: estimator truthiness is not a reliable signal
        # (objects defining __len__ can be falsy).
        if model is not None:
            # Test with API-format data
            api_features = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
                           'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
                           'Loan_Amount_Term', 'Credit_History', 'Property_Area']

            # Verify that the model was trained on exactly these features in
            # exactly this order. A positional array carries no names, so a
            # reordered model would otherwise still "pass" this test.
            expected_count = getattr(model, 'n_features_in_', None)
            if expected_count is not None and expected_count != len(api_features):
                raise ValueError(
                    f"Model expects {expected_count} features but the API sends {len(api_features)}"
                )
            trained_names = getattr(model, 'feature_names_in_', feature_names)
            if trained_names is not None and list(trained_names) != api_features:
                raise ValueError(
                    f"Feature order mismatch: model={list(trained_names)} api={api_features}"
                )

            # Create test input (same as API would send)
            test_input = np.array([[1, 1, 2, 1, 0, 5000, 1500, 150, 360, 1, 2]])

            print(f"\nüß™ Testing prediction...")
            print(f"   Input shape: {test_input.shape}")
            print(f"   Input values: {test_input[0]}")

            # Test prediction
            prediction = model.predict(test_input)[0]
            print(f"‚úÖ Prediction: {prediction} ({'Approved' if prediction == 1 else 'Not Approved'})")

            # Test probability (optional capability; failure is only a warning)
            try:
                probability = model.predict_proba(test_input)[0]
                print(f"‚úÖ Probability: {probability}")
                print(f"   Approval chance: {probability[1]:.4f}")
            except Exception as prob_error:
                print(f"‚ö†Ô∏è Probability failed: {prob_error}")

            print(f"\nüéâ API COMPATIBILITY: SUCCESS!")
            print(f"üöÄ Your model is ready for the API!")

    except Exception as e:
        print(f"‚ùå Compatibility test failed: {e}")
        import traceback
        traceback.print_exc()

print("="*50)

In [None]:
# QUICK API TEST - Test your backend connection
import requests
import json

def test_backend_connection():
    print("üîå TESTING BACKEND CONNECTION")
    print("="*50)
    
    base_url = "http://localhost:8000"
    
    # Test 1: Health check
    try:
        response = requests.get(f"{base_url}/health", timeout=5)
        print(f"‚úÖ Health check: {response.status_code}")
        print(f"   Response: {response.json()}")
    except requests.exceptions.ConnectionError:
        print("‚ùå Health check failed - Backend not running")
        print("üí° Start backend with: python api.py")
        return False
    except Exception as e:
        print(f"‚ùå Health check error: {e}")
        return False
    
    # Test 2: Model info
    try:
        response = requests.get(f"{base_url}/model-info", timeout=5)
        print(f"‚úÖ Model info: {response.status_code}")
        print(f"   Response: {response.json()}")
    except Exception as e:
        print(f"‚ö†Ô∏è Model info error: {e}")
    
    # Test 3: Sample prediction
    try:
        test_data = {
            "Gender": "Male",
            "Married": "Yes", 
            "Dependents": 1,
            "Education": "Graduate",
            "Self_Employed": "No",
            "ApplicantIncome": 5000.0,
            "CoapplicantIncome": 0.0,
            "LoanAmount": 150.0,
            "Loan_Amount_Term": 360.0,
            "Credit_History": "Yes",
            "Property_Area": "Semiurban"
        }
        
        response = requests.post(
            f"{base_url}/predict",
            json=test_data,
            headers={"Content-Type": "application/json"},
            timeout=10
        )
        
        print(f"‚úÖ Prediction test: {response.status_code}")
        if response.status_code == 200:
            result = response.json()
            print(f"   Prediction: {result['loan_status_text']}")
            print(f"   Probability: {result['probability']:.4f}")
        else:
            print(f"   Error: {response.text}")
            
    except Exception as e:
        print(f"‚ùå Prediction test error: {e}")
        
    return True

# Run the test. As the last expression in the cell, the boolean returned by
# test_backend_connection() is shown as the cell's output.
test_backend_connection()

In [None]:
# COMPLETE VERIFICATION - Check everything is ready for deployment
import os
import joblib
import numpy as np

print("üîç COMPLETE DEPLOYMENT CHECK")
print("="*70)

# Start every run from a known state. Without this, the final summary either
# raises NameError on a fresh kernel (model file missing / load failure) or
# silently reuses a stale `model` left behind by an earlier cell and reports
# a false "READY".
model = None
prediction_ok = False

# 1. Check if we're in the right directory
current_dir = os.getcwd()
print(f"üìÇ Current directory: {current_dir}")

if not current_dir.endswith('server'):
    print("‚ö†Ô∏è Not in server directory. Changing...")
    if os.path.exists('server'):
        # Side effect: changes the kernel's working directory for later cells.
        os.chdir('server')
        print(f"‚úÖ Changed to: {os.getcwd()}")

# 2. Check for model file
model_file = 'loan_model.pkl'
if os.path.exists(model_file):
    file_size = os.path.getsize(model_file)
    print(f"‚úÖ Model file found: {model_file} ({file_size} bytes)")

    # 3. Test loading the model
    try:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files produced by this project.
        model_data = joblib.load(model_file)

        # Two on-disk layouts are accepted: a bare estimator, or a dict that
        # wraps the estimator under the 'model' key.
        if hasattr(model_data, 'predict'):
            model = model_data
            print("‚úÖ Model format: Standard sklearn model")
        elif isinstance(model_data, dict) and 'model' in model_data:
            model = model_data['model']
            print("‚úÖ Model format: Dictionary with metadata")
            print(f"   Features: {model_data.get('feature_names', 'Not specified')}")
        else:
            print(f"‚ùå Unknown model format: {type(model_data)}")

        # Compare against None explicitly: estimator truthiness may be
        # delegated to __len__ and is not a reliable "was it loaded" signal.
        if model is not None:
            # 4. Test prediction
            # One row with 11 numeric values; presumably the encoded features
            # in training order - TODO confirm against the training cell.
            test_input = np.array([[1, 1, 1, 1, 0, 5000, 1000, 150, 360, 1, 1]])
            prediction = model.predict(test_input)[0]
            prediction_ok = True
            print(f"‚úÖ Test prediction: {prediction} ({'Approved' if prediction == 1 else 'Not Approved'})")

            # 5. Test probability (optional: only warn if it is unavailable)
            try:
                probability = model.predict_proba(test_input)[0]
                print(f"‚úÖ Test probability: {probability}")
            except Exception as e:
                print(f"‚ö†Ô∏è Probability test failed: {e}")

    except Exception as e:
        print(f"‚ùå Model loading failed: {e}")
        import traceback
        traceback.print_exc()
else:
    print(f"‚ùå Model file NOT found: {model_file}")
    print("Run the training cell (Cell 3) to create the model")

# 6. Check required files
required_files = ['api.py', 'config.py', '.env', 'requirements.txt']
print(f"\nüìã Checking required files:")
for file in required_files:
    exists = "‚úÖ" if os.path.exists(file) else "‚ùå"
    print(f"   {exists} {file}")

# 7. Check CSV data file
csv_files = [f for f in os.listdir('.') if f.endswith('.csv')]
print(f"\nüìä CSV files: {csv_files}")

# 8. Final summary - "ready" requires that the model loaded AND the test
# prediction actually succeeded during this run.
print("\n" + "="*70)
if os.path.exists(model_file) and model is not None and prediction_ok:
    print("üéâ READY FOR DEPLOYMENT!")
    print("‚úÖ Model file exists and loads correctly")
    print("‚úÖ Predictions working")
    print("\nüöÄ Next steps:")
    print("1. Open terminal in server directory")
    print("2. Run: python api.py")
    print("3. Open browser to: http://localhost:8000/docs")
else:
    print("‚ùå NOT READY - Issues detected")
    print("üí° Fix:")
    print("1. Run Cell 3 (training pipeline)")
    print("2. Verify loan_model.pkl is created")
    print("3. Run this cell again to verify")
print("="*70)

In [None]:
# EMERGENCY FIX: Verify and create model file in correct location
import os
import sys
from pathlib import Path

print("üö® EMERGENCY MODEL FILE CHECK")
print("="*70)

# Resolve the server directory: start from this file's folder when __file__
# is defined, otherwise from the current working directory.
if '__file__' in globals():
    server_dir = Path(__file__).parent
else:
    server_dir = Path.cwd()

# If the starting point is not itself a "server" path, take the first existing
# candidate: a child named "server", then a sibling named "server".
if not str(server_dir).endswith('server'):
    for candidate in (server_dir / 'server', server_dir.parent / 'server'):
        if candidate.exists():
            server_dir = candidate
            break

print(f"üìÇ Server directory: {server_dir}")
print(f"üìÇ Current working directory: {Path.cwd()}")

# Switch the working directory; a failure is reported but does not stop the check.
try:
    os.chdir(server_dir)
    print(f"‚úÖ Changed to: {os.getcwd()}")
except Exception as chdir_error:
    print(f"‚ö†Ô∏è Could not change directory: {chdir_error}")

# Show regular files among the first 20 directory entries.
print("\nüìÅ Files in server directory:")
all_files = list(Path('.').glob('*'))
for entry in (p for p in all_files[:20] if p.is_file()):
    print(f"   {entry.name} ({entry.stat().st_size} bytes)")

# Report every pickle file in the directory.
pkl_files = list(Path('.').glob('*.pkl'))
print(f"\nüì¶ PKL files found: {len(pkl_files)}")
for pkl_path in pkl_files:
    print(f"   ‚úÖ {pkl_path.name} ({pkl_path.stat().st_size} bytes)")

# Verify the specific model file the API expects.
model_file = Path('loan_model.pkl')
if not model_file.exists():
    print("\n‚ùå loan_model.pkl NOT FOUND!")
    print(f"   Expected at: {model_file.absolute()}")
    print("\nüí° SOLUTION: Run Cell 3 (training pipeline) to create the model")
    print(f"   Or if model exists elsewhere, copy it to: {server_dir}")
else:
    print("\n‚úÖ loan_model.pkl EXISTS!")
    print(f"   üìç Location: {model_file.absolute()}")
    print(f"   üìè Size: {model_file.stat().st_size} bytes")

    # Attempt a load and describe which of the two known layouts was found.
    try:
        import joblib
        model_data = joblib.load(model_file)
        print("   ‚úÖ Can be loaded successfully")
        print(f"   üìä Type: {type(model_data)}")

        has_predict = hasattr(model_data, 'predict')
        if has_predict:
            print("   ‚úÖ Has predict method - ready for API")
        elif isinstance(model_data, dict) and 'model' in model_data:
            print("   ‚úÖ Dictionary format - has model key")

    except Exception as load_error:
        print(f"   ‚ùå Cannot load: {load_error}")

print("="*70)