In [None]:
import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Read the loan records from the CSV file in the working directory.
dataset = pd.read_csv("loan_data.csv")

# Flag incomplete data up front; this only warns and does not alter the data.
if dataset.isnull().values.any():
    print("Warning: Missing values found in the dataset. Consider handling them before proceeding.")

# Define features and target: the last column is the label and every column
# before it is a model input. `.values` yields NumPy arrays, so columns are
# addressed by position from here on.
feature_frame = dataset.iloc[:, :-1]
features = feature_frame.values
target = dataset.iloc[:, -1].values

# Positional indices (into `features`) of the categorical columns.
# Adjust these to match your dataset.
categorical_columns = [1, 2, 4, 6, 7]

# Validate the indices before fitting. ColumnTransformer rejects an
# out-of-range index with only "all features must be in [0, n-1] or [-n, 0]";
# naming the offending indices and the actual columns makes the list easy to fix.
n_feature_columns = features.shape[1]
invalid_columns = [
    index
    for index in categorical_columns
    if not -n_feature_columns <= index < n_feature_columns
]
if invalid_columns:
    raise ValueError(
        f"categorical_columns has out-of-range indices {invalid_columns}; "
        f"the dataset has {n_feature_columns} feature column(s): "
        f"{list(enumerate(feature_frame.columns))}"
    )

# Non-numeric columns that are not one-hot encoded are passed through as-is,
# and the classifier cannot be fitted on raw text, so point them out here.
# Negative indices are normalised so they compare against positions.
encoded_positions = {index % n_feature_columns for index in categorical_columns}
unencoded_columns = [
    column_name
    for position, (column_name, dtype) in enumerate(feature_frame.dtypes.items())
    if position not in encoded_positions
    and not pd.api.types.is_numeric_dtype(dtype)
]
if unencoded_columns:
    print(
        "Warning: non-numeric columns are not listed in categorical_columns "
        f"and will be passed through unencoded: {unencoded_columns}"
    )

# One-hot encode the categorical columns; all other columns are kept unchanged.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
)

# Fit the transformer and replace `features` with the encoded matrix.
features = ct.fit_transform(features)

# Debugging output: the column count grows with the number of encoded categories.
print(f"Shape of features after encoding: {features.shape}")  # Check the shape of features

# Encode the target labels as integer class indices. `le` keeps the fitted
# mapping so predictions can be translated back with `le.inverse_transform`.
le = LabelEncoder()
target = le.fit_transform(target)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit the logistic-regression classifier on the training rows.
# Raise max_iter further if the solver reports convergence issues.
classifier = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict the held-out rows and report accuracy to two decimals.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Model Accuracy: {accuracy:.2f}')

# Save the fitted model and the feature transformer for later scoring.
joblib.dump(classifier, 'loan_model.pkl')
joblib.dump(ct, 'loan_transformer.pkl')
# Persist the label encoder as well: the model predicts encoded class indices,
# and `le.inverse_transform` is needed to map them back to the original labels.
joblib.dump(le, 'loan_label_encoder.pkl')

ValueError: all features must be in [0, 7] or [-8, 0]