In [None]:
import pandas as pd
import pickle
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer # Import SimpleImputer

# Read the raw loan-application training table into a DataFrame
df = pd.read_csv(filepath_or_buffer="/content/train.csv")

# Preprocessing function
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, numerically encoded copy of the loan table.

    The input frame is not modified. On the copy:

    * missing values in ``Gender``, ``Married``, ``Dependents``,
      ``Self_Employed`` and ``Credit_History`` are filled with the
      column's most frequent value;
    * missing values in ``LoanAmount`` and ``Loan_Amount_Term`` are filled
      with the column mean (both columns come back as floats);
    * categorical labels are swapped for the integer codes listed in
      ``category_codes`` below (values without a code are left as-is);
    * ``Dependents`` is cast to ``int``.

    NOTE: the fill values (modes and means) are computed from the frame
    that is passed in, so calling this on a different frame uses that
    frame's own statistics.

    Args:
        df: Raw table containing the columns named above plus
            ``Education`` and ``Property_Area``.

    Returns:
        A new DataFrame with the transformations applied.
    """
    df = df.copy()

    # Assign the filled column back rather than calling
    # ``df[col].fillna(..., inplace=True)``: the chained in-place form acts
    # on a temporary object, triggers a FutureWarning in pandas 2.x and
    # stops updating ``df`` at all from pandas 3.0 on.
    mode_filled = [
        'Gender', 'Married', 'Dependents', 'Self_Employed', 'Credit_History',
    ]
    for column in mode_filled:
        df[column] = df[column].fillna(df[column].mode()[0])

    # Impute missing numerical values with the column mean.
    numerical_features = ['LoanAmount', 'Loan_Amount_Term']
    numeric = df[numerical_features].astype(float)
    df[numerical_features] = numeric.fillna(numeric.mean())

    category_codes = {
        'Gender': {'Male': 1, 'Female': 0},
        'Married': {'Yes': 1, 'No': 0},
        'Education': {'Graduate': 1, 'Not Graduate': 0},
        'Self_Employed': {'Yes': 1, 'No': 0},
        'Property_Area': {'Urban': 2, 'Semiurban': 1, 'Rural': 0},
        'Dependents': {'3+': 4},
    }
    # Map column by column; ``codes.get(value, value)`` keeps any value that
    # has no code unchanged, and the mapped result gets a numeric dtype
    # whenever every entry ends up numeric (no reliance on the deprecated
    # implicit downcasting of ``DataFrame.replace``).
    for column, codes in category_codes.items():
        df[column] = df[column].map(
            lambda value, codes=codes: codes.get(value, value)
        )

    # Remaining entries may still be digit strings ('0', '1', '2').
    df['Dependents'] = df['Dependents'].astype(int)

    return df

# Run the cleaning/encoding step over the raw table
df = preprocess(df)

# Target: Loan_Status as 0/1; features: everything except target and ID
y = df['Loan_Status'].map({'N': 0, 'Y': 1})
X = df.drop(columns=['Loan_Status', 'Loan_ID'])

# AdaBoost over 100 depth-1 decision trees (the weak learner)
base_estimator = DecisionTreeClassifier(max_depth=1)
model = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=100,
    learning_rate=1.0,
)
model.fit(X, y)

# Persist the fitted model to disk with pickle
with open("loan_status_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("✅ AdaBoost model trained and saved as 'loan_status_model.pkl'")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Gender'].fillna(df['Gender'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Married'].fillna(df['Married'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

✅ AdaBoost model trained and saved as 'loan_status_model.pkl'
