<a href="https://colab.research.google.com/github/Nitya131003/encryptix_project/blob/main/app_task3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from flask import Flask, request, jsonify

Load and Preprocess the Data

In [None]:
# Load data (replace 'data.csv' with your dataset)
data = pd.read_csv('Churn_Modelling.csv')

In [None]:
# Example preprocessing
# Encode categorical features
label_encoders = {}
for column in ['Gender', 'Geography']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

In [None]:
# Feature-target split
X = data.drop(['CustomerId', 'Surname', 'Exited'], axis=1)  # Drop irrelevant features and the target
y = data['Exited']  # 'Exited' is the churn label

In [None]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Model Training and Hyperparameter Tuning

In [None]:
# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

In [None]:
# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

In [None]:
# XGBoost
xgb = XGBClassifier(random_state=42)
xgb.fit(X_train, y_train)

In [None]:
# Hyperparameter tuning for Random Forest (example)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10]
}
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2, scoring='roc_auc')
grid_search.fit(X_train, y_train)
best_rf = grid_search.best_estimator_

Fitting 3 folds for each of 27 candidates, totalling 81 fits


Model Evaluation

In [None]:
# Evaluate models
models = {'Logistic Regression': log_reg, 'Random Forest': best_rf, 'XGBoost': xgb}

for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"Model: {name}")
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    print(f"AUC-ROC: {roc_auc_score(y_test, y_pred)}")
    print(classification_report(y_test, y_pred))
    print('-' * 60)

Model: Logistic Regression
Accuracy: 0.815
AUC-ROC: 0.5734738762190227
              precision    recall  f1-score   support

           0       0.83      0.97      0.89      1607
           1       0.60      0.18      0.27       393

    accuracy                           0.81      2000
   macro avg       0.71      0.57      0.58      2000
weighted avg       0.78      0.81      0.77      2000

------------------------------------------------------------
Model: Random Forest
Accuracy: 0.863
AUC-ROC: 0.6956112807991753
              precision    recall  f1-score   support

           0       0.87      0.97      0.92      1607
           1       0.78      0.42      0.55       393

    accuracy                           0.86      2000
   macro avg       0.83      0.70      0.73      2000
weighted avg       0.85      0.86      0.85      2000

------------------------------------------------------------
Model: XGBoost
Accuracy: 0.8565
AUC-ROC: 0.7204002527111825
              precision    r

Model Deployment with Flask

In [None]:
app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json(force=True)
    df = pd.DataFrame(data)

    # Preprocessing
    for column, le in label_encoders.items():
        df[column] = le.transform(df[column])
    df = scaler.transform(df)

    # Predict using the best model
    prediction = best_rf.predict(df)

    return jsonify({'prediction': int(prediction[0])})

if __name__ == '__main__':
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat


 Run the Flask App

In [None]:
y_pred = rf.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"AUC-ROC: {roc_auc_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))

Accuracy: 0.864
AUC-ROC: 0.7096893204190952
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.76      0.46      0.57       393

    accuracy                           0.86      2000
   macro avg       0.82      0.71      0.74      2000
weighted avg       0.85      0.86      0.85      2000



In [None]:
!python App.ipynb

python3: can't open file '/content/App.ipynb': [Errno 2] No such file or directory


In [None]:
!curl -X POST -H "Content-Type: application/json" \
     -d '[{"CreditScore": 600, "Geography": "France", "Gender": "Male", "Age": 40, "Tenure": 3, "Balance": 60000, "NumOfProducts": 2, "HasCrCard": 1, "IsActiveMember": 1, "EstimatedSalary": 50000}]' \
     http://127.0.0.1:5000/predict

curl: (7) Failed to connect to 127.0.0.1 port 5000 after 0 ms: Connection refused


In [None]:
input_data = (619, "France", "Female", 42, 2, 0, 1, 1, 1, 101348.88)  # Remove extra features

# change the input data to numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the numpy array as we are predicting for one datapoint
input_data_reshape = input_data_as_numpy_array.reshape(1,-1)

prediction = model.predict(input_data_reshape)
print(prediction)

if(prediction[0] == 0):
  print('NO CHURN')
else:
  print('CHURN')

ValueError: Feature shape mismatch, expected: 11, got 10