In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv(r"C:\Users\original\Desktop\جدارات تالته ترم اول\ECU-3.3\Data-Customer-Churn.csv")  

In [3]:
df_columns=df.columns.tolist()
for i in df_columns:
    print(i)
    print(df[i].unique())

customerID
['7590-VHVEG' '5575-GNVDE' '3668-QPYBK' ... '4801-JZAZL' '8361-LTMKD'
 '3186-AJIEK']
gender
['Female' 'Male']
SeniorCitizen
[0 1]
Partner
['Yes' 'No']
Dependents
['No' 'Yes']
tenure
[ 1 34  2 45  8 22 10 28 62 13 16 58 49 25 69 52 71 21 12 30 47 72 17 27
  5 46 11 70 63 43 15 60 18 66  9  3 31 50 64 56  7 42 35 48 29 65 38 68
 32 55 37 36 41  6  4 33 67 23 57 61 14 20 53 40 59 24 44 19 54 51 26  0
 39]
PhoneService
['No' 'Yes']
MultipleLines
['No phone service' 'No' 'Yes']
InternetService
['DSL' 'Fiber optic' 'No']
OnlineSecurity
['No' 'Yes' 'No internet service']
OnlineBackup
['Yes' 'No' 'No internet service']
DeviceProtection
['No' 'Yes' 'No internet service']
TechSupport
['No' 'Yes' 'No internet service']
StreamingTV
['No' 'Yes' 'No internet service']
StreamingMovies
['No' 'Yes' 'No internet service']
Contract
['Month-to-month' 'One year' 'Two year']
PaperlessBilling
['Yes' 'No']
PaymentMethod
['Electronic check' 'Mailed check' 'Bank transfer (automatic)'
 'Credit card (a

In [4]:
import joblib
label_encode_cols = ["Partner", "Dependents", "PhoneService", "PaperlessBilling",'gender']
one_hot_encode_cols = ["MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup",
                       "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies",
                       "Contract", "PaymentMethod"]
min_max_scale_cols = ["tenure", "MonthlyCharges", "TotalCharges"]

In [5]:
X = df.drop(columns=['customerID', 'Churn'])
y = df['Churn']

In [6]:
y.unique()

array(['No', 'Yes'], dtype=object)

In [7]:
le_target = LabelEncoder()
y = le_target.fit_transform(y)

**Label Encoder for the Target Variable**

In [8]:
joblib.dump(le_target, 'label_encoder_target.pkl')
print("Label encoder for target saved.")

Label encoder for target saved.


**Handle Missing Values and Convert Data Types**

In [9]:
X[min_max_scale_cols] = X[min_max_scale_cols].replace(' ', np.nan).astype(float)
X[min_max_scale_cols] = X[min_max_scale_cols].fillna(X[min_max_scale_cols].mean())

**Saving MinMax Scaler**

In [10]:
min_max_scaler = MinMaxScaler()
scaled_numerical = min_max_scaler.fit_transform(X[min_max_scale_cols])

In [11]:
joblib.dump(min_max_scaler, 'min_max_scaler.pkl')
print("Min-max scaler saved.")

Min-max scaler saved.


**Saving Label Encoders**

In [12]:
label_encoders = {}
for col in label_encode_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

In [13]:
joblib.dump(label_encoders, 'label_encoders.pkl')
print("Label encoders saved.")

Label encoders saved.


**Saving One-Hot Encoder**

In [14]:
one_hot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
one_hot_encoded = one_hot_encoder.fit_transform(X[one_hot_encode_cols])

In [15]:
joblib.dump(one_hot_encoder, 'one_hot_encoder.pkl')
print("One-hot encoder saved.")

One-hot encoder saved.


**Combine Processed Columns**

In [16]:
X_processed = np.hstack((X[label_encode_cols].values, scaled_numerical, one_hot_encoded))

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

In [18]:
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

**Saving the Trained Logistic Regression Model**


In [19]:
model.score(X_test,y_test)

0.8183108587650816

In [20]:
model.score(X_test,y_test)

0.8183108587650816

In [21]:
model_file = 'logistic_regression_model.pkl'
joblib.dump(model, model_file)

['logistic_regression_model.pkl']

In [22]:
!pip install Flask



In [28]:
import pandas as pd
import numpy as np
import joblib
from flask import Flask, request, jsonify
import logging

# إعداد التسجيل
logging.basicConfig(level=logging.INFO)

# تحميل خطوات المعالجة المسبقة والنموذج
label_encoders = joblib.load("label_encoders.pkl")
one_hot_encoder = joblib.load("one_hot_encoder.pkl")
min_max_scaler = joblib.load("min_max_scaler.pkl")
model = joblib.load("logistic_regression_model.pkl")
le_target = joblib.load("label_encoder_target.pkl")

app = Flask(__name__)

def preprocess_data(data):
    df = pd.DataFrame([data])

    label_encode_cols = [
        "Partner",
        "Dependents",
        "PhoneService",
        "PaperlessBilling",
        "gender",
    ]
    one_hot_encode_cols = [
        "MultipleLines",
        "InternetService",
        "OnlineSecurity",
        "OnlineBackup",
        "DeviceProtection",
        "TechSupport",
        "StreamingTV",
        "StreamingMovies",
        "Contract",
        "PaymentMethod",
    ]
    min_max_scale_cols = ["tenure", "MonthlyCharges", "TotalCharges"]

    # إزالة المسافات الزائدة من مدخلات السلسلة
    for col in label_encode_cols + one_hot_encode_cols:
        df[col] = df[col].str.strip()

    # تحويل القيم غير الرقمية إلى NaN وملؤها بمتوسط العمود
    df[min_max_scale_cols] = df[min_max_scale_cols].replace(" ", np.nan).astype(float)
    df[min_max_scale_cols] = df[min_max_scale_cols].fillna(df[min_max_scale_cols].mean())

    # ترميز التسميات للأعمدة المحددة
    for col in label_encode_cols:
        le = label_encoders[col]
        df[col] = le.transform(df[col])

    # ترميز واحد للأعمدة المحددة
    one_hot_encoded = one_hot_encoder.transform(df[one_hot_encode_cols])

    # تطبيق المقياس الأدنى والأقصى على الأعمدة المحددة
    scaled_numerical = min_max_scaler.transform(df[min_max_scale_cols])

    # دمج الأعمدة المعالجة في مصفوفة واحدة
    X_processed = np.hstack((df[label_encode_cols].values, scaled_numerical, one_hot_encoded.toarray()))

    return X_processed

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json

    # إعداد قائمة الحقول المطلوبة من JSON المدخل
    required_fields = [
        "gender", "SeniorCitizen", "Partner", "Dependents", "tenure",
        "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity",
        "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV",
        "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod",
        "MonthlyCharges", "TotalCharges"
    ]

    missing_fields = [field for field in required_fields if field not in data]

    if missing_fields:
        return jsonify({"success": False, "error": f"Missing fields: {', '.join(missing_fields)}"}), 400

    try:
        # التحقق من نوع القيمة لـ 'tenure'
        if not isinstance(data['tenure'], (int, float)) or data['tenure'] < 0:
            return jsonify({"success": False, "error": "Invalid value for 'tenure'. Must be a non-negative number."}), 400
        
        X_new = preprocess_data(data)
        prediction = model.predict(X_new)
        prediction = le_target.inverse_transform(prediction)
        return jsonify({"success": True, "prediction": "Churn" if prediction[0] == "Yes" else "No Churn"})
    except Exception as e:
        logging.error(f"Prediction error: {str(e)}")
        return jsonify({"success": False, "error": str(e)}), 500

if __name__ == '__main__':
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
INFO:werkzeug:[33mPress CTRL+C to quit[0m
 * Restarting with watchdog (windowsapi)
INFO:werkzeug: * Restarting with watchdog (windowsapi)


SystemExit: 1