In [1]:
import pandas as pd

In [3]:
data_20 = pd.read_csv("Data/churn-bigml-20.csv")
data_80 = pd.read_csv("Data/churn-bigml-80.csv")

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib

# Load training data
# Assuming training and testing data are loaded as DataFrames
training_data = pd.read_csv("Data/churn-bigml-20.csv")  # Replace with the actual file path
testing_data = pd.read_csv("Data/churn-bigml-20.csv")  # Replace with the actual file path

# Separate features and target
X_train = training_data.drop("Churn", axis=1)
y_train = training_data["Churn"]

X_test = testing_data.drop("Churn", axis=1)
y_test = testing_data["Churn"]

# Identify column types
categorical_columns = ["State", "International plan", "Voice mail plan"]
numeric_columns = [col for col in X_train.columns if col not in categorical_columns]

# Preprocessing pipeline
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_columns),
        ("cat", categorical_transformer, categorical_columns),
    ]
)

# Create a pipeline with preprocessing and model
model_pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", LogisticRegression(random_state=42, max_iter=1000)),
    ]
)

# Train the model
model_pipeline.fit(X_train, y_train)

# Serialize and dump the model
joblib.dump(model_pipeline, "logistic_regression_pipeline.pkl")
print("Model saved as logistic_regression_pipeline.pkl")

# Apply the same preprocessing to testing data
print("Predictions from trained model:")
y_pred = model_pipeline.predict(X_test)
print(classification_report(y_test, y_pred))

# Example of loading the saved model and using it
loaded_model = joblib.load("logistic_regression_pipeline.pkl")
y_pred_loaded = loaded_model.predict(X_test)
print("Predictions from loaded model:")
print(classification_report(y_test, y_pred_loaded))



Model saved as logistic_regression_pipeline.pkl
Predictions from trained model:
              precision    recall  f1-score   support

       False       0.90      0.98      0.94       572
        True       0.73      0.34      0.46        95

    accuracy                           0.89       667
   macro avg       0.81      0.66      0.70       667
weighted avg       0.87      0.89      0.87       667

Predictions from loaded model:
              precision    recall  f1-score   support

       False       0.90      0.98      0.94       572
        True       0.73      0.34      0.46        95

    accuracy                           0.89       667
   macro avg       0.81      0.66      0.70       667
weighted avg       0.87      0.89      0.87       667



In [13]:


# Load the saved model
model_pipeline = joblib.load("Model/logistic_regression_pipeline.pkl")

# Example row for testing (replace with actual data)
example_row = {
    "State": "NY",                  # Example state
    "Account length": 120,          # Example account length
    "Area code": 415,               # Example area code
    "International plan": "No",     # Yes/No
    "Voice mail plan": "Yes",       # Yes/No
    "Number vmail messages": 10,    # Example number
    "Total day minutes": 180.0,     # Example value
    "Total day calls": 80,          # Example value
    "Total day charge": 30.6,       # Example value
    "Total eve minutes": 200.0,     # Example value
    "Total eve calls": 100,         # Example value
    "Total eve charge": 17.0,       # Example value
    "Total night minutes": 250.0,   # Example value
    "Total night calls": 90,        # Example value
    "Total night charge": 11.0,     # Example value
    "Total intl minutes": 12.0,     # Example value
    "Total intl calls": 3,          # Example value
    "Total intl charge": 3.24,      # Example value
    "Customer service calls": 2     # Example value
}

# Convert example row to a DataFrame
example_df = pd.DataFrame([example_row])

# Predict using the loaded model
prediction = model_pipeline.predict(example_df)
probability = model_pipeline.predict_proba(example_df)

# Output prediction results
print(f"Prediction: {prediction[0]}")  # True/False for Churn
print(f"Probability: {probability[0]}")  # Probability of each class


Prediction: False
Probability: [0.93372557 0.06627443]
