# Prediction of Probability of Return - Logistic Regression

#### Step 1: Import libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

#### Step 2: Load CSV Dataset

In [3]:
df = pd.read_csv("Customer360Insights.csv")

In [4]:
df.columns = df.columns.str.strip().str.replace(' ', '').str.replace('(', '').str.replace(')', '')

#### Step 3: Clean 'OrderReturn'

In [5]:
df['OrderReturn'] = df['OrderReturn'].fillna('False')
df['OrderReturn'] = df['OrderReturn'].astype(str).str.strip().str.capitalize()

In [6]:
df['Category'] = df['Category'].astype(str).str.strip().str.title()
df['CampaignSchema'] = df['CampaignSchema'].astype(str).str.strip().str.title()
df['Country'] = df['Country'].astype(str).str.strip().str.title()
df['Gender'] = df['Gender'].astype(str).str.strip().str.capitalize()

#### Step 4: Prepare features + target

In [7]:
features = ['CustomerID','Category', 'CampaignSchema', 'Country', 'Gender']
target = 'OrderReturn'

In [8]:
# Prepare dataframe for modeling

In [9]:
df_model = df[features + [target]].dropna()

#### Step 5: Encode categorical variables

In [11]:
for col in features:
    le = LabelEncoder()
    df_model[col] = le.fit_transform(df_model[col].astype(str))

In [12]:
# Map target variable

In [13]:
df_model[target] = df_model[target].map({'False': 0, 'True': 1})

#### Step 6: Split data

In [15]:
X = df_model[features]
y = df_model[target]

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#### Step 7: Fit logistic regression model

In [17]:
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

In [18]:
# Predict

In [19]:
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:,1]

#### Step 8: Output results

In [20]:
print("\n--- Classification Report ---")
print(classification_report(y_test, y_pred, zero_division=0))


--- Classification Report ---
              precision    recall  f1-score   support

           0       0.89      1.00      0.94       532
           1       0.00      0.00      0.00        68

    accuracy                           0.89       600
   macro avg       0.44      0.50      0.47       600
weighted avg       0.79      0.89      0.83       600



In [21]:
print("\n--- Confusion Matrix ---")
print(confusion_matrix(y_test, y_pred))


--- Confusion Matrix ---
[[532   0]
 [ 68   0]]


#### Step 9: Output probabilities

In [24]:
output_prob = X_test.copy()
output_prob['ActualReturn'] = y_test
output_prob['PredictedReturnProb(%)'] = y_prob * 100

In [28]:
print("\n--- Predicted Probabilities ---")
print(output_prob.head(10))


--- Predicted Probabilities ---
      CustomerID  Category  CampaignSchema  Country  Gender  ActualReturn  \
1860         113         0               2        1       1             0   
353          353         1               4        2       0             0   
1333         308         0               0        1       0             0   
905          905         1               3        7       0             0   
1289         822         0               2        3       0             0   
1273         899         4               0        8       1             0   
938          938         2               3        8       1             0   
1731         618         1               1        6       1             0   
65            65         2               3        5       0             0   
1323         579         3               0        2       0             0   

      PredictedReturnProb(%)  
1860               13.416494  
353                10.327618  
1333               13.5806

#### Step 10: Prediction Done

In [26]:
print("\nPrediction complete")


Prediction complete
