In [4]:
# Cell 1: Imports and Data Loading
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Data Pre-processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Model Training
from sklearn.linear_model import LogisticRegression

# Model Evaluation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the customer purchase records. The path is relative, so the CSV is
# resolved against the kernel's working directory (normally the notebook folder).
df = pd.read_csv("customer_purchases.csv")

# Confirm the load and preview the first rows.
print("File loaded successfully. DataFrame head:", df.head(), sep="\n")


File loaded successfully. DataFrame head:
   Age  Estimated_Salary  Previous_Purchases  Purchased
0   25             55000                   2          0
1   35             75000                   5          1
2   42             60000                   1          0
3   28             80000                   8          1
4   55            120000                  10          1


In [6]:
# Cell 2: Data Inspection and Preparation
if df is not None:
    # Report each column's dtype and its count of missing values.
    print("\n--- Data Types ---", df.dtypes, sep="\n")
    print("\n--- Missing Values ---", df.isnull().sum(), sep="\n")

    # 'Purchased' is taken as the label; every other column is a feature.
    y = df['Purchased']
    X = df.drop(columns='Purchased')

    print("\n--- Feature and Target Shapes ---")
    print("Features (X) shape:", X.shape)
    print("Target (y) shape:", y.shape)

# ---


--- Data Types ---
Age                   int64
Estimated_Salary      int64
Previous_Purchases    int64
Purchased             int64
dtype: object

--- Missing Values ---
Age                   0
Estimated_Salary      0
Previous_Purchases    0
Purchased             0
dtype: int64

--- Feature and Target Shapes ---
Features (X) shape: (30, 3)
Target (y) shape: (30,)


In [8]:
# Cell 3: Train-Test Split
if 'X' in locals():
    # Hold out 20% of the rows for testing. Stratifying on y keeps the class
    # proportions the same in both partitions, and the fixed seed makes the
    # split repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        stratify=y,
        test_size=0.2,
        random_state=42,
    )

    print("\n--- Shapes after Splitting ---")
    print("X_train shape:", X_train.shape)
    print("X_test shape:", X_test.shape)

# ---


--- Shapes after Splitting ---
X_train shape: (24, 3)
X_test shape: (6, 3)


In [9]:
# Cell 4: Feature Scaling
if 'X_train' in locals():
    # Learn the scaling statistics from the training split only, so nothing
    # about the test split leaks into preprocessing.
    scaler = StandardScaler().fit(X_train)

    # Apply that one fitted scaler to both splits.
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("\n--- Data after Scaling (first 5 training samples) ---")
    print(X_train_scaled[:5])

# ---


--- Data after Scaling (first 5 training samples) ---
[[-0.05439426  0.1094913   0.4178784 ]
 [ 1.45190826  1.40377656  1.59777035]
 [-0.8577556  -1.06713166 -1.05698654]
 [ 0.24686625  0.50169896  0.71285139]
 [-0.8577556   0.03104977  0.71285139]]


In [10]:
# Cell 5: Model Training
if 'X_train_scaled' in locals():
    # Build the classifier and fit it on the scaled training split in one step;
    # fit() returns the estimator itself.
    model = LogisticRegression(random_state=42, max_iter=1000).fit(
        X_train_scaled, y_train
    )
    print("\nModel training complete.")

# ---


Model training complete.


In [11]:
# Cell 6: Model Evaluation
if 'model' in locals():
    # Score the held-out split once, then derive every metric from y_pred.
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    # Overall accuracy.
    print(f"\nModel Accuracy: {accuracy:.4f}")

    # Per-class precision / recall / F1.
    print("\n--- Classification Report ---", classification_report(y_test, y_pred), sep="\n")

    # Confusion matrix as returned by confusion_matrix(y_test, y_pred).
    print("\n--- Confusion Matrix ---", cm, sep="\n")

# ---


Model Accuracy: 1.0000

--- Classification Report ---
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         3

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6


--- Confusion Matrix ---
[[3 0]
 [0 3]]


In [12]:
# Cell 7: Inference Functions
# The helpers are defined unconditionally: they receive the model and scaler as
# arguments, so nothing has to exist in the kernel when they are defined.

def _expected_feature_names(fitted_scaler):
    """Return the feature names, in order, that the scaler should be fed.

    Prefers the names recorded on the scaler itself (scikit-learn stores them
    in ``feature_names_in_`` when it is fitted on a DataFrame). Only when that
    attribute is absent does it fall back to the notebook-level feature frame
    ``X``, which is what the helpers relied on before.
    """
    names = getattr(fitted_scaler, 'feature_names_in_', None)
    if names is None:
        return list(X.columns)
    return list(names)


def predict_purchase(sample_dict, trained_model, fitted_scaler):
    """
    Predicts customer purchase for a single sample.

    Args:
        sample_dict (dict): A dictionary with feature names as keys. Keys that
            are not model features are ignored.
        trained_model: The trained machine learning model (must provide
            ``predict`` and ``predict_proba``).
        fitted_scaler: The StandardScaler object fitted on the training data.

    Returns:
        dict: ``{'prediction': int, 'probability_of_purchase': float}``. Both
        values are plain Python scalars, with the probability rounded to four
        decimals.

    Raises:
        KeyError: If ``sample_dict`` lacks one or more required features.
    """
    feature_names = _expected_feature_names(fitted_scaler)

    # Fail early with a readable message instead of a pandas indexing error.
    missing = [name for name in feature_names if name not in sample_dict]
    if missing:
        raise KeyError(f"sample_dict is missing required feature(s): {missing}")

    # One-row frame whose columns follow the order the scaler was fitted with.
    sample_df = pd.DataFrame([sample_dict])[feature_names]

    # Scale the sample using the fitted scaler
    sample_scaled = fitted_scaler.transform(sample_df)

    # Make prediction and get probabilities. Column 1 of predict_proba is read
    # as the "purchase" class, which assumes the labels are 0/1 in that order.
    prediction = trained_model.predict(sample_scaled)[0]
    probability = float(trained_model.predict_proba(sample_scaled)[0][1])

    return {
        'prediction': int(prediction),
        # float() above keeps NumPy scalars out of the result, so it prints and
        # serialises as 0.8244 rather than np.float64(0.8244).
        'probability_of_purchase': round(probability, 4)
    }


def predict_batch(df_batch, trained_model, fitted_scaler):
    """
    Predicts customer purchase for a batch of samples in a DataFrame.

    Args:
        df_batch (pd.DataFrame): Unscaled rows containing at least every
            feature column; extra columns are carried through untouched.
        trained_model: The trained machine learning model (must provide
            ``predict`` and ``predict_proba``).
        fitted_scaler: The StandardScaler object fitted on the training data.

    Returns:
        pd.DataFrame: A copy of ``df_batch`` with two added columns,
        ``prediction`` (int) and ``probability_of_purchase`` (rounded to four
        decimals). The input frame is not modified.

    Raises:
        KeyError: If ``df_batch`` lacks one or more required feature columns.
    """
    feature_names = _expected_feature_names(fitted_scaler)

    missing = [name for name in feature_names if name not in df_batch.columns]
    if missing:
        raise KeyError(f"df_batch is missing required feature column(s): {missing}")

    # Work on a copy so the caller's frame is left as it was.
    result_df = df_batch.copy()

    # A zero-row batch has nothing to score; return the empty frame with the
    # output columns present rather than handing an empty array to the scaler.
    if len(df_batch) == 0:
        result_df['prediction'] = np.array([], dtype=int)
        result_df['probability_of_purchase'] = np.array([], dtype=float)
        return result_df

    # Scale the features in the order the scaler was fitted with
    batch_scaled = fitted_scaler.transform(df_batch[feature_names])

    # Get predictions and probabilities (column 1 = "purchase" class, as above)
    predictions = np.asarray(trained_model.predict(batch_scaled))
    probabilities = np.asarray(trained_model.predict_proba(batch_scaled))[:, 1]

    result_df['prediction'] = predictions.astype(int)
    result_df['probability_of_purchase'] = np.round(probabilities, 4)
    return result_df


print("\nInference functions are ready.")





Inference functions are ready.


In [13]:
    # --- Example Usage ---
    # Score one hand-written customer record with the single-sample helper.
    print("\n--- Single Sample Inference Example ---")
    example_sample = {
        'Age': 36,
        'Estimated_Salary': 82000,
        'Previous_Purchases': 7,
    }
    single_result = predict_purchase(example_sample, model, scaler)
    print(f"Prediction for sample {example_sample}: {single_result}")

    # Score the first three rows of the original, unscaled X_test with the
    # batch helper; the copy keeps X_test itself untouched.
    print("\n--- Batch Inference Example ---")
    batch_df = X_test.iloc[:3].copy()
    batch_results = predict_batch(batch_df, model, scaler)
    print(batch_results)


--- Single Sample Inference Example ---
Prediction for sample {'Age': 36, 'Estimated_Salary': 82000, 'Previous_Purchases': 7}: {'prediction': 1, 'probability_of_purchase': np.float64(0.8244)}

--- Batch Inference Example ---
    Age  Estimated_Salary  Previous_Purchases  prediction  \
24   23             35000                   0           0   
13   33             78000                   5           1   
14   26             48000                   2           0   

    probability_of_purchase  
24                   0.0114  
13                   0.5667  
14                   0.0563  
