In [18]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Sample dataset
data = {
    'Age': [22, 25, 47, 35, 52, 23, 40, 29, 30, 50],
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female'],
    'Monthly_Income': [3000, 4000, 5000, 4500, 6000, 3200, 5200, 4100, 4700, 5800],
    'Device_Type': ['Mobile', 'Laptop', 'TV', 'Mobile', 'TV', 'Laptop', 'TV', 'Mobile', 'Laptop', 'TV'],
    'Subscribed': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes']
}

# Create DataFrame
df = pd.DataFrame(data)

# Encode categorical variables
le_gender = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])  # Male=1, Female=0

le_device = LabelEncoder()
df['Device_Type'] = le_device.fit_transform(df['Device_Type'])  # Mobile=1, Laptop=2, TV=0

le_subscribed = LabelEncoder()
df['Subscribed'] = le_subscribed.fit_transform(df['Subscribed'])  # Yes=1, No=0

# Features and target
X = df[['Age', 'Gender', 'Monthly_Income', 'Device_Type']]
y = df['Subscribed']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_train)

# Evaluation
print("Accuracy:", accuracy_score(y_train, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_train, y_pred))
print("\nClassification Report:\n", classification_report(y_train, y_pred))

# Predict for a new customer
new_customer = pd.DataFrame({
    'Age': [28],
    'Gender': ['Male'],
    'Monthly_Income': [4000],
    'Device_Type': ['Laptop']
})

# Encode and scale new customer
new_customer['Gender'] = le_gender.transform(new_customer['Gender'])
new_customer['Device_Type'] = le_device.transform(new_customer['Device_Type'])
new_customer_scaled = scaler.transform(new_customer)

prediction = model.predict(new_customer_scaled)
prediction_label = le_subscribed.inverse_transform(prediction)
print("\nPrediction for new customer:", prediction_label[0])


Accuracy: 0.8571428571428571

Confusion Matrix:
 [[1 1]
 [0 5]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.83      1.00      0.91         5

    accuracy                           0.86         7
   macro avg       0.92      0.75      0.79         7
weighted avg       0.88      0.86      0.84         7


Prediction for new customer: No


In [20]:
# Get predicted probabilities for the test set
probabilities = model.predict_proba(X_train)  # returns array with [P(No), P(Yes)]

# Print probability for each example
# Print probability, actual, and predicted for each test example
for i, prob in enumerate(probabilities):
    print(f"Example {i+1}: "
          f"Probability [No={prob[0]:.4f}, Yes={prob[1]:.4f}] "
          f"-- Actual: {y_train.iloc[i]} "
          f"-- Predicted: {y_pred[i]}")



Example 1: Probability [No=0.4870, Yes=0.5130] -- Actual: 1 -- Predicted: 1
Example 2: Probability [No=0.5275, Yes=0.4725] -- Actual: 0 -- Predicted: 0
Example 3: Probability [No=0.1900, Yes=0.8100] -- Actual: 1 -- Predicted: 1
Example 4: Probability [No=0.1692, Yes=0.8308] -- Actual: 1 -- Predicted: 1
Example 5: Probability [No=0.1069, Yes=0.8931] -- Actual: 1 -- Predicted: 1
Example 6: Probability [No=0.3618, Yes=0.6382] -- Actual: 0 -- Predicted: 1
Example 7: Probability [No=0.1577, Yes=0.8423] -- Actual: 1 -- Predicted: 1


In [13]:
y_test

8    1
1    0
5    0
0    1
7    0
2    1
9    1
4    1
3    0
Name: Subscribed, dtype: int32