# Logistic Regression

We are going to use the age and income of each user to predict whether they are going to buy (1) or not (0).

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Creating Random Data Set

In [4]:
# Seed NumPy's global random generator so the random data drawn afterwards
# is the same on every run.
np.random.seed(42)

# Generate Random Data for customer variables

In [5]:
# Draw 100 integer samples per feature; the upper bound of randint is exclusive,
# so ages fall in [20, 60) and incomes in [30, 120).
# Age is sampled before income so the random stream is consumed in the same order.
age = np.random.randint(low=20, high=60, size=100)
income = np.random.randint(low=30, high=120, size=100)

# Target variable: Purchase (1 if likely to purchase, 0 otherwise)

In [6]:
# Standard-normal noise, one value per sample.
noise = np.random.randn(100)
# Linear score in age and income, perturbed by the noise.
purchase_score = 0.05 * age + 0.03 * income + noise
# Label is 1 when the noisy score exceeds 5, otherwise 0.
purchase = (purchase_score > 5).astype(int)

# Creating the DataFrame

In [7]:
# Collect the two features and the target into one table, one column each.
data = pd.DataFrame(
    {
        'Age': age,
        'Income': income,
        'Purchase': purchase,
    }
)

# Splitting data into training and testing sets

In [8]:
# Feature matrix: the Age and Income columns of the data frame (one row per sample).
# The actual column values are selected here, not just a list of column names,
# and the names match the capitalised columns defined in `data`.
x = data[['Age', 'Income']]
# Target vector: the 0/1 Purchase label for each sample.
y = data['Purchase']

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
split_parts = train_test_split(x, y, test_size=0.3, random_state=42)
x_train, x_test, y_train, y_test = split_parts

# Logistic Regression

In [None]:
# Create a logistic-regression classifier with scikit-learn's default settings.
log_reg_model = LogisticRegression()
# Fit the classifier on the training split.
log_reg_model.fit(x_train, y_train)

# Make Predictions

In [None]:
# Predicted class labels for the test split.
y_pred = log_reg_model.predict(x_test)
# Per-class probabilities; column index 1 is kept as the predicted probability.
# NOTE(review): this is the probability of label 1 only if the model's classes
# are ordered [0, 1] — confirm via log_reg_model.classes_.
class_probabilities = log_reg_model.predict_proba(x_test)
y_pred_prob = class_probabilities[:, 1]

# Step 5: Evaluate Model Performance

In [None]:
# Compare the test-set predictions with the true labels using three metrics.
# The three calls are independent of each other.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

In [None]:
# Print each metric after its heading, in the order: confusion matrix,
# classification report, accuracy.
for heading, value in (
    ("Confusion Matrix:\n", conf_matrix),
    ("\nClassification Report:\n", class_report),
    ("Accuracy:", accuracy),
):
    print(heading, value)

# Step 6: Visualize Results

In [None]:
# Open a new 10x6-inch figure for the plotting calls that follow.
# NOTE(review): if this runs as separate notebook cells with an inline backend,
# the later plotting cells may not draw onto this figure — confirm, and merge
# the plotting cells if needed.
plt.figure(figsize=(10, 6))

# Scatter plot of actual data

In [None]:
# Plot every sample at (Age, Income), coloured by its Purchase label using the
# blue-white-red colormap.
plt.scatter(
    data['Age'],
    data['Income'],
    c=data['Purchase'],
    cmap='bwr',
    alpha=0.7,
    label='Actual Data',
)

# Plot the decision boundary (for visualization)

In [None]:
# Build a 100x100 grid of (age, income) points to evaluate the model on.
age_range = np.linspace(20, 60, 100)
income_range = np.linspace(30, 120, 100)
age_grid, income_grid = np.meshgrid(age_range, income_range)

# One row per grid point, columns in (age, income) order.
grid_points = np.c_[age_grid.ravel(), income_grid.ravel()]

# If the model was fitted on a DataFrame it records the training column names;
# pass the grid with the same names so predict() does not warn about
# missing feature names.
# NOTE(review): assumes the training columns are in (age, income) order — confirm.
feature_names = getattr(log_reg_model, "feature_names_in_", None)
if feature_names is not None:
    grid_points = pd.DataFrame(grid_points, columns=feature_names)

# Predict a class for every grid point and restore the 2-D grid shape.
decision_boundary = log_reg_model.predict(grid_points)
decision_boundary = decision_boundary.reshape(age_grid.shape)

# Shade the two predicted regions: values below 0.5 in blue, above 0.5 in red.
plt.contourf(age_grid, income_grid, decision_boundary, levels=[-0.1, 0.5, 1.1], colors=['blue', 'red'], alpha=0.2)

In [None]:
# Title and axis labels.
plt.title("Logistic Regression: Purchase Prediction")
plt.xlabel("Age")
plt.ylabel("Income")

# Grid lines and legend entries.
plt.grid(True)
legend_labels = ["Actual Data", "Decision Boundary"]
plt.legend(legend_labels)

# Render the figure.
plt.show()