In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path   
from sklearn.linear_model import LogisticRegression

# Load the raw data set; the file is semicolon-delimited.
data_path = Path('Data/bank-additional-full.csv')
df = pd.read_csv(data_path, sep=';')

# 1. Prepare Data
# Encode the textual outcome column 'y' as a numeric target: 'no' -> 0, 'yes' -> 1.
outcome_codes = {'no': 0, 'yes': 1}
df['target'] = df['y'].map(outcome_codes)

# Single predictor (kept two-dimensional as a one-column frame) and the response.
X = df.loc[:, ['campaign']]  # Feature: Number of calls
y = df['target']             # Target: 0 or 1

# 2. Fit Logistic Regression (default hyper-parameters); fit() returns the estimator.
log_reg = LogisticRegression().fit(X, y)

# 3. Simulate Call Numbers 1 through max_calls to see the trend
max_calls = 20  # single source for the simulated range and the x-axis ticks
calls_range = np.arange(1, max_calls + 1).reshape(-1, 1)

# The model was fitted on a DataFrame, so score a DataFrame that carries the
# same column name(s). Passing the bare array makes recent scikit-learn
# versions warn that X does not have valid feature names.
calls_frame = pd.DataFrame(calls_range, columns=X.columns)

# predict_proba returns [Prob_0, Prob_1]. We want Prob_1 (Success)
probs = log_reg.predict_proba(calls_frame)[:, 1]

# 4. Visualize & Analyze
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(calls_range.ravel(), probs, marker='o', linestyle='-', color='blue')
# Business logic line: horizontal reference at the 5% success probability.
ax.axhline(y=0.05, color='r', linestyle='--', label='5% Efficiency Threshold')
ax.set_title("Logistic Regression: Probability of Success by Call Number")
ax.set_xlabel("Number of Calls")
ax.set_ylabel("Predicted Probability of 'Yes'")
ax.set_xticks(range(1, max_calls + 1))
ax.legend()
plt.show()

# 5. Answer the Question
# Find the first call number where probability drops below 5% (0.05)
threshold = 0.05
below_threshold = probs < threshold

if below_threshold.any():
    # First simulated call number whose predicted success probability is
    # under the threshold; the recommendation is the attempt just before it.
    drop_off_point = int(calls_range.ravel()[below_threshold][0])
    print(f"Recommendation: Stop calling after {drop_off_point - 1} attempts.")
else:
    # No simulated call number falls below the threshold. Indexing the empty
    # selection would raise IndexError, so report that no cut-off was found.
    drop_off_point = None
    print(
        f"Recommendation: Predicted success probability stays at or above "
        f"{threshold:.0%} for all {len(probs)} simulated call numbers; "
        f"no cut-off found in this range."
    )

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# 1. Aggregate Data: one row per call count, holding the mean of 'target'
#    within that 'campaign' value (Call Count vs Conversion Rate).
agg_data = (
    df.groupby('campaign')['target']
    .mean()
    .reset_index(name='conversion_rate')
)

# Keep only the first 20 call counts (outliers skew regression)
agg_data = agg_data.loc[agg_data['campaign'].le(20)]

# One-column feature frame and the response series for the regression.
X_agg = agg_data.loc[:, ['campaign']]
y_agg = agg_data['conversion_rate']

# 2. Polynomial Transformation: expand the call count into polynomial terms
#    (degree 3 here) so that a linear model can follow a curve.
poly = PolynomialFeatures(degree=3)
poly.fit(X_agg)
X_poly = poly.transform(X_agg)

# 3. Fit Linear Regression on the expanded features.
# NOTE(review): every call count contributes one equally weighted point,
# regardless of how many rows were averaged into its conversion rate.
lin_reg = LinearRegression().fit(X_poly, y_agg)

# 4. Predict Trend: fitted conversion rate at each observed call count.
y_pred = lin_reg.predict(X_poly)

# 5. Visualize: observed conversion rates (points) against the fitted curve (line).
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_agg, y_agg, color='black', label='Actual Data')
ax.plot(X_agg, y_pred, color='blue', linewidth=2, label='Polynomial Regression Fit')
ax.set(
    title="Regression Model: Conversion Rate Trend",
    xlabel="Number of Calls",
    ylabel="Conversion Rate",
)
ax.legend()
plt.show()

# Insight: Look at the peak of the blue line.
# The call count at that peak is where the fitted curve predicts the highest
# conversion rate, i.e. the "average number of calls for maximum success."