In [14]:
import pandas as pd
import plotly as pl
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots

bank_data = pd.read_csv("bank.csv", sep=';')

fig_all = px.histogram(
    bank_data,
    x="duration",
    nbins=500,
    title="Histogram czasu trwania rozmowy – wszystkie dane",
    labels={"duration": "Czas rozmowy (sekundy)"},
    template="plotly_white"
)
fig_all.show()


In [15]:
bank_data_success = bank_data[bank_data['y'] == 'yes']

fig_success = px.histogram(
    bank_data_success,
    x="duration",
    nbins=500,
    title="Histogram czasu trwania rozmowy – sukcesy",
    labels={"duration": "Czas rozmowy (sekundy)"},
    template="plotly_white"
)
fig_success.show()



In [16]:
bank_data_failure = bank_data[bank_data['y'] == 'no']

fig_failure = px.histogram(
    bank_data_failure,
    x="duration",
    nbins=500,
    title="Histogram czasu trwania rozmowy – porazki",
    labels={"duration": "Czas rozmowy (sekundy)"},
    template="plotly_white"
)
fig_failure.show()

Rozmowy, które trwają dłuzej bardziej sie sprzedaja, czyli wzbudzaja zaufanie klienta

In [17]:
bank_data_age_over_25 = bank_data[bank_data['age'] > 25]

fig_age_over_25 = px.histogram(
    bank_data_age_over_25,
    x="balance",
    nbins=500,
    title="Histogram salda konta (balance) dla osób powyżej 25 roku życia",
    labels={"balance": "Saldo konta (EUR)"},
    template="plotly_white"
)

fig_age_over_25.update_yaxes(title_text="Liczba klientów")

fig_age_over_25.show()

In [18]:
import ipywidgets as widgets


def update_histogram(min_age):
    filtered_data = bank_data[bank_data["age"] > min_age]
    
    fig = px.histogram(
        filtered_data,
        x="balance",
        nbins=50,
        title=f"Histogram salda konta (balance) dla osób powyżej {min_age} roku życia",
        labels={"balance": "Saldo konta (EUR)", "count": "Liczba klientów"},
        template="plotly_white"
    )
    
    fig.update_yaxes(title_text="Liczba klientów")
    fig.show()


age_slider = widgets.IntSlider(
    value=25,
    min=int(bank_data["age"].min()),
    max=int(bank_data["age"].max()),
    step=1,
    description="Minimalny wiek:"
)

widgets.interact(update_histogram, min_age=age_slider)

interactive(children=(IntSlider(value=25, description='Minimalny wiek:', max=87, min=19), Output()), _dom_clas…

<function __main__.update_histogram(min_age)>

In [19]:
time = []
success = []

for t in range(0, bank_data['duration'].max(), 30):
    data = bank_data[bank_data['duration'] > t]

    total = len(data)

    if total == 0:
        continue

    positives = len(data[data['y'] == 'yes'])
    ratio = positives / total * 100  # procent pozytywnych


    time.append(t)
    success.append(ratio)

# Wykres
fig = px.line(
    x=time,
    y=success,
    labels={"x": "Próg t (sekundy)", "y": "Procent pozytywnych odpowiedzi"},
    title="Procent pozytywnych odpowiedzi (y='yes') dla rozmów dłuższych niż t sekund",
    markers=True,
    template="plotly_white"
)

fig.show()



In [20]:
balance_treshold = []
positive_ratios = []

for b in range(0, bank_data['balance'].max(), 30):
    filtered = bank_data[bank_data['balance'] > b]

    total = len(filtered)

    if total == 0:
        continue

    positives = len(filtered[filtered['y'] == 'yes'])
    ratio = positives / total * 100

    balance_treshold.append(b)
    positive_ratios.append(ratio)

fig = px.line(
    x=balance_treshold,
    y=positive_ratios,
    labels={"x": "Próg balance (saldo)", "y": "Procent pozytywnych odpowiedzi"},
    title="Procent pozytywnych odpowiedzi (y='yes') dla rozmów powyzej salda b",
    markers=True,
    template="plotly_white"
)

fig.show()

In [None]:
from sklearn.model_selection import train_test_split
import plotly.graph_objects as go
import numpy as np

train_df, test_df = train_test_split(bank_data, test_size=0.5, random_state=129)

train_df.head()

def evaluate_threshold(df, threshold):
    positive = df['y'] == 'yes'
    above = df['balance'] > threshold

    FP = ((above) & (~positive)).sum()  # false positives
    FN = ((~above) & (positive)).sum()  # false negatives
    TP = ((above) & (positive)).sum()
    TN = ((~above) & (~positive)).sum()

    FP_rate = FP / (FP + TP) * 100 if (FP + TP) > 0 else 0
    FN_rate = FN / (FN + TN) * 100 if (FN + TN) > 0 else 0

    return FP_rate, FN_rate

thresholds = np.linspace(bank_data["balance"].min(), bank_data["balance"].max(), 50)
results_train = [evaluate_threshold(train_df, t) for t in thresholds]
results_test = [evaluate_threshold(test_df, t) for t in thresholds]

train_fp, train_fn = zip(*results_train)
test_fp, test_fn = zip(*results_test)

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=thresholds,
    y=train_fp,
    mode='lines',
    name='False Positive (%) - trening',
    line=dict(color='red', dash='dash')
))
fig.add_trace(go.Scatter(
    x=thresholds,
    y=train_fn,
    mode='lines',
    name='False Negative (%) - trening',
    line=dict(color='green', dash='dash')
))

fig.add_trace(go.Scatter(
    x=thresholds,
    y=test_fp,
    mode='lines',
    name='False Positive (%) - test',
    line=dict(color='red')
))
fig.add_trace(go.Scatter(
    x=thresholds,
    y=test_fn,
    mode='lines',
    name='False Negative (%) - test',
    line=dict(color='green')
))

fig.update_layout(
    title="Analiza błędów modelu dla różnych progów salda",
    xaxis_title="Próg salda (balance)",
    yaxis_title="Odsetek błędów [%]",
    legend_title="Rodzaj błędu",
    template="plotly_white",
    width=900,
    height=600
)

fig.show()

total_error = np.array(train_fp) + np.array(train_fn)
best_idx = np.argmin(total_error)
best_threshold = thresholds[best_idx]
print(f"Najlepszy próg wg zbioru treningowego: {best_threshold:.2f}")

print(f"Błąd FP (train): {train_fp[best_idx]:.2f}%")
print(f"Błąd FN (train): {train_fn[best_idx]:.2f}%")

print(f"Błąd FP (test): {test_fp[best_idx]:.2f}%")
print(f"Błąd FN (test): {test_fn[best_idx]:.2f}%")


Najlepszy próg wg zbioru treningowego: 71188.00
Błąd FP (train): 0.00%
Błąd FN (train): 11.68%
Błąd FP (test): 0.00%
Błąd FN (test): 11.37%
