In [14]:
import os

import pandas as pd
import plotly.graph_objects as go
from dash import Dash, dcc, html
from plotly.subplots import make_subplots  # Import make_subplots
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [15]:
# Location of the cleaned marketing-campaign CSV.
# Set the MARKETING_CAMPAIGN_CSV environment variable to point at your own copy
# of the file; when it is unset, the original author's local path is used so
# existing behaviour is unchanged.
DEFAULT_CSV_PATH = '/Users/jacksonsorenson/Documents/Pyhton Projects/Market Research Project/Cleaned CSV/cleaned_marketing_campaign.csv'
csv_path = os.environ.get('MARKETING_CAMPAIGN_CSV', DEFAULT_CSV_PATH)

df = pd.read_csv(csv_path)

In [16]:
# Plotly Dashboard for K-Means Clustering Segmentation Analysis

# Standardize every numeric column of df so that all features are on a
# comparable scale before the distance-based steps below.
# The clustering step later in this cell writes its labels into df['Cluster'],
# which is numeric; drop that column (if present) so that re-running the cell
# does not feed the previous run's labels back in as a feature.
scaler = StandardScaler()
feature_frame = (
    df.drop(columns=['Cluster'], errors='ignore')
      .select_dtypes(include='number')
)
X_scaled = scaler.fit_transform(feature_frame)

# Elbow method: fit K-Means once per candidate cluster count and record the
# inertia of each fit, then plot inertia against the cluster count.
optimal_clusters = 10  # largest cluster count tried (inclusive)
candidate_counts = list(range(1, optimal_clusters + 1))

distortions = []
for k in candidate_counts:
    kmeans = KMeans(n_clusters=k, random_state=42).fit(X_scaled)
    distortions.append(kmeans.inertia_)

elbow_trace = go.Scatter(
    x=candidate_counts,
    y=distortions,
    mode='lines+markers',
    name='Distortion'
)
elbow_layout = dict(
    title='Elbow Method for Optimal Clusters',
    xaxis_title='Number of Clusters',
    yaxis_title='Inertia (Distortion)'
)

fig_elbow = go.Figure()
fig_elbow.add_trace(elbow_trace)
fig_elbow.update_layout(**elbow_layout)

# K-Means clustering results.
# Fit the final model with the chosen number of clusters and store each row's
# label in df['Cluster'] (used by the cluster-profile step further down).
optimal_k = 3
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
cluster_labels = kmeans.fit_predict(X_scaled)
df['Cluster'] = cluster_labels

# Project the scaled features onto two principal components for plotting.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Cluster labels are categories, not magnitudes: draw one trace per cluster so
# each gets its own discrete colour and a legend entry, letting the reader
# match points to the cluster ids shown in the profile charts.
fig_kmeans = go.Figure()
for cluster_id in sorted(set(cluster_labels.tolist())):
    in_cluster = cluster_labels == cluster_id
    fig_kmeans.add_trace(go.Scatter(
        x=X_pca[in_cluster, 0],
        y=X_pca[in_cluster, 1],
        mode='markers',
        marker=dict(size=10, opacity=0.7),
        name=f'Cluster {cluster_id}'
    ))
fig_kmeans.update_layout(
    title='K-Means Clustering Results (PCA-reduced)',
    xaxis_title='PCA Component 1',
    yaxis_title='PCA Component 2',
    showlegend=True  # keep the legend even if only one cluster is present
)

# Cluster profiles: per-cluster mean of each profiling metric, drawn as one
# bar chart per metric stacked vertically.
# Maps each profiled column of df to the title shown above its subplot.
metric_titles = {
    'Age': 'Age',
    'Income': 'Income',
    'Total_Spend': 'Total Spend',
    'NumWebPurchases': 'Number of Web Purchases',
    'Recency': 'Recency',
}
metrics = list(metric_titles)

cluster_means = df.groupby('Cluster')[metrics].mean()
cluster_profile = cluster_means.reset_index()

fig_profiles = make_subplots(
    rows=len(metrics),
    cols=1,
    subplot_titles=list(metric_titles.values())
)

for row_number, metric in enumerate(metrics, start=1):
    metric_bars = go.Bar(
        x=cluster_profile['Cluster'],
        y=cluster_profile[metric],
        name=metric
    )
    fig_profiles.add_trace(metric_bars, row=row_number, col=1)

# The subplot titles already identify each metric, so the legend is hidden.
fig_profiles.update_layout(
    height=1500,
    title='Cluster Profiles Split by Metrics',
    showlegend=False
)

# Dash app layout: a centred page title followed by one section per figure,
# each section being a heading with its graph underneath.
dashboard_sections = [
    ("Elbow Method for Optimal Clusters", fig_elbow),
    ("K-Means Clustering Results", fig_kmeans),
    ("Cluster Profiles (Split by Metrics)", fig_profiles),
]

page_title = html.H1("Customer Segmentation Dashboard", style={'textAlign': 'center'})
section_blocks = [
    html.Div([
        html.H2(heading),
        dcc.Graph(figure=figure)
    ])
    for heading, figure in dashboard_sections
]

app = Dash(__name__)
app.layout = html.Div([page_title] + section_blocks)

# Start the Dash development server when this code is executed directly.
# Dash 3 replaced `app.run_server` with `app.run`, so prefer `run` when the
# installed Dash provides it and fall back to `run_server` on older releases
# that do not have `run` yet.
if __name__ == '__main__':
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)
