In [0]:
import matplotlib.pyplot as plt
import pandas as pd

# df is a Spark DataFrame read from the "silver_data" table; the columns used below are
# 'gender', 'age', 'Daily_Time_Spent_on_Site', 'Clicked_on_Ad', 'Daily_Internet_Usage', 'Area_Income'

# Read the source table through the ambient Spark session.
df = spark.table("silver_data")

# Keep only the columns the charts need, then convert to pandas for plotting.
plot_columns = [
    'gender',
    'age',
    'Daily_Time_Spent_on_Site',
    'Clicked_on_Ad',
    'Daily_Internet_Usage',
    'Area_Income',
]
pdf = df.select(*plot_columns).toPandas()

# A single row of five axes: one chart per feature plotted below.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(18, 5))

# Click rate per gender, as a percentage
# (assumes Clicked_on_Ad is a 0/1 flag, so its mean is the clicked fraction).
gender_pct = pdf.groupby('gender')['Clicked_on_Ad'].mean().mul(100)
gender_pct.plot(kind='bar', ax=axes[0])
axes[0].set(title='Click Rate by Gender', ylabel='% Clicked')

# Age vs Clicked (% clicked per age bin)
# Split the observed age range into 20 equal-width intervals.
pdf['age_bin'] = pd.cut(pdf['age'], bins=20)
# 'age_bin' is categorical, so pass observed=False explicitly: the implicit
# default is deprecated in pandas, and keeping empty bins (NaN mean, drawn as
# a gap) preserves an evenly spaced x-axis.
# Assumes Clicked_on_Ad is a 0/1 flag, so mean * 100 is the percentage clicked.
age_pct = pdf.groupby('age_bin', observed=False)['Clicked_on_Ad'].mean() * 100
age_pct.plot(kind='bar', ax=axes[1])
axes[1].set_title('Click Rate by Age Bin')
axes[1].set_ylabel('% Clicked')
# Interval labels are long; rotate them so neighbours do not overlap.
axes[1].tick_params(axis='x', rotation=45)

# Time Spent vs Clicked (% clicked per daily-time-on-site bin)
# Split the observed Daily_Time_Spent_on_Site range into 20 equal-width intervals.
pdf['time_spent_bin'] = pd.cut(pdf['Daily_Time_Spent_on_Site'], bins=20)
# 'time_spent_bin' is categorical, so pass observed=False explicitly: the
# implicit default is deprecated in pandas, and keeping empty bins (NaN mean,
# drawn as a gap) preserves an evenly spaced x-axis.
# Assumes Clicked_on_Ad is a 0/1 flag, so mean * 100 is the percentage clicked.
time_spent_pct = pdf.groupby('time_spent_bin', observed=False)['Clicked_on_Ad'].mean() * 100
time_spent_pct.plot(kind='bar', ax=axes[2])
axes[2].set_title('Click Rate by Time Spent Bin')
axes[2].set_ylabel('% Clicked')
# Interval labels are long; rotate them so neighbours do not overlap.
axes[2].tick_params(axis='x', rotation=45)

# Daily Internet Usage vs Clicked (% clicked per internet-usage bin)
# Split the observed Daily_Internet_Usage range into 20 equal-width intervals.
pdf['internet_bin'] = pd.cut(pdf['Daily_Internet_Usage'], bins=20)
# 'internet_bin' is categorical, so pass observed=False explicitly: the
# implicit default is deprecated in pandas, and keeping empty bins (NaN mean,
# drawn as a gap) preserves an evenly spaced x-axis.
# Assumes Clicked_on_Ad is a 0/1 flag, so mean * 100 is the percentage clicked.
internet_pct = pdf.groupby('internet_bin', observed=False)['Clicked_on_Ad'].mean() * 100
internet_pct.plot(kind='bar', ax=axes[3])
axes[3].set_title('Click Rate by Daily Internet Usage')
axes[3].set_ylabel('% Clicked')
# Interval labels are long; rotate them so neighbours do not overlap.
axes[3].tick_params(axis='x', rotation=45)

# Area Income vs Clicked (% clicked per area-income bin)
# Split the observed Area_Income range into 20 equal-width intervals.
pdf['income_bin'] = pd.cut(pdf['Area_Income'], bins=20)
# 'income_bin' is categorical, so pass observed=False explicitly: the implicit
# default is deprecated in pandas, and keeping empty bins (NaN mean, drawn as
# a gap) preserves an evenly spaced x-axis.
# Assumes Clicked_on_Ad is a 0/1 flag, so mean * 100 is the percentage clicked.
income_spent_pct = pdf.groupby('income_bin', observed=False)['Clicked_on_Ad'].mean() * 100
income_spent_pct.plot(kind='bar', ax=axes[4])
axes[4].set_title('Click Rate by Area Income')
axes[4].set_ylabel('% Clicked')
# Interval labels are long; rotate them so neighbours do not overlap.
axes[4].tick_params(axis='x', rotation=45)

# Adjust subplot spacing so titles and rotated tick labels fit in the figure.
plt.tight_layout()