In [7]:
import pandas as pd
# Read each cleaned game file and tag every row with the game it came from,
# so the source stays identifiable once the tables are stacked together.
powerball_df = pd.read_csv("powerball_cleaned.csv").assign(Game='Powerball')
megamillions_df = pd.read_csv("mega_millions_cleaned.csv").assign(Game='Mega Millions')
pick10_df = pd.read_csv("pick10_cleaned.csv").assign(Game='Pick 10')

# Melt each dataset to long format, selecting the number columns by their PB/MM/P10 name prefix
def reshape_lottery_data(df, prefix, draw_col='Draw Date', game_col='Game'):
    """Convert one game's wide draw table into long form (one row per drawn number).

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table containing ``draw_col``, ``game_col`` and one column per
        drawn number. The input frame is not modified.
    prefix : str
        Column-name prefix identifying the number columns. Matching uses
        ``str.startswith`` and is therefore case-sensitive.
    draw_col : str, default 'Draw Date'
        Name of the draw-date column.
    game_col : str, default 'Game'
        Name of the game-label column.

    Returns
    -------
    pandas.DataFrame
        Columns ``[draw_col, game_col, 'Position', 'Number']``, where
        ``Position`` is the name of the source column. ``draw_col`` is parsed
        to datetime and ``Number`` to numeric; rows with a missing value in
        any column (including dates or numbers that failed to parse) are
        dropped.

    Raises
    ------
    ValueError
        If no column of ``df`` starts with ``prefix``. Without this check the
        result would be silently empty and the game would disappear from any
        downstream combined output.
    """
    number_cols = [col for col in df.columns if col.startswith(prefix)]
    if not number_cols:
        raise ValueError(
            f"No columns starting with {prefix!r} found; "
            f"available columns: {list(df.columns)}"
        )

    df_melted = df.melt(
        id_vars=[draw_col, game_col],
        value_vars=number_cols,
        var_name='Position',
        value_name='Number',
    )
    # errors='coerce' turns unparseable entries into NaT/NaN so that the
    # dropna() below removes them instead of the conversion raising.
    df_melted[draw_col] = pd.to_datetime(df_melted[draw_col], errors='coerce')
    df_melted['Number'] = pd.to_numeric(df_melted['Number'], errors='coerce')
    return df_melted.dropna()

# Turn every game's wide table into long form, pairing each frame with the
# column prefix that marks its number columns.
pb_long, mm_long, p10_long = (
    reshape_lottery_data(frame, prefix)
    for frame, prefix in (
        (powerball_df, 'PB'),
        (megamillions_df, 'MM'),
        (pick10_df, 'P10'),
    )
)

# Stack the three long tables into a single frame
combined_lottery = pd.concat([pb_long, mm_long, p10_long])

# Persist the stacked table (without the index) for Tableau / Power BI
combined_csv_path = "Lottery_Long_Combined_All.csv"
combined_lottery.to_csv(combined_csv_path, index=False)

combined_csv_path


'Lottery_Long_Combined_All.csv'

In [13]:
# Re-import necessary libraries and dataset
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# Pull the combined long-format table back in from disk
combined_lottery = pd.read_csv("Lottery_Long_Combined_All.csv")
# read_csv is called without date parsing, so convert the column explicitly;
# values that cannot be parsed become NaT rather than raising.
draw_dates = pd.to_datetime(combined_lottery['Draw Date'], errors='coerce')
combined_lottery['Draw Date'] = draw_dates

# Each chart's Figure is collected here and written to a PDF further down
figures = []

# Histogram
# Lottery numbers are expected to be whole numbers, so draw one bar per value
# (discrete=True) instead of 50 equal-width bins. Equal-width bins generally
# do not line up with integer values, so neighbouring bars can cover different
# counts of distinct numbers and show spikes that are artefacts of the binning.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=combined_lottery, x='Number', discrete=True, color='skyblue', ax=ax)
ax.set_title('Histogram of All Drawn Numbers')
ax.set_xlabel('Number')
ax.set_ylabel('Frequency')
figures.append(fig)
# Release the pyplot-managed figure; the Figure object stays referenced in `figures`.
plt.close(fig)

# Bubble Chart
# Count how often each number was drawn within each game
bubble_data = (
    combined_lottery
    .groupby(['Game', 'Number'])
    .size()
    .reset_index(name='Frequency')
)
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=bubble_data,
    x='Number',
    y='Frequency',
    size='Frequency',   # marker area scales with the draw count
    hue='Game',
    alpha=0.6,
    sizes=(20, 300),
    ax=ax,
)
ax.set_title('Bubble Chart of Number Frequency by Game')
ax.set_xlabel('Number')
ax.set_ylabel('Frequency')
figures.append(fig)
plt.close(fig)

# Scatterplot
# One point per drawn number, placed at its draw date and coloured by game
fig, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(
    data=combined_lottery,
    x='Draw Date',
    y='Number',
    hue='Game',
    alpha=0.6,
    ax=ax,
)
ax.set_title('Scatterplot of Drawn Numbers Over Time')
ax.set_xlabel('Draw Date')
ax.set_ylabel('Number')
figures.append(fig)
plt.close(fig)

# Bar Chart
# Total draw count per number, pooled over all games and ordered by number
bar_data = (
    combined_lottery[['Number']]
    .dropna()
    .groupby('Number')
    .size()
    .reset_index(name='Frequency')
    .sort_values('Number')
)
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=bar_data, x='Number', y='Frequency', ax=ax)
ax.set_title('Bar Chart of Total Number Frequency Across All Games')
ax.set_xlabel('Number')
ax.set_ylabel('Frequency')
figures.append(fig)
plt.close(fig)

# Write every collected figure into one multi-page PDF, in the order collected
final_pdf_path = "Lottery_Visuals_Report_Final.pdf"
with PdfPages(final_pdf_path) as report:
    for page in figures:
        report.savefig(page)

final_pdf_path


'Lottery_Visuals_Report_Final.pdf'