# **Generating a Dataset of Bar Charts (Horizontal and Vertical Combined) with Labels**

In [None]:
import random
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os
import json

In [None]:
# Pool of chart titles; generate_random_barcharts picks one per chart with
# random.choice. Entries are kept unique so every title is equally likely
# (a repeated entry would be drawn twice as often).
title_words = [
    "Financial Report",
    "Revenue Analysis",
    "Market Trends",
    "Profit Margins",
    "Sales Performance",
    "Expense Breakdown",
    "Quarterly Earnings",
    "Cost Analysis",
    "Budget Allocation",
    "Investment Portfolio",
    "Cash Flow Overview",
    "Balance Sheet",
    "Return on Investment",
    "Stock Market Analysis",
    "Asset Management",
    "Financial Forecast",
    "Risk Assessment",
    "Creditworthiness",
    "Market Volatility",
    "Savings and Investments",
    "Financial Health",
    "Debt Management",
    "Capital Expenditure",
    "Financial Ratios",
    "Economic Indicators",
    "Market Research",
    "Revenue Projections",
    "Financial Modeling",
    "Dividend Yield",
    "Cost of Goods Sold",
    "Stock Valuation",
    "Tax Planning",
    "Income Statement",
    "Expense Tracking",
    "Cash Flow Management",
    "Portfolio Diversification",
    "Asset Allocation",
    "Budget Planning",
    "Investment Strategies",
    "Return on Assets",
    "Market Analysis",
    "Financial Planning",
    "Risk Management",
    "Credit Analysis",
    "Savings Strategies",
    "Financial Stability",
    "Debt Reduction",
    "Capital Budgeting",
    "Financial Metrics",
    "Economic Trends",
]


In [None]:
# Candidate x-axis captions; generate_random_barcharts picks one at random
# for each chart.
x_axis_labels = [
    # Time-based groupings
    "Year", "Quarter", "Month", "Time", "Fiscal Year",
    # Business groupings
    "Revenue Source", "Product Category", "Market Segment", "Investment Type",
    "Expense Category", "Sales Channel", "Customer Type", "Asset Class",
    "Industry Sector", "Geographic Region", "Financial Metric", "Product Line",
    "Service Offering", "Payment Method", "Billing Cycle",
]


In [None]:
# Candidate y-axis captions; generate_random_barcharts picks one at random
# for each chart. Every entry ends with a comma: without one, Python silently
# concatenates adjacent string literals into a single label.
y_axis_labels = [
    "Revenue ($)",
    "Profit ($)",
    "Market Share (%)",
    "Expenses ($)",
    "Gross Margin ($)",
    "Net Income ($)",
    "Operating Costs ($)",
    "Return on Investment (%)",
    "Earnings per Share (EPS)",
    "Dividends per Share ($)",
    "Cost of Goods Sold ($)",
    "Interest Expense ($)",
    "Depreciation & Amortization ($)",
    "Net Profit Margin (%)",
    "Debt-to-Equity Ratio",
    "Cash Flow from Operations ($)",
    "Asset Turnover Ratio",
    "Earnings Before Tax ($)",
    "Inventory Turnover",
    "Return on Assets (%)",
    "$",
]


In [None]:
# Pool of bar (category) names; generate_random_barcharts draws 3-7 of them
# per chart with random.sample. Entries must be unique: random.sample picks
# by position, so a repeated name could appear twice in the same chart.
labels = [
    "Stocks", "Bonds", "Estate", "Commodities", "Cryptocurrency", "Savings", "Forex",
    "Funds", "Retirement", "Options", "Futures", "Government", "Corporate",
    "Municipal", "REITs", "Metals", "Collectibles", "Cash", "Equity",
    "Hedge", "Venture", "Sovereign", "Income", "Equities", "Derivatives",
    "Money", "Products", "Treasuries", "Annuities", "IRAs", "401(k)s",
    "Pensions", "Accounts", "Certificates", "Mortgage", "Bills",
    "Notes", "Assets", "Obligations",
    "Paper", "Preferred", "Common", "Convertible", "Emerging",
    "High-Yield", "Blue-Chip", "Growth", "Value", "Small-Cap", "Large-Cap",
]


In [None]:
def generate_random_color(low=40, high=200, grey=False):
    """Return a random RGB color as a tuple of three floats.

    Each channel is drawn as an integer with ``np.random.randint`` and then
    divided by 255. Larger channel values give lighter colors.

    Args:
        low: Inclusive lower bound for each channel. Ignored when ``grey``
            is set.
        high: Exclusive upper bound for each channel. Ignored when ``grey``
            is set.
        grey: When truthy, draw a single intensity from the fixed range
            [30, 190) and use it for all three channels.

    Returns:
        A ``(red, green, blue)`` tuple of channel values divided by 255.
    """
    if grey:
        # One draw shared by all channels yields a neutral grey.
        red = green = blue = np.random.randint(30, 190)
    else:
        # Three independent draws, in red, green, blue order.
        red, green, blue = (np.random.randint(low, high) for _ in range(3))
    return (red / 255, green / 255, blue / 255)

In [None]:
def generate_random_barcharts(photos_gen):
    """Render random bar charts to PNG files and write a JSON index of them.

    Charts alternate between vertical (even loop index) and horizontal (odd
    loop index). Each chart gets 3-7 bars with random colors, labels, values
    (100-1000), title, axis captions, font settings, grid style and figure
    size. Every bar is annotated with its integer value.

    Output goes to the folder ``base_Data_Barcharts(Labels)`` (created if
    missing): one ``chart_<n>.png`` per chart plus ``test_BarCharts.json``,
    a list of ``{"image": ..., "text": ...}`` records where ``text`` is the
    ``<0x0A>``-separated description of the chart.

    Args:
        photos_gen: Number of charts to generate.

    Returns:
        None. All results are written to disk.
    """
    dataset = []

    # Create a directory to store the PNG files and JSON data
    data_folder = 'base_Data_Barcharts(Labels)'
    os.makedirs(data_folder, exist_ok=True)

    # Sample from unique names only. If the same name were drawn twice for one
    # chart, seaborn would treat both values as one category and draw a single
    # aggregated bar, so the image would no longer match the text description.
    unique_labels = list(dict.fromkeys(labels))

    # Generate bar chart images and corresponding text data
    for i in range(photos_gen):
        # Ensure that 50% of the charts are horizontal and 50% are vertical
        is_vertical = i % 2 == 0

        num_bars = random.randint(3, 7)  # Varying number of bars
        bar_colors = [generate_random_color() for _ in range(num_bars)]
        bar_labels = random.sample(unique_labels, num_bars)
        bar_values = [random.randint(100, 1000) for _ in range(num_bars)]
        title = random.choice(title_words)
        x_label = random.choice(x_axis_labels)
        y_label = random.choice(y_axis_labels)
        font_size = random.randint(10, 16)
        # Font settings shared by the title, axis captions and annotations.
        font_kwargs = {
            'fontfamily': random.choice(['serif', 'sans-serif', 'monospace']),
            'fontstyle': random.choice(['normal', 'italic', 'oblique']),
            'fontweight': random.choice(['normal', 'bold']),
        }

        sns.set_style(random.choice(["whitegrid", "darkgrid"]))

        # Random width and height
        random_width = random.randint(6, 10)
        random_height = random.randint(4, 8)

        image_name = f'chart_{i + 1}.png'
        image_filename = os.path.join(data_folder, image_name)

        fig, ax = plt.subplots(figsize=(random_width, random_height))
        try:
            # NOTE(review): recent seaborn releases may warn when `palette` is
            # passed without `hue` -- confirm against the installed version.
            if is_vertical:
                sns.barplot(x=bar_labels, y=bar_values, palette=bar_colors, ax=ax)

                # Annotating the vertical bars with their values
                for bar in ax.patches:
                    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(),
                            str(int(bar.get_height())),
                            ha='center', va='bottom', fontsize=font_size - 2,
                            **font_kwargs)
            else:
                sns.barplot(x=bar_values, y=bar_labels, palette=bar_colors, ax=ax)

                # Annotating the horizontal bars with their values
                for bar in ax.patches:
                    ax.text(bar.get_width(), bar.get_y() + bar.get_height() / 2,
                            str(int(bar.get_width())),
                            ha='left', va='center', fontsize=font_size - 2,
                            **font_kwargs)

            # The same captions go on the same axes for both orientations.
            ax.set_xlabel(x_label, fontsize=font_size, **font_kwargs)
            ax.set_title(title, fontsize=font_size, **font_kwargs)
            ax.set_ylabel(y_label, fontsize=font_size, **font_kwargs)

            # Save the figure with a transparent background to the folder
            fig.patch.set_facecolor('none')
            fig.tight_layout()
            fig.savefig(image_filename, transparent=True)
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close(fig)

        # Construct text description in the specified format
        if is_vertical:
            header = f"TITLE | {title} <0x0A> {x_label} | {y_label} <0x0A> "
            rows = zip(bar_labels, bar_values)
        else:
            # Horizontal charts list the y-axis caption first and the bars in
            # reverse order of how they were passed to seaborn.
            header = f"TITLE | {title} <0x0A> {y_label} | {x_label} <0x0A> "
            rows = zip(reversed(bar_labels), reversed(bar_values))
        text_description = header + " <0x0A> ".join(
            f"{label} | {value}" for label, value in rows
        )

        # Append the data to the dataset
        dataset.append({
            'image': image_name,
            'text': text_description
        })

    # Save the dataset as a JSON file
    json_filename = os.path.join(data_folder, 'test_BarCharts.json')
    with open(json_filename, 'w', encoding='utf-8') as json_file:
        json.dump(dataset, json_file, indent=4)

In [None]:
generate_random_barcharts(100)  # Generate 100 random charts and save them in a single JSON