In [1]:
import google.generativeai as genai
import os

# The API key is read from the environment so the secret never lives in the notebook.
# NOTE(review): a literal key was previously committed on this line; it should be
# revoked and rotated, since it remains in version history and shared copies.
gemini_key = os.environ.get("GEMINI_API_KEY")


def gemini_model(prompt):
    """Send ``prompt`` to the ``gemini-1.5-flash`` model and return the reply text.

    Args:
        prompt: The complete prompt text to submit to the model.

    Returns:
        The model's response text as a ``str``.

    Raises:
        RuntimeError: If no API key is available in ``gemini_key`` or the
            ``GEMINI_API_KEY`` environment variable.
    """
    # Re-check the environment at call time so a key exported after this cell ran is picked up.
    api_key = gemini_key or os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No Gemini API key configured: set the GEMINI_API_KEY environment variable."
        )

    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(model_name="gemini-1.5-flash")
    response = model.generate_content(prompt)
    return str(response.text)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
import csv
import random

# Function to pick a random entry from a CSV file
def pick_random_from_csv(file_name):
    """Return the first-column value of a randomly chosen data row in a CSV file.

    The first row is treated as a header and skipped. Blank lines (which
    ``csv.reader`` yields as empty lists) are ignored so they cannot be picked.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        The first cell of one randomly selected non-empty data row.

    Raises:
        ValueError: If the file has no data rows after the header.
    """
    with open(file_name, newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip header row; tolerate a completely empty file
        first_column = [row[0] for row in reader if row]

    if not first_column:
        raise ValueError(f"No data rows found in CSV file: {file_name}")
    return random.choice(first_column)

# Pick a random chart theme and trend
def get_random_chart_theme_and_trend():
    """Draw the random inputs for one generated chart.

    Returns:
        A 4-tuple ``(chart_theme, chart_trend, no_of_rows, no_of_cols)``: a theme
        picked from ``themes.csv``, a trend picked from ``trends.csv``, a row count
        between 1 and 12 inclusive, and a column count between 1 and 10 inclusive.
    """
    theme = pick_random_from_csv('themes.csv')
    trend = pick_random_from_csv('trends.csv')
    row_count = random.randint(1, 12)
    col_count = random.randint(1, 10)
    return theme, trend, row_count, col_count


In [13]:
import re
def extract_json_section(input_string, tag):
    """Return the text between the first ``<tag>`` and the next ``</tag>``.

    Args:
        input_string: Text to search (for example a raw model response).
        tag: Tag name without angle brackets. It is matched literally, so regex
            metacharacters in the name have no special meaning.

    Returns:
        The enclosed text with surrounding whitespace stripped, or ``None`` when
        no matching pair of tags is found.
    """
    literal_tag = re.escape(tag)
    # DOTALL lets the section span multiple lines; the non-greedy group stops at the first closing tag.
    match = re.search(f"<{literal_tag}>(.*?)</{literal_tag}>", input_string, re.DOTALL)
    return match.group(1).strip() if match else None

In [14]:
import json 
def parse_json(json_str):
    """Parse a JSON string, tolerating Python-style single-quoted literals.

    Three attempts are made in order:
      1. strict ``json.loads``;
      2. ``json.loads`` after replacing every single quote with a double quote;
      3. ``ast.literal_eval``, which handles text the quote replacement corrupts,
         such as an apostrophe inside a double-quoted value.

    Args:
        json_str: The text to parse.

    Returns:
        The decoded Python object.

    Raises:
        ValueError: If none of the attempts can parse the text. The message and
            the chained cause come from the second attempt's decode error.
    """
    import ast

    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        pass

    try:
        # Try replacing single quotes with double quotes and parsing again
        return json.loads(json_str.replace("'", '"'))
    except json.JSONDecodeError as e:
        decode_error = e

    try:
        # literal_eval only evaluates literals, so it is safe on untrusted text.
        return ast.literal_eval(json_str.strip())
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        raise ValueError(f"Invalid JSON: {decode_error}") from decode_error

Radar Chart

In [51]:
import re
import os
import json
import plotly.graph_objects as go  

def save_json_to_unique_folder(input_string, chart_type='radar chart', base_folder='dataset'):
    """Extract the tagged sections of a model response, save them, and render a radar chart.

    The response must contain ``<data>``, ``<config>``, ``<layout>``, ``<csv>`` and
    ``<description>`` sections. Everything is extracted, parsed and validated before
    any folder is created, so a bad response does not leave a partial sample on disk.

    Args:
        input_string: Raw model response containing the tagged sections.
        chart_type: Prefix of the output folder name (``<chart_type>_<n>``).
        base_folder: Directory under which the numbered sample folder is created.

    Raises:
        ValueError: If a required section is missing, the JSON cannot be parsed,
            the data or layout is empty, or a trace is not of type ``scatterpolar``.

    Side effects:
        Writes ``data.json``, ``config.json``, ``layout.json``, ``data.csv``,
        ``description.txt`` and ``radar_chart.png`` into the new folder and prints
        the folder path.
    """
    # Extract every section up front so a missing one is reported before writing anything.
    section_tags = ('data', 'config', 'layout', 'csv', 'description')
    sections = {tag: extract_json_section(input_string, tag) for tag in section_tags}
    missing = [tag for tag in section_tags if not sections[tag]]
    if missing:
        raise ValueError(
            "One or more required sections were not found in the input string: "
            + ", ".join(missing)
        )

    # Use the relaxed parser for both saving and plotting so the two cannot disagree.
    data = parse_json(sections['data'])
    config = parse_json(sections['config'])
    layout = parse_json(sections['layout'])

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Prepare traces for the plot
    traces = []
    for trace_data in data:
        trace_type = trace_data.get('type')
        if trace_type != 'scatterpolar':
            raise ValueError(f"Unsupported trace type: {trace_type}")

        # Copy only the styling keys that are actually present; a partial marker/line
        # dict (e.g. colour without size) must not raise KeyError.
        marker = trace_data.get('marker')
        line = trace_data.get('line')
        traces.append(go.Scatterpolar(
            r=trace_data.get('r', []),
            theta=trace_data.get('theta', []),
            mode=trace_data.get('mode', 'markers+lines'),
            name=trace_data.get('name', ''),
            marker={key: marker[key] for key in ('size', 'color') if key in marker}
            if isinstance(marker, dict) and marker else None,
            line={key: line[key] for key in ('color', 'width') if key in line}
            if isinstance(line, dict) and line else None,
            text=trace_data.get('theta', []),  # Label each point with its corresponding theta value
            textposition='top center',  # Position the labels above each point
        ))

    # Create figure with the extracted layout and data
    fig = go.Figure(data=traces, layout=layout)

    # Create a base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from "number of existing folders + 1" and skip any name already taken, so a
    # deleted or differently named sibling folder can never cause an overwrite.
    existing_dirs = [
        name for name in os.listdir(base_folder)
        if os.path.isdir(os.path.join(base_folder, name))
    ]
    folder_number = len(existing_dirs) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections as files in the unique folder
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data and the description; UTF-8 avoids platform-codec encode errors.
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(sections['csv'])
    with open(os.path.join(unique_folder, 'description.txt'), 'w', encoding='utf-8') as description_file:
        description_file.write(sections['description'])

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "radar_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and radar chart image successfully saved in folder: {unique_folder}")


In [52]:
# Chart type handed to generate_dataset: it is interpolated into the prompt and
# passed on as chart_type, which becomes the prefix of the output folder name.
type_of_chart = "radar"

In [54]:
def generate_dataset(type_of_chart):
    """Generate one radar-chart sample: build the prompt, query Gemini, save the result.

    A random theme, trend and table size are drawn, formatted into the prompt
    template below, and sent to ``gemini_model``. The reply is handed to
    ``save_json_to_unique_folder`` with ``type_of_chart`` as the folder prefix.

    Args:
        type_of_chart: Chart type named in the prompt and used as the ``chart_type``
            folder prefix.
    """
    chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()
    # Literal JSON braces are doubled ({{ }}) because the template goes through str.format.
    # The description instruction names the same <description> tags as the example below
    # and as the parser extracts.
    prompt = '''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** {type_of_chart}
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Rows: {no_of_rows}
   - Number of Columns: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the first column and row of the table.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Labels for the x-axis and y-axis.
     - A legend for distinguishing between data series.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description> and </description> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between data series.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers and data to reflect the graph's content.

### Example Structure:
<data> - this must be enclosed in <data> tags
[
  {{
    "type": "scatterpolar",
    "r": [10, 15, 13, 17, 20],
    "theta": ["A", "B", "C", "D", "E"],
    "mode": "lines+markers",
    "name": "Series 1",
    "marker": {{
      "size": 8,
      "color": "blue"
    }},
    "line": {{
      "color": "blue",
      "width": 2
    }}
  }},
  {{
    "type": "scatterpolar",
    "r": [12, 18, 16, 14, 22],
    "theta": ["A", "B", "C", "D", "E"],
    "mode": "lines+markers",
    "name": "Series 2",
    "marker": {{
      "size": 8,
      "color": "red"
    }},
    "line": {{
      "color": "red",
      "width": 2
    }}
  }}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags

{{
  "polar": {{
    "radialaxis": {{
      "visible": true,
      "range": [0, 25],
      "title": {{
        "text": "Values"
      }}
    }},
    "angularaxis": {{
      "tickmode": "array",
      "tickvals": ["A", "B", "C", "D", "E"],
      "ticktext": ["A", "B", "C", "D", "E"]
    }}
  }},
  "title": {{
    "text": "Radar Chart Example",
    "font": {{
      "size": 18
    }}
  }}
}}
</layout>

<description>
[Detailed description of the chart]
</description>

<csv>
[Generated CSV Data]
</csv>
Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure and you have not used ``` anywhere in the file. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
'''.format(
    chart_theme=chart_theme,
    type_of_chart=type_of_chart,
    chart_trend=chart_trend,
    no_of_rows=no_of_rows,
    no_of_cols=no_of_cols
)

    output = gemini_model(prompt)
    save_json_to_unique_folder(output, chart_type=type_of_chart)



generate_dataset(type_of_chart)


JSON files and radar chart image successfully saved in folder: dataset\radar_74


Scatter Plot

In [9]:
import re
import os
import json
import plotly.graph_objects as go

def save_json_to_unique_folder(input_string, chart_type='scatter chart', base_folder='dataset'):
    """Extract the tagged sections of a model response, save them, and render a scatter chart.

    The response must contain ``<data>``, ``<config>``, ``<layout>`` and ``<csv>``
    sections. Everything is extracted, parsed and validated before any folder is
    created, so a bad response does not leave a partial sample on disk.

    Args:
        input_string: Raw model response containing the tagged sections.
        chart_type: Prefix of the output folder name (``<chart_type>_<n>``).
        base_folder: Directory under which the numbered sample folder is created.

    Raises:
        ValueError: If a required section is missing, the JSON cannot be parsed,
            the data or layout is empty, or a trace is not of type ``scatter``.

    Side effects:
        Writes ``data.json``, ``config.json``, ``layout.json``, ``data.csv`` and
        ``scatter_chart.png`` into the new folder and prints the folder path.
    """
    # Extract every section up front so a missing one is reported before writing anything.
    section_tags = ('data', 'config', 'layout', 'csv')
    sections = {tag: extract_json_section(input_string, tag) for tag in section_tags}
    missing = [tag for tag in section_tags if not sections[tag]]
    if missing:
        raise ValueError(
            "One or more required sections were not found in the input string: "
            + ", ".join(missing)
        )

    # Same relaxed parser as the other chart savers in this notebook.
    data = parse_json(sections['data'])
    config = parse_json(sections['config'])
    layout = parse_json(sections['layout'])

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Prepare traces for the scatter plot
    traces = []
    for trace_data in data:
        trace_type = trace_data.get('type')
        if trace_type != 'scatter':
            raise ValueError(f"Unsupported trace type: {trace_type}")

        # Copy only the styling keys that are actually present; a partial marker/line
        # dict (e.g. colour without size) must not raise KeyError.
        marker = trace_data.get('marker')
        line = trace_data.get('line')
        traces.append(go.Scatter(
            x=trace_data.get('x', []),
            y=trace_data.get('y', []),
            mode=trace_data.get('mode', 'markers+lines'),
            name=trace_data.get('name', ''),
            marker={key: marker[key] for key in ('size', 'color') if key in marker}
            if isinstance(marker, dict) and marker else None,
            line={key: line[key] for key in ('color', 'width') if key in line}
            if isinstance(line, dict) and line else None,
        ))

    # Create figure with the extracted layout and data
    fig = go.Figure(data=traces, layout=layout)

    # Create a base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from "number of existing folders + 1" and skip any name already taken, so a
    # deleted or differently named sibling folder can never cause an overwrite.
    existing_dirs = [
        name for name in os.listdir(base_folder)
        if os.path.isdir(os.path.join(base_folder, name))
    ]
    folder_number = len(existing_dirs) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections as files in the unique folder
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(sections['csv'])

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "scatter_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and scatter chart image successfully saved in folder: {unique_folder}")



In [10]:
# NOTE(review): the scatter prompt and generate_scatter in this section both hard-code
# "scatter", so this variable does not appear to be read by them — confirm it is still needed.
type_of_chart = 'scatter'

In [12]:
# Draw the random theme, trend and table dimensions that the scatter prompt interpolates.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()


In [15]:
# Scatter-chart prompt. Literal JSON braces are doubled ({{ }}) because this is an f-string.
# The description instruction names the same <description> tags as the example section.
prompt = f"""
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** scatter
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
- Number of Rows: {no_of_rows}
- Number of Columns: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the first column and row of the table.

### Output Requirements:

1. **Data (data.json):**
- Generate JSON data based on the provided theme, trends, and dimensions.
- Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
- Define the Plotly configuration for rendering the chart.
- Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
- Define the layout of the chart including:
    - Title reflecting the chart's theme.
    - Labels for the x-axis and y-axis.
    - A legend for distinguishing between data series.
    - Annotations for key data points, especially outliers.
    - Margins, gridlines, and other formatting details.

4. **Chart Description:**
- Provide a detailed description wrapped within <description> and </description> tags.
- Focus on:
    - The chart’s theme.
    - The general and individual trends in the data.
    - Any comparisons between data series.
    - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
- Generate a CSV file that represents the graph data in tabular form.
- Include relevant headers and data to reflect the graph's content.

### Example Structure:
<data> - this must be enclosed in <data> tags
[
    {{
        "type": "scatter",
        "x": [1, 2, 3, 4, 5],
        "y": [10, 15, 13, 17, 14],
        "mode": "markers+lines",
        "name": "Sample Scatter",
        "marker": {{
            "size": 10,
            "color": "blue"
        }},
        "line": {{
            "color": "blue",
            "width": 2
        }}
    }}
]

</data>

<config> - this must be enclosed in <config> tags
{{
    "displayModeBar": true,
    "scrollZoom": true
}}

</config>

<layout> - this must be enclosed in <layout> tags

{{
    "title": {{
        "text": "Sample Scatter Plot"
    }},
    "xaxis": {{
        "title": {{
            "text": "X Axis"
        }}
    }},
    "yaxis": {{
        "title": {{
            "text": "Y Axis"
        }}
    }}
}}

</layout>

<description>
[Detailed description of the chart]
</description>

<csv>
[Generated CSV Data]
</csv>
Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure and you have not used ``` anywhere in the file. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
"""


In [17]:
def generate_scatter(prompt):
    """Query Gemini with ``prompt`` and save the reply as a scatter-chart sample.

    Args:
        prompt: Fully formatted prompt text to send to the model.
    """
    model_reply = gemini_model(prompt)
    save_json_to_unique_folder(model_reply, chart_type="scatter")


generate_scatter(prompt)


JSON files and scatter chart image successfully saved in folder: dataset\scatter_39


Gantt Chart

In [57]:
# Chart type interpolated into the Gantt prompt and passed by generate_dataset as
# chart_type, so it also becomes the output folder prefix (including the space).
type_of_chart = 'Gantt Charts'

In [30]:
import re
import os
import json
import plotly.express as px

def save_json_to_unique_folder(input_string, chart_type='gantt chart', base_folder='dataset'):
    """Extract the tagged sections of a model response, save them, and render a Gantt chart.

    The response must contain ``<data>``, ``<config>``, ``<layout>`` and ``<csv>``
    sections (the description section is not saved for this chart type). Everything
    is extracted, parsed and validated before any folder is created, so a response
    with unusable task data does not leave a partial sample on disk.

    Args:
        input_string: Raw model response containing the tagged sections.
        chart_type: Prefix of the output folder name (``<chart_type>_<n>``).
        base_folder: Directory under which the numbered sample folder is created.

    Raises:
        ValueError: If a required section is missing, the JSON cannot be parsed,
            the data or layout is empty, or a data item lacks ``task``, ``start``
            or ``finish``.

    Side effects:
        Writes ``data.json``, ``config.json``, ``layout.json``, ``data.csv`` and
        ``gantt_chart.png`` into the new folder and prints the folder path.
    """
    # Extract every section up front so a missing one is reported before writing anything.
    section_tags = ('data', 'config', 'layout', 'csv')
    sections = {tag: extract_json_section(input_string, tag) for tag in section_tags}
    missing = [tag for tag in section_tags if not sections[tag]]
    if missing:
        raise ValueError(
            "One or more required sections were not found in the input string: "
            + ", ".join(missing)
        )

    # Use the relaxed parser for both saving and plotting so the two cannot disagree.
    data = parse_json(sections['data'])
    config = parse_json(sections['config'])
    layout = parse_json(sections['layout'])

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Gantt chart requires specific fields like 'Task', 'Start', 'Finish'
    tasks = []
    for task_data in data:
        task_name = task_data.get('task')
        start_date = task_data.get('start')
        finish_date = task_data.get('finish')
        if not task_name or not start_date or not finish_date:
            raise ValueError("Missing 'task', 'start', or 'finish' field in data for Gantt chart.")

        tasks.append(dict(Task=task_name, Start=start_date, Finish=finish_date))

    # The layout title may be a {"text": ...} dict or a plain string; accept both.
    title = layout.get('title', {})
    title_text = title.get('text', 'Gantt Chart') if isinstance(title, dict) else str(title)

    # Plot Gantt chart using Plotly Express
    fig = px.timeline(tasks, x_start="Start", x_end="Finish", y="Task", title=title_text)

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Create a base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from "number of existing folders + 1" and skip any name already taken, so a
    # deleted or differently named sibling folder can never cause an overwrite.
    existing_dirs = [
        name for name in os.listdir(base_folder)
        if os.path.isdir(os.path.join(base_folder, name))
    ]
    folder_number = len(existing_dirs) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections as files in the unique folder
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(sections['csv'])

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "gantt_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and Gantt chart image successfully saved in folder: {unique_folder}")



In [27]:
# Draw the random theme, trend and table dimensions that the Gantt prompt interpolates.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Gantt-chart prompt. Literal JSON braces are doubled ({{ }}) because this is an f-string.
# The example uses a single opening <data> tag (like <config> and <layout>), and the
# description instruction names the same <description> tags as the example section.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** {type_of_chart}
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Rows: {no_of_rows}
   - Number of Columns: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the first column and row of the table.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Labels for the x-axis and y-axis.
     - A legend for distinguishing between data series.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description> and </description> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between data series.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers and data to reflect the graph's content.

### Example Structure:

<data> - this must be enclosed in <data> tags
[
  {{
    "task": "Music Festival",
    "start": "2024-01-15",
    "finish": "2024-05-15"
  }},
  {{
    "task": "Art Exhibition",
    "start": "2024-02-01",
    "finish": "2024-05-28"
  }},
  {{
    "task": "Theater Performance",
    "start": "2024-01-01",
    "finish": "2024-05-01"
  }},
  {{
    "task": "Film Festival",
    "start": "2024-01-15",
    "finish": "2024-05-15"
  }},
  {{
    "task": "Food Festival",
    "start": "2024-02-01",
    "finish": "2024-05-28"
  }}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Gantt Chart Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Dates",
    "type": "date"
  }},
  "yaxis": {{
    "title": "Tasks"
  }},
  "bargap": 0.2
}}
</layout>

<description>
[Detailed description of the chart]
</description>

<csv>
[Generated CSV Data]
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure and you have not used ``` anywhere in the file. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
'''


In [28]:
def generate_dataset(prompt):
    """Query Gemini with ``prompt``, echo the reply, and save it as a chart sample.

    The output folder prefix comes from the module-level ``type_of_chart``.

    Args:
        prompt: Fully formatted prompt text to send to the model.
    """
    model_reply = gemini_model(prompt)
    print(model_reply)
    save_json_to_unique_folder(model_reply, chart_type=type_of_chart)


In [31]:
# Run the Gantt generation pipeline with the current `prompt`.
generate_dataset(prompt)

## Grocery Price Comparison

### <data>
[
  {
    "Grocery Store": "Walmart",
    "Milk (gallon)": 3.49,
    "Eggs (dozen)": 2.99,
    "Bread (loaf)": 2.49,
    "Chicken (lb)": 4.99,
    "Beef (lb)": 8.99,
    "Apples (lb)": 1.49,
    "Bananas (lb)": 0.59,
    "Oranges (lb)": 1.29,
    "Potatoes (lb)": 0.99
  },
  {
    "Grocery Store": "Target",
    "Milk (gallon)": 3.79,
    "Eggs (dozen)": 3.29,
    "Bread (loaf)": 2.69,
    "Chicken (lb)": 5.49,
    "Beef (lb)": 9.49,
    "Apples (lb)": 1.69,
    "Bananas (lb)": 0.69,
    "Oranges (lb)": 1.49,
    "Potatoes (lb)": 1.19
  },
  {
    "Grocery Store": "Kroger",
    "Milk (gallon)": 3.99,
    "Eggs (dozen)": 3.49,
    "Bread (loaf)": 2.79,
    "Chicken (lb)": 5.99,
    "Beef (lb)": 9.99,
    "Apples (lb)": 1.89,
    "Bananas (lb)": 0.79,
    "Oranges (lb)": 1.59,
    "Potatoes (lb)": 1.29
  },
  {
    "Grocery Store": "Trader Joe's",
    "Milk (gallon)": 2.99,
    "Eggs (dozen)": 2.49,
    "Bread (loaf)": 1.99,
    "Chicken (lb)": 4.49

ValueError: Missing 'task', 'start', or 'finish' field in data for Gantt chart.

Sunburst

In [33]:
# Passed by generate_dataset as chart_type, i.e. the output folder prefix. The sunburst
# prompt itself hard-codes "Sunburst Chart" and does not read this variable.
type_of_chart = 'sunburst'

In [34]:
import re
import os
import json
import plotly.express as px

def save_json_to_unique_folder(input_string, chart_type='sunburst chart', base_folder='dataset'):
    """Extract the tagged sections of a model response, save them, and render a Sunburst chart.

    The response must contain ``<data>``, ``<config>``, ``<layout>`` and ``<csv>``
    sections. Everything is extracted, parsed and validated before any folder is
    created, so a bad response does not leave a partial sample on disk.

    Args:
        input_string: Raw model response containing the tagged sections.
        chart_type: Prefix of the output folder name (``<chart_type>_<n>``).
        base_folder: Directory under which the numbered sample folder is created.

    Raises:
        ValueError: If a required section is missing, the JSON cannot be parsed,
            the data or layout is empty, or a data item lacks ``label``,
            ``parent`` or ``value``.

    Side effects:
        Writes ``data.json``, ``config.json``, ``layout.json``, ``data.csv`` and
        ``sunburst_chart.png`` into the new folder and prints the folder path.
    """
    # Extract every section up front so a missing one is reported before writing anything.
    section_tags = ('data', 'config', 'layout', 'csv')
    sections = {tag: extract_json_section(input_string, tag) for tag in section_tags}
    missing = [tag for tag in section_tags if not sections[tag]]
    if missing:
        raise ValueError(
            "One or more required sections were not found in the input string: "
            + ", ".join(missing)
        )

    # Use the relaxed parser for both saving and plotting so the two cannot disagree.
    data = parse_json(sections['data'])
    config = parse_json(sections['config'])
    layout = parse_json(sections['layout'])

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Sunburst chart requires hierarchical data with 'parents' and 'labels'.
    # The three lists are built from the same items so they stay index-aligned;
    # an item missing any field is rejected rather than silently shifting the rest.
    labels, parents, values = [], [], []
    for item in data:
        if not all(field in item for field in ('label', 'parent', 'value')):
            raise ValueError("Missing 'label', 'parent', or 'value' field in data for Sunburst chart.")
        labels.append(item['label'])
        parents.append(item['parent'])
        values.append(item['value'])

    # The layout title may be a {"text": ...} dict or a plain string; accept both.
    title = layout.get('title', {})
    title_text = title.get('text', 'Sunburst Chart') if isinstance(title, dict) else str(title)

    # Plot Sunburst chart using Plotly Express
    fig = px.sunburst(
        names=labels,
        parents=parents,
        values=values,
        title=title_text
    )

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Create a base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from "number of existing folders + 1" and skip any name already taken, so a
    # deleted or differently named sibling folder can never cause an overwrite.
    existing_dirs = [
        name for name in os.listdir(base_folder)
        if os.path.isdir(os.path.join(base_folder, name))
    ]
    folder_number = len(existing_dirs) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections as files in the unique folder
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(sections['csv'])

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "sunburst_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and Sunburst chart image successfully saved in folder: {unique_folder}")


In [35]:
# Draw the random theme, trend and table dimensions that the Sunburst prompt interpolates.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Sunburst-chart prompt. Literal JSON braces are doubled ({{ }}) because this is an f-string.
# The example uses a single opening <data> tag (like <config> and <layout>), and the
# description instruction names the same <description> tags as the example section.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** Sunburst Chart
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Rows: {no_of_rows}
   - Number of Columns: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the first column and row of the table.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Labels for the x-axis and y-axis.
     - A legend for distinguishing between data series.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description> and </description> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between data series.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers and data to reflect the graph's content.

### Example Structure:

<data> - this must be enclosed in <data> tags
[
  {{
    "label": "Root",
    "parent": "",
    "value": 100
  }},
  {{
    "label": "Branch A",
    "parent": "Root",
    "value": 50
  }},
  {{
    "label": "Branch B",
    "parent": "Root",
    "value": 30
  }},
  {{
    "label": "Leaf A1",
    "parent": "Branch A",
    "value": 25
  }},
  {{
    "label": "Leaf A2",
    "parent": "Branch A",
    "value": 25
  }},
  {{
    "label": "Leaf B1",
    "parent": "Branch B",
    "value": 15
  }},
  {{
    "label": "Leaf B2",
    "parent": "Branch B",
    "value": 15
  }}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Sunburst Chart Example",
    "font": {{
      "size": 18
    }}
  }},
  "showlegend": true
}}
</layout>

<description>
This Sunburst chart represents a hierarchy of nodes, starting from the 'Root' with two branches: 'Branch A' and 'Branch B'. Each branch is further subdivided into leaves. The size of each node is determined by its value, with 'Root' representing the total value of 100. Branch A has a total value of 50, and Branch B has a value of 30. The leaves represent further subdivisions with corresponding values, providing insights into the relative sizes of different subcategories within the hierarchy.
</description>

<csv>
Label,Parent,Value
Root,,100
Branch A,Root,50
Branch B,Root,30
Leaf A1,Branch A,25
Leaf A2,Branch A,25
Leaf B1,Branch B,15
Leaf B2,Branch B,15
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure and you have not used ``` anywhere in the file. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
'''



In [36]:
def generate_dataset(prompt):
    """Ask the model for one chart sample and persist whatever it returns.

    The reply is echoed to the cell output and then handed to the currently
    defined ``save_json_to_unique_folder``. The folder prefix is taken from the
    module-level ``type_of_chart`` variable, so that global must be set first.
    """
    model_reply = gemini_model(prompt)
    print(model_reply)
    save_json_to_unique_folder(model_reply, chart_type=type_of_chart)


In [37]:
# Generate one sample from the current `prompt` and save it with the active save_json_to_unique_folder.
generate_dataset(prompt)

## Social Engagement Calendar Sunburst Chart

### Data (data.json)

```json
<data>
[
  {
    "label": "Social Media",
    "parent": "",
    "value": 100
  },
  {
    "label": "Facebook",
    "parent": "Social Media",
    "value": 35
  },
  {
    "label": "Instagram",
    "parent": "Social Media",
    "value": 25
  },
  {
    "label": "Twitter",
    "parent": "Social Media",
    "value": 20
  },
  {
    "label": "LinkedIn",
    "parent": "Social Media",
    "value": 15
  },
  {
    "label": "YouTube",
    "parent": "Social Media",
    "value": 5
  },
  {
    "label": "Content",
    "parent": "Facebook",
    "value": 25
  },
  {
    "label": "Engagement",
    "parent": "Facebook",
    "value": 10
  },
  {
    "label": "Ads",
    "parent": "Facebook",
    "value": 5
  },
  {
    "label": "Stories",
    "parent": "Instagram",
    "value": 15
  },
  {
    "label": "Posts",
    "parent": "Instagram",
    "value": 10
  },
  {
    "label": "Live",
    "parent": "Instagram",
    "value": 5
  },





JSON files and Sunburst chart image successfully saved in folder: dataset\sunburst_45


Area Plots 

In [None]:
# NOTE(review): the recorded output of this cell ends in a Gantt-chart ValueError, which suggests
# it was executed while a Gantt version of save_json_to_unique_folder was still defined in the
# kernel. Re-run it only after the Area definitions further down have been executed.
generate_dataset(prompt)

## Grocery Price Comparison

### <data>
[
  {
    "Grocery Store": "Walmart",
    "Milk (gallon)": 3.49,
    "Eggs (dozen)": 2.99,
    "Bread (loaf)": 2.49,
    "Chicken (lb)": 4.99,
    "Beef (lb)": 8.99,
    "Apples (lb)": 1.49,
    "Bananas (lb)": 0.59,
    "Oranges (lb)": 1.29,
    "Potatoes (lb)": 0.99
  },
  {
    "Grocery Store": "Target",
    "Milk (gallon)": 3.79,
    "Eggs (dozen)": 3.29,
    "Bread (loaf)": 2.69,
    "Chicken (lb)": 5.49,
    "Beef (lb)": 9.49,
    "Apples (lb)": 1.69,
    "Bananas (lb)": 0.69,
    "Oranges (lb)": 1.49,
    "Potatoes (lb)": 1.19
  },
  {
    "Grocery Store": "Kroger",
    "Milk (gallon)": 3.99,
    "Eggs (dozen)": 3.49,
    "Bread (loaf)": 2.79,
    "Chicken (lb)": 5.99,
    "Beef (lb)": 9.99,
    "Apples (lb)": 1.89,
    "Bananas (lb)": 0.79,
    "Oranges (lb)": 1.59,
    "Potatoes (lb)": 1.29
  },
  {
    "Grocery Store": "Trader Joe's",
    "Milk (gallon)": 2.99,
    "Eggs (dozen)": 2.49,
    "Bread (loaf)": 1.99,
    "Chicken (lb)": 4.49

ValueError: Missing 'task', 'start', or 'finish' field in data for Gantt chart.

Area

In [43]:
# Folder-name prefix for this section: generate_dataset reads this global and passes it as chart_type.
type_of_chart = "area"

In [45]:
import re
import os
import json
import plotly.express as px

def save_json_to_unique_folder(input_string, chart_type='area chart', base_folder='dataset'):
    """Extract the tagged sections of a model reply, plot an Area chart, and save everything.

    Parameters
    ----------
    input_string : str
        Raw model output containing ``<data>``, ``<config>``, ``<layout>`` and
        ``<csv>`` sections.
    chart_type : str
        Prefix of the output folder name (``<chart_type>_<n>``).
    base_folder : str
        Directory that receives the numbered folder; created when absent.

    Raises
    ------
    ValueError
        If a section is missing, a JSON section cannot be parsed, data or layout
        is empty, or no record carries both an 'x' and a 'y' field.

    Notes
    -----
    All parsing, validation and figure construction happen before anything is
    written, so a rejected reply no longer leaves a half-populated folder behind.
    """
    # Pull the four tagged sections out of the raw reply.
    data_json = extract_json_section(input_string, 'data')
    config_json = extract_json_section(input_string, 'config')
    layout_json = extract_json_section(input_string, 'layout')
    csv_data = extract_json_section(input_string, 'csv')

    # The csv section is checked as well: writing None to data.csv used to fail
    # with a TypeError after the folder and JSON files had already been created.
    if not data_json or not config_json or not layout_json or csv_data is None:
        raise ValueError("One or more JSON or csv sections (data, config, layout) were not found in the input string.")

    # Parse each section once with the lenient parser, so the files saved to disk
    # and the objects used for plotting always come from the same parse.
    data = parse_json(data_json)
    layout = parse_json(layout_json)
    config = parse_json(config_json)

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Keep only records carrying both coordinates so x and y stay aligned.
    records = [item for item in data if isinstance(item, dict) and 'x' in item and 'y' in item]
    if not records:
        raise ValueError("Missing 'x' or 'y' field in data for Area chart.")

    x_values = [record['x'] for record in records]
    y_values = [record['y'] for record in records]
    # Colour by category only when every plotted record has one; a partial list
    # would not line up with x/y.
    categories = [record['category'] for record in records] if all('category' in record for record in records) else None

    # Plotly accepts the layout title either as an object ({"text": ...}) or as a plain string.
    title = layout.get('title', {})
    title_text = title.get('text', 'Area Chart') if isinstance(title, dict) else str(title)

    # Plot Area chart using Plotly Express
    plot_kwargs = {'x': x_values, 'y': y_values, 'title': title_text}
    if categories:
        plot_kwargs['color'] = categories
    fig = px.area(**plot_kwargs)

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Create the base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from the historical "number of sub-folders + 1" and skip forward past
    # any name that already exists, so an earlier sample is never overwritten.
    folder_number = sum(
        os.path.isdir(os.path.join(base_folder, name)) for name in os.listdir(base_folder)
    ) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections; explicit UTF-8 avoids platform-dependent encodings.
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(csv_data)

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "area_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and Area chart image successfully saved in folder: {unique_folder}")


In [46]:
# Draw a random theme, trend and table size for this generation round.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Prompt for Area-chart samples. This is an f-string, so the literal braces of the
# JSON examples are doubled ({{ }}). The description instruction names the same
# <description>...</description> tags that the example uses.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** Area Chart
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Rows: {no_of_rows}
   - Number of Columns: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the first column and row of the table.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Labels for the x-axis and y-axis.
     - A legend for distinguishing between data series.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description> and </description> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between data series.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers and data to reflect the graph's content.

### Example Structure:

<data> - this must be enclosed in <data> tags
<data>
[
  {{
    "x": "2024-01",
    "y": 120,
    "category": "Category A"
  }},
  {{
    "x": "2024-02",
    "y": 150,
    "category": "Category A"
  }},
  {{
    "x": "2024-03",
    "y": 180,
    "category": "Category A"
  }},
  {{
    "x": "2024-01",
    "y": 90,
    "category": "Category B"
  }},
  {{
    "x": "2024-02",
    "y": 130,
    "category": "Category B"
  }},
  {{
    "x": "2024-03",
    "y": 160,
    "category": "Category B"
  }}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Area Chart Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Date",
    "type": "date"
  }},
  "yaxis": {{
    "title": "Values"
  }},
  "showlegend": true
}}
</layout>

<description>
This Area chart visualizes the trend of values over time for two categories: 'Category A' and 'Category B'. The x-axis represents the time period (e.g., months), while the y-axis shows the values. The chart displays the area covered by each category, highlighting the trends and comparisons between the two categories over the selected period. The chart includes different colored areas for each category, providing a clear view of how values evolve over time and the relative performance between the categories.
</description>

<csv>
x, y, category
2024-01, 120, Category A
2024-02, 150, Category A
2024-03, 180, Category A
2024-01, 90, Category B
2024-02, 130, Category B
2024-03, 160, Category B
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
'''


In [47]:
def generate_dataset(prompt):
    """Run one generation round: query the model, echo its reply, save the sample.

    Saving is delegated to whichever ``save_json_to_unique_folder`` is currently
    defined; the folder prefix comes from the module-level ``type_of_chart``.
    """
    reply_text = gemini_model(prompt)
    print(reply_text)
    save_json_to_unique_folder(reply_text, chart_type=type_of_chart)


In [49]:
# Generate one Area-chart sample from the prompt built above and write it to disk.
generate_dataset(prompt)

## Weekly Commute Time Area Chart Visualization

### <data>
[
  {
    "x": "Monday",
    "y": 35,
    "category": "Car"
  },
  {
    "x": "Tuesday",
    "y": 38,
    "category": "Car"
  },
  {
    "x": "Wednesday",
    "y": 42,
    "category": "Car"
  },
  {
    "x": "Thursday",
    "y": 45,
    "category": "Car"
  },
  {
    "x": "Friday",
    "y": 40,
    "category": "Car"
  },
  {
    "x": "Saturday",
    "y": 25,
    "category": "Car"
  },
  {
    "x": "Sunday",
    "y": 20,
    "category": "Car"
  },
  {
    "x": "Monday",
    "y": 40,
    "category": "Public Transport"
  },
  {
    "x": "Tuesday",
    "y": 45,
    "category": "Public Transport"
  },
  {
    "x": "Wednesday",
    "y": 50,
    "category": "Public Transport"
  },
  {
    "x": "Thursday",
    "y": 55,
    "category": "Public Transport"
  },
  {
    "x": "Friday",
    "y": 50,
    "category": "Public Transport"
  },
  {
    "x": "Saturday",
    "y": 35,
    "category": "Public Transport"
  },
  {
    "x": "Sunday",
  

Box Plot

In [50]:
# Folder-name prefix for this section (read as a global by generate_dataset). The value
# is used verbatim, so the space ends up in the folder name: "box plot_<n>".
type_of_chart = "box plot"

In [60]:
import re
import os
import json
import plotly.express as px
import pandas as pd

def save_json_to_unique_folder(input_string, chart_type='box plot', base_folder='dataset'):
    """Extract the tagged sections of a model reply, plot a Box plot, and save everything.

    Parameters
    ----------
    input_string : str
        Raw model output containing ``<data>``, ``<config>``, ``<layout>`` and
        ``<csv>`` sections.
    chart_type : str
        Prefix of the output folder name (``<chart_type>_<n>``).
    base_folder : str
        Directory that receives the numbered folder; created when absent.

    Raises
    ------
    ValueError
        If a section is missing, a JSON section cannot be parsed, data or layout
        is empty, or the data has no 'x' or no 'y' field.

    Notes
    -----
    All parsing, validation and figure construction happen before anything is
    written, so a rejected reply no longer leaves a half-populated folder behind.
    """
    # Pull the four tagged sections out of the raw reply.
    data_json = extract_json_section(input_string, 'data')
    config_json = extract_json_section(input_string, 'config')
    layout_json = extract_json_section(input_string, 'layout')
    csv_data = extract_json_section(input_string, 'csv')

    # The csv section is checked as well: writing None to data.csv used to fail
    # with a TypeError after the folder and JSON files had already been created.
    if not data_json or not config_json or not layout_json or csv_data is None:
        raise ValueError("One or more JSON or csv sections (data, config, layout) were not found in the input string.")

    # Parse each section once with the lenient parser, so the files saved to disk
    # and the objects used for plotting always come from the same parse.
    data = parse_json(data_json)
    layout = parse_json(layout_json)
    config = parse_json(config_json)

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Box plot requires 'x' (categorical) and 'y' (numerical) fields
    df = pd.DataFrame(data)
    if not all(col in df.columns for col in ['x', 'y']):
        raise ValueError("Missing 'x' or 'y' field in data for Box plot.")

    # Plotly accepts the layout title either as an object ({"text": ...}) or as a plain string.
    title = layout.get('title', {})
    title_text = title.get('text', 'Box Plot') if isinstance(title, dict) else str(title)

    # Plot Box plot using Plotly Express
    fig = px.box(data_frame=df, x='x', y='y', title=title_text)

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Create the base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from the historical "number of sub-folders + 1" and skip forward past
    # any name that already exists, so an earlier sample is never overwritten.
    folder_number = sum(
        os.path.isdir(os.path.join(base_folder, name)) for name in os.listdir(base_folder)
    ) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections; explicit UTF-8 avoids platform-dependent encodings.
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(csv_data)

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "box_plot.png")
    fig.write_image(image_path)

    print(f"JSON files and Box plot image successfully saved in folder: {unique_folder}")


In [61]:
# Draw a random theme, trend and table size for this generation round.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Prompt for Box-plot samples. This is an f-string, so the literal braces of the
# JSON examples are doubled ({{ }}). The description instruction names the same
# <description>...</description> tags that the example uses.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** Box Plot
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Rows: {no_of_rows}
   - Number of Columns: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the first column and row of the table.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Labels for the x-axis and y-axis.
     - A legend for distinguishing between data series.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description> and </description> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between data series.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers and data to reflect the graph's content.

### Example Structure:

<data> - this must be enclosed in <data> tags
<data>
[
  {{
    "x": "Category A",
    "y": 120
  }},
  {{
    "x": "Category A",
    "y": 150
  }},
  {{
    "x": "Category A",
    "y": 180
  }},
  {{
    "x": "Category B",
    "y": 90
  }},
  {{
    "x": "Category B",
    "y": 130
  }},
  {{
    "x": "Category B",
    "y": 160
  }}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Box Plot Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Category"
  }},
  "yaxis": {{
    "title": "Values"
  }},
  "showlegend": false
}}
</layout>

<description>
This Box plot visualizes the distribution of values across different categories: 'Category A' and 'Category B'. The x-axis represents the categorical variable, while the y-axis shows the numerical values. The box plot displays the median, quartiles, and potential outliers for each category. This visualization helps in understanding the spread and central tendency of the values within each category, highlighting differences and similarities between them.
</description>

<csv>
x, y
Category A, 120
Category A, 150
Category A, 180
Category B, 90
Category B, 130
Category B, 160
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
'''


In [62]:
def generate_dataset(prompt):
    """Send ``prompt`` to the model, print the raw answer, then store it on disk.

    Depends on two pieces of notebook state: the module-level ``type_of_chart``
    (used as folder prefix) and the currently defined
    ``save_json_to_unique_folder``.
    """
    raw_answer = gemini_model(prompt)
    print(raw_answer)
    save_json_to_unique_folder(raw_answer, chart_type=type_of_chart)


In [63]:
# Generate one Box-plot sample from the prompt built above and write it to disk.
generate_dataset(prompt)

## Daily Journal Summary - Box Plot

<data>
[
  {
    "x": "Mood",
    "y": 3
  },
  {
    "x": "Mood",
    "y": 4
  },
  {
    "x": "Mood",
    "y": 2
  },
  {
    "x": "Mood",
    "y": 5
  },
  {
    "x": "Mood",
    "y": 1
  },
  {
    "x": "Mood",
    "y": 7
  },
  {
    "x": "Productivity",
    "y": 4
  },
  {
    "x": "Productivity",
    "y": 3
  },
  {
    "x": "Productivity",
    "y": 5
  },
  {
    "x": "Productivity",
    "y": 2
  },
  {
    "x": "Productivity",
    "y": 4
  },
  {
    "x": "Productivity",
    "y": 1
  },
  {
    "x": "Sleep",
    "y": 7
  },
  {
    "x": "Sleep",
    "y": 8
  },
  {
    "x": "Sleep",
    "y": 6
  },
  {
    "x": "Sleep",
    "y": 9
  },
  {
    "x": "Sleep",
    "y": 5
  },
  {
    "x": "Sleep",
    "y": 12
  },
  {
    "x": "Exercise",
    "y": 2
  },
  {
    "x": "Exercise",
    "y": 1
  },
  {
    "x": "Exercise",
    "y": 3
  },
  {
    "x": "Exercise",
    "y": 4
  },
  {
    "x": "Exercise",
    "y": 2
  },
  {
    "x": "Exercise",
   

Heatmap

In [64]:
# Folder-name prefix for this section: generate_dataset reads this global and passes it as chart_type.
type_of_chart = "heatmap"

In [75]:
def save_json_to_unique_folder(input_string, chart_type='heatmap', base_folder='dataset'):
    """Extract the tagged sections of a model reply, plot a Heatmap, and save everything.

    Parameters
    ----------
    input_string : str
        Raw model output containing ``<data>``, ``<config>``, ``<layout>`` and
        ``<csv>`` sections.
    chart_type : str
        Prefix of the output folder name (``<chart_type>_<n>``).
    base_folder : str
        Directory that receives the numbered folder; created when absent.

    Raises
    ------
    ValueError
        If a section is missing, a JSON section cannot be parsed, data or layout
        is empty, or the data lacks an 'x', 'y' or 'z' field.

    Notes
    -----
    All parsing, validation and figure construction happen before anything is
    written, so a rejected reply no longer leaves a half-populated folder behind.
    """
    # Pull the four tagged sections out of the raw reply.
    data_json = extract_json_section(input_string, 'data')
    config_json = extract_json_section(input_string, 'config')
    layout_json = extract_json_section(input_string, 'layout')
    csv_data = extract_json_section(input_string, 'csv')

    # The csv section is checked as well: writing None to data.csv used to fail
    # with a TypeError after the folder and JSON files had already been created.
    if not data_json or not config_json or not layout_json or csv_data is None:
        raise ValueError("One or more JSON or csv sections (data, config, layout) were not found in the input string.")

    # Parse each section exactly once; the same objects are saved and plotted.
    data = parse_json(data_json)
    layout = parse_json(layout_json)
    config = parse_json(config_json)

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Convert data to a DataFrame for heatmap plotting
    df = pd.DataFrame(data)

    # Heatmap requires 'x', 'y', and 'z' fields
    if not all(col in df.columns for col in ['x', 'y', 'z']):
        raise ValueError("Missing 'x', 'y', or 'z' field in data for Heatmap.")

    # pivot() sorts row/column labels lexicographically, which scrambles ordered
    # labels such as weekdays; reindex restores the order of first appearance.
    grid = df.pivot(index='y', columns='x', values='z').reindex(
        index=pd.unique(df['y']), columns=pd.unique(df['x'])
    )

    # Plotly accepts the layout title either as an object ({"text": ...}) or as a plain string.
    title = layout.get('title', {})
    title_text = title.get('text', 'Heatmap') if isinstance(title, dict) else str(title)

    # Plot Heatmap using Plotly Express
    fig = px.imshow(grid, title=title_text, color_continuous_scale='Viridis')

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Create the base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from the historical "number of sub-folders + 1" and skip forward past
    # any name that already exists, so an earlier sample is never overwritten.
    folder_number = sum(
        os.path.isdir(os.path.join(base_folder, name)) for name in os.listdir(base_folder)
    ) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections; explicit UTF-8 avoids platform-dependent encodings.
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(csv_data)

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "heatmap.png")
    fig.write_image(image_path)

    print(f"JSON files and Heatmap image successfully saved in folder: {unique_folder}")

In [76]:
# Draw a random theme, trend and table size for this generation round.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Prompt for Heatmap samples. This is an f-string, so the literal braces of the
# JSON examples are doubled ({{ }}). The description instruction names the same
# <description>...</description> tags that the example uses.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** Heatmap
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Rows: {no_of_rows}
   - Number of Columns: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the first column and row of the table.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Labels for the x-axis and y-axis.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description> and </description> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between data series.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers and data to reflect the graph's content.

### Example Structure:

<data> - this must be enclosed in <data> tags
<data>
[
  {{"x": "Category A", "y": "Week 1", "z": 10}},
  {{"x": "Category A", "y": "Week 2", "z": 20}},
  {{"x": "Category A", "y": "Week 3", "z": 15}},
  {{"x": "Category B", "y": "Week 1", "z": 25}},
  {{"x": "Category B", "y": "Week 2", "z": 30}},
  {{"x": "Category B", "y": "Week 3", "z": 22}}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Heatmap Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Category"
  }},
  "yaxis": {{
    "title": "Week"
  }},
  "coloraxis": {{
    "colorbar": {{
      "title": "Values"
    }}
  }}
}}
</layout>

<description>
This Heatmap visualizes the intensity of values across different categories and weeks. The x-axis represents different categories, while the y-axis represents weeks. The heatmap uses color intensity to display the magnitude of values, with brighter colors indicating higher values. This visualization helps in understanding the distribution and variations of values across categories and time periods, providing insights into trends and patterns in the data.
</description>

<csv>
x, y, z
Category A, Week 1, 10
Category A, Week 2, 20
Category A, Week 3, 15
Category B, Week 1, 25
Category B, Week 2, 30
Category B, Week 3, 22
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
'''


In [77]:
def generate_dataset(prompt):
    """Produce one sample: call the model, show its reply, and save the parsed files.

    Uses the module-level ``type_of_chart`` as folder prefix and the currently
    defined ``save_json_to_unique_folder`` for extraction, plotting and saving.
    """
    generated_text = gemini_model(prompt)
    print(generated_text)
    save_json_to_unique_folder(generated_text, chart_type=type_of_chart)


In [79]:
# Generate one Heatmap sample from the prompt built above and write it to disk.
# NOTE(review): the recorded output ends with "AttributeError: module 'numpy' has no attribute
# 'bool'", i.e. some library in this environment still references the removed np.bool alias.
# Presumably an outdated plotting dependency; confirm and upgrade it or pin NumPy.
generate_dataset(prompt)

## Daily Water Intake Heatmap

### <data>
[
  { "x": "Morning", "y": "Monday", "z": 1000 },
  { "x": "Morning", "y": "Tuesday", "z": 1200 },
  { "x": "Morning", "y": "Wednesday", "z": 1100 },
  { "x": "Morning", "y": "Thursday", "z": 1150 },
  { "x": "Morning", "y": "Friday", "z": 1050 },
  { "x": "Morning", "y": "Saturday", "z": 800 },
  { "x": "Morning", "y": "Sunday", "z": 700 },
  { "x": "Afternoon", "y": "Monday", "z": 1100 },
  { "x": "Afternoon", "y": "Tuesday", "z": 1300 },
  { "x": "Afternoon", "y": "Wednesday", "z": 1200 },
  { "x": "Afternoon", "y": "Thursday", "z": 1250 },
  { "x": "Afternoon", "y": "Friday", "z": 1150 },
  { "x": "Afternoon", "y": "Saturday", "z": 900 },
  { "x": "Afternoon", "y": "Sunday", "z": 800 },
  { "x": "Evening", "y": "Monday", "z": 800 },
  { "x": "Evening", "y": "Tuesday", "z": 900 },
  { "x": "Evening", "y": "Wednesday", "z": 850 },
  { "x": "Evening", "y": "Thursday", "z": 950 },
  { "x": "Evening", "y": "Friday", "z": 1000 },
  { "x": "Evenin


In the future `np.bool` will be defined as the corresponding NumPy scalar.



AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

Funnel Chart

In [80]:
# Folder-name prefix for the Funnel section: generate_dataset reads this global and passes
# it as chart_type. It was left at "heatmap" from the previous section, which would have
# filed funnel samples under "heatmap_<n>".
type_of_chart = "funnel"

In [81]:
import re
import os
import json
import plotly.express as px
import pandas as pd


def save_json_to_unique_folder(input_string, chart_type='funnel', base_folder='dataset'):
    """Extract the tagged sections of a model reply, plot a Funnel Chart, and save everything.

    Parameters
    ----------
    input_string : str
        Raw model output containing ``<data>``, ``<config>``, ``<layout>`` and
        ``<csv>`` sections.
    chart_type : str
        Prefix of the output folder name (``<chart_type>_<n>``).
    base_folder : str
        Directory that receives the numbered folder; created when absent.

    Raises
    ------
    ValueError
        If a section is missing, a JSON section cannot be parsed, data or layout
        is empty, or the data lacks a 'stage' or 'value' field.

    Notes
    -----
    All parsing, validation and figure construction happen before anything is
    written, so a rejected reply no longer leaves a half-populated folder behind.
    """
    # Pull the four tagged sections out of the raw reply.
    data_json = extract_json_section(input_string, 'data')
    config_json = extract_json_section(input_string, 'config')
    layout_json = extract_json_section(input_string, 'layout')
    csv_data = extract_json_section(input_string, 'csv')

    # The csv section is checked as well: writing None to data.csv used to fail
    # with a TypeError after the folder and JSON files had already been created.
    if not data_json or not config_json or not layout_json or csv_data is None:
        raise ValueError("One or more JSON or csv sections (data, config, layout) were not found in the input string.")

    # Parse each section exactly once; the same objects are saved and plotted.
    data = parse_json(data_json)
    layout = parse_json(layout_json)
    config = parse_json(config_json)

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Convert data to a DataFrame for funnel chart plotting
    df = pd.DataFrame(data)

    # Funnel chart requires 'stage' and 'value' fields
    if not all(col in df.columns for col in ['stage', 'value']):
        raise ValueError("Missing 'stage' or 'value' field in data for Funnel Chart.")

    # Plotly accepts the layout title either as an object ({"text": ...}) or as a plain string.
    title = layout.get('title', {})
    title_text = title.get('text', 'Funnel Chart') if isinstance(title, dict) else str(title)

    # Plot Funnel Chart using Plotly Express
    fig = px.funnel(df, x='value', y='stage', title=title_text)

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Create the base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from the historical "number of sub-folders + 1" and skip forward past
    # any name that already exists, so an earlier sample is never overwritten.
    folder_number = sum(
        os.path.isdir(os.path.join(base_folder, name)) for name in os.listdir(base_folder)
    ) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the JSON sections; explicit UTF-8 avoids platform-dependent encodings.
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(csv_data)

    # Save the figure as an image in the same folder
    image_path = os.path.join(unique_folder, "funnel_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and Funnel Chart image successfully saved in folder: {unique_folder}")


In [82]:
# Draw a random theme and trend; the two random sizes are used below as the
# number of funnel stages and values.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Prompt for Funnel-chart samples. This is an f-string, so the literal braces of the
# JSON examples are doubled ({{ }}). The description instruction names the same
# <description>...</description> tags that the example uses.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** Funnel
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Stages: {no_of_rows}
   - Number of Values: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the stages of the funnel.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diversity and relevant outliers.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Labels for the stages.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description> and </description> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between stages.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers and data to reflect the funnel's stages and values.

### Example Structure:

<data> - this must be enclosed in <data> tags
<data>
[
  {{"stage": "Stage 1", "value": 100}},
  {{"stage": "Stage 2", "value": 80}},
  {{"stage": "Stage 3", "value": 60}},
  {{"stage": "Stage 4", "value": 40}},
  {{"stage": "Stage 5", "value": 20}}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Funnel Chart Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Value"
  }},
  "yaxis": {{
    "title": "Stage"
  }},
  "bargap": 0.3
}}
</layout>

<description>
This Funnel Chart visualizes the progression through different stages of a process. The y-axis represents the stages, while the x-axis represents the values associated with each stage. The chart provides a clear view of how values decrease from the initial stage to the final stage, highlighting key drop-offs and the overall effectiveness of the process.
</description>

<csv>
stage, value
Stage 1, 100
Stage 2, 80
Stage 3, 60
Stage 4, 40
Stage 5, 20
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
'''


In [83]:
def generate_dataset(prompt, chart_type=None):
    """Send ``prompt`` to Gemini, echo the raw response, and save the parsed artifacts.

    Args:
        prompt: Fully rendered prompt text passed to ``gemini_model``.
        chart_type: Label forwarded to ``save_json_to_unique_folder`` as its
            ``chart_type`` argument. When omitted, the notebook-level
            ``type_of_chart`` variable is used.

    Raises:
        NameError: If ``chart_type`` is omitted and ``type_of_chart`` has not
            been defined in the kernel.
    """
    if chart_type is None:
        # Fall back to the notebook global so existing one-argument calls keep working.
        chart_type = type_of_chart
    output = gemini_model(prompt)
    # Echo the raw model text so a failed extraction can be diagnosed from the cell output.
    print(output)
    save_json_to_unique_folder(output, chart_type=chart_type)


In [85]:
generate_dataset(prompt)

## Daily Productivity Levels Funnel Chart

### <data>
[
  {
    "stage": "Idea Generation",
    "value": 100
  },
  {
    "stage": "Task Planning",
    "value": 75
  },
  {
    "stage": "Work in Progress",
    "value": 60
  },
  {
    "stage": "Quality Review",
    "value": 45
  },
  {
    "stage": "Finalization",
    "value": 35
  },
  {
    "stage": "Completion",
    "value": 20
  }
]
</data>

### <config>
{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false,
  "scrollZoom": true
}
</config>

### <layout>
{
  "title": {
    "text": "Daily Productivity Levels Funnel Chart",
    "font": {
      "size": 18
    }
  },
  "xaxis": {
    "title": "Number of Tasks",
    "range": [0, 120],
    "tickmode": "linear",
    "tick0": 0,
    "dtick": 20
  },
  "yaxis": {
    "title": "Stage of Task Completion",
    "autorange": "reversed"
  },
  "annotations": [
    {
      "x": 75,
      "y": "Task Planning",
      "xref": "x",
      "yref": "y",
      "text": "Significant Drop"

Financial Charts 

Candlestick Chart

In [15]:
# Chart-type label for this section; generate_dataset forwards it as chart_type,
# where it becomes the prefix of the output folder name.
type_of_chart = "candlestick"

In [16]:
import re
import os
import json
import plotly.graph_objects as go
import pandas as pd


def save_json_to_unique_folder(input_string, chart_type='candlestick', base_folder='dataset'):
    """Extract the tagged sections of a model response, save them, and render a Candlestick Chart.

    The response is expected to contain ``<data>``, ``<config>``, ``<layout>`` and
    ``<csv>`` sections. Everything is extracted, parsed and validated before any
    file is created, so a malformed response does not leave a half-populated folder.

    Args:
        input_string: Raw text returned by the model.
        chart_type: Prefix of the output folder name (``{chart_type}_{n}``).
        base_folder: Directory that holds all generated folders; created if missing.

    Raises:
        ValueError: If a section is missing, the JSON cannot be parsed, the data or
            layout is empty, or the data lacks the 'date', 'open', 'high', 'low'
            and 'close' fields.
    """
    # Pull every tagged section out of the response up front.
    sections = {
        tag: extract_json_section(input_string, tag)
        for tag in ('data', 'config', 'layout', 'csv')
    }
    # The csv section is checked too: writing a missing (None) section would fail later.
    missing = [tag for tag, text in sections.items() if not text]
    if missing:
        raise ValueError(
            f"One or more JSON or csv sections were not found in the input string: {', '.join(missing)}"
        )

    # Parse each JSON section exactly once; parse_json raises ValueError on bad JSON.
    data = parse_json(sections['data'])
    config = parse_json(sections['config'])
    layout = parse_json(sections['layout'])

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Convert data to a DataFrame for candlestick chart plotting
    df = pd.DataFrame(data)

    # Candlestick chart requires 'date', 'open', 'high', 'low', and 'close' fields
    if not all(col in df.columns for col in ['date', 'open', 'high', 'low', 'close']):
        raise ValueError("Missing 'date', 'open', 'high', 'low', or 'close' field in data for Candlestick Chart.")

    # Create a base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from the number of existing sub-folders + 1, then skip any name that is
    # already taken so an earlier sample is never silently overwritten.
    folder_number = sum(
        1 for name in os.listdir(base_folder)
        if os.path.isdir(os.path.join(base_folder, name))
    ) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the parsed JSON sections as files in the unique folder
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(sections['csv'])

    # Plot Candlestick Chart using Plotly
    fig = go.Figure(data=[go.Candlestick(
        x=df['date'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close']
    )])

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Save the figure as an image in the same folder
    # NOTE(review): static image export presumably needs an export engine (e.g. kaleido) installed - confirm.
    image_path = os.path.join(unique_folder, "candlestick_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and Candlestick Chart image successfully saved in folder: {unique_folder}")



In [25]:
# Draw a random theme and trend plus random row/column counts to parameterise the prompt.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Prompt template for the Candlestick chart. It is an f-string, so the doubled
# braces ({{ }}) in the examples render as literal JSON braces.
# save_json_to_unique_folder extracts the <data>, <config>, <layout> and <csv>
# sections of the response by tag name.
# NOTE(review): the instructions ask for <description start>/<description end>
# tags while the example uses <description>...</description> - confirm which is intended.
# NOTE(review): no_of_cols is interpolated as "Open, High, Low, Close Values for
# each Date"; the meaning of that number for OHLC-style data is unclear - confirm.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** Candlestick
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Dates: {no_of_rows}
   - Open, High, Low, Close Values for each Date: {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the dates and stock attributes.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diverse values with relevant outliers for open, high, low, and close prices.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Axis labels for Date, Open, High, Low, Close prices.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description start> and <description end> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - Any comparisons between open, high, low, and close prices.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers such as Date, Open, High, Low, Close, and their corresponding values.

### Example Structure:

<data> - this must be enclosed in <data> tags
<data>
[
  {{"date": "2024-09-10", "open": 100, "high": 120, "low": 90, "close": 110}},
  {{"date": "2024-09-11", "open": 110, "high": 130, "low": 100, "close": 120}},
  {{"date": "2024-09-12", "open": 120, "high": 140, "low": 110, "close": 130}},
  {{"date": "2024-09-13", "open": 130, "high": 150, "low": 120, "close": 140}},
  {{"date": "2024-09-14", "open": 140, "high": 160, "low": 130, "close": 150}}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Candlestick Chart Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Date"
  }},
  "yaxis": {{
    "title": "Price"
  }},
  "bargap": 0.3
}}
</layout>

<description>
This Candlestick Chart visualizes the fluctuations in stock prices over a series of dates. The x-axis represents the dates, while the y-axis represents the open, high, low, and close prices. Each candlestick shows the price movements for a given day, highlighting trends and key outliers in the data.
</description>

<csv>
date,open,high,low,close
2024-09-10,100,120,90,110
2024-09-11,110,130,100,120
2024-09-12,120,140,110,130
2024-09-13,130,150,120,140
2024-09-14,140,160,130,150
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
The tags must be there without any modifications.
'''


In [26]:
def generate_dataset(prompt, chart_type=None):
    """Send ``prompt`` to Gemini, echo the raw response, and save the parsed artifacts.

    Args:
        prompt: Fully rendered prompt text passed to ``gemini_model``.
        chart_type: Label forwarded to ``save_json_to_unique_folder`` as its
            ``chart_type`` argument. When omitted, the notebook-level
            ``type_of_chart`` variable is used.

    Raises:
        NameError: If ``chart_type`` is omitted and ``type_of_chart`` has not
            been defined in the kernel.
    """
    if chart_type is None:
        # Fall back to the notebook global so existing one-argument calls keep working.
        chart_type = type_of_chart
    output = gemini_model(prompt)
    # Echo the raw model text so a failed extraction can be diagnosed from the cell output.
    print(output)
    save_json_to_unique_folder(output, chart_type=chart_type)
generate_dataset(prompt)

```json
<data>
[
  {
    "date": "2024-09-10",
    "open": 120,
    "high": 135,
    "low": 115,
    "close": 125
  },
  {
    "date": "2024-09-11",
    "open": 125,
    "high": 140,
    "low": 110,
    "close": 130
  },
  {
    "date": "2024-09-12",
    "open": 130,
    "high": 145,
    "low": 120,
    "close": 135
  },
  {
    "date": "2024-09-13",
    "open": 135,
    "high": 155,
    "low": 125,
    "close": 145
  },
  {
    "date": "2024-09-14",
    "open": 145,
    "high": 160,
    "low": 130,
    "close": 150
  },
  {
    "date": "2024-09-15",
    "open": 150,
    "high": 170,
    "low": 140,
    "close": 160
  },
  {
    "date": "2024-09-16",
    "open": 160,
    "high": 180,
    "low": 150,
    "close": 170
  },
  {
    "date": "2024-09-17",
    "open": 170,
    "high": 175,
    "low": 140,
    "close": 155
  }
]
</data>

<config>
{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false,
  "scrollZoom": true
}
</config>

<layout>
{
  "title": {
    "text": "Hou

Waterfall 

In [28]:
# Chart-type label for this section; generate_dataset forwards it as chart_type,
# where it becomes the prefix of the output folder name.
type_of_chart = "Waterfall"

In [29]:
import re
import os
import json
import plotly.graph_objects as go
import pandas as pd


def save_json_to_unique_folder(input_string, chart_type='waterfall', base_folder='dataset'):
    """Extract the tagged sections of a model response, save them, and render a Waterfall Chart.

    The response is expected to contain ``<data>``, ``<config>``, ``<layout>`` and
    ``<csv>`` sections. Everything is extracted, parsed and validated before any
    file is created, so a malformed response does not leave a half-populated folder.

    Args:
        input_string: Raw text returned by the model.
        chart_type: Prefix of the output folder name (``{chart_type}_{n}``).
        base_folder: Directory that holds all generated folders; created if missing.

    Raises:
        ValueError: If a section is missing, the JSON cannot be parsed, the data or
            layout is empty, or the data lacks the 'x', 'y' and 'measure' fields.
    """
    # Pull every tagged section out of the response up front.
    sections = {
        tag: extract_json_section(input_string, tag)
        for tag in ('data', 'config', 'layout', 'csv')
    }
    # The csv section is checked too: writing a missing (None) section would fail later.
    missing = [tag for tag, text in sections.items() if not text]
    if missing:
        raise ValueError(
            f"One or more JSON or csv sections were not found in the input string: {', '.join(missing)}"
        )

    # Parse each JSON section exactly once; parse_json raises ValueError on bad JSON.
    data = parse_json(sections['data'])
    config = parse_json(sections['config'])
    layout = parse_json(sections['layout'])

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Convert data to a DataFrame for waterfall chart plotting
    df = pd.DataFrame(data)

    # Waterfall chart requires 'measure', 'x', and 'y' fields
    if not all(col in df.columns for col in ['x', 'y', 'measure']):
        raise ValueError("Missing 'x', 'y', or 'measure' field in data for Waterfall Chart.")

    # Create a base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from the number of existing sub-folders + 1, then skip any name that is
    # already taken so an earlier sample is never silently overwritten.
    folder_number = sum(
        1 for name in os.listdir(base_folder)
        if os.path.isdir(os.path.join(base_folder, name))
    ) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the parsed JSON sections as files in the unique folder
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(sections['csv'])

    # Plot Waterfall Chart using Plotly
    fig = go.Figure(go.Waterfall(
        x=df['x'],
        y=df['y'],
        measure=df['measure']
    ))

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Save the figure as an image in the same folder
    # NOTE(review): static image export presumably needs an export engine (e.g. kaleido) installed - confirm.
    image_path = os.path.join(unique_folder, "waterfall_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and Waterfall Chart image successfully saved in folder: {unique_folder}")



In [30]:
# Draw a random theme and trend plus random row/column counts to parameterise the prompt.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Prompt template for the Waterfall chart. It is an f-string, so the doubled
# braces ({{ }}) in the examples render as literal JSON braces.
# save_json_to_unique_folder extracts the <data>, <config>, <layout> and <csv>
# sections of the response by tag name and expects 'x', 'y' and 'measure' fields.
# NOTE(review): the instructions ask for <description start>/<description end>
# tags while the example uses <description>...</description> - confirm which is intended.
# NOTE(review): the CSV requirement names headers "Category, Measure, and Value"
# while the example CSV uses x,y,measure - confirm which is intended.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** Waterfall
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Categories: {no_of_rows}
   - Measure, x (categories), and y (values): {no_of_cols}
6. **Related Nouns:** List nouns related to the theme along the categories and measure attributes.

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diverse values with relevant outliers for the Waterfall chart.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Axis labels for categories and values.
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description start> and <description end> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - The significance of each step or category in the Waterfall chart.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers such as Category, Measure, and Value, along with their corresponding values.

### Example Structure:

<data> - this must be enclosed in <data> tags
<data>
[
  {{"x": "2024-09-10", "y": 100, "measure": "relative"}},
  {{"x": "2024-09-11", "y": -20, "measure": "relative"}},
  {{"x": "2024-09-12", "y": 40, "measure": "relative"}},
  {{"x": "2024-09-13", "y": -10, "measure": "relative"}},
  {{"x": "2024-09-14", "y": 50, "measure": "total"}}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "Waterfall Chart Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Categories"
  }},
  "yaxis": {{
    "title": "Values"
  }},
  "bargap": 0.3
}}
</layout>

<description>
This Waterfall Chart visualizes the step-by-step changes in values across several categories. The x-axis represents the categories, while the y-axis represents the value changes. The chart highlights both increases and decreases in values, providing an overview of the net effect. It includes significant outliers that affect the overall trend.
</description>

<csv>
x,y,measure
2024-09-10,100,relative
2024-09-11,-20,relative
2024-09-12,40,relative
2024-09-13,-10,relative
2024-09-14,50,total
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
The tags must be there without any modifications.
'''



In [32]:
def generate_dataset(prompt, chart_type=None):
    """Send ``prompt`` to Gemini, echo the raw response, and save the parsed artifacts.

    Args:
        prompt: Fully rendered prompt text passed to ``gemini_model``.
        chart_type: Label forwarded to ``save_json_to_unique_folder`` as its
            ``chart_type`` argument. When omitted, the notebook-level
            ``type_of_chart`` variable is used.

    Raises:
        NameError: If ``chart_type`` is omitted and ``type_of_chart`` has not
            been defined in the kernel.
    """
    if chart_type is None:
        # Fall back to the notebook global so existing one-argument calls keep working.
        chart_type = type_of_chart
    output = gemini_model(prompt)
    # Echo the raw model text so a failed extraction can be diagnosed from the cell output.
    print(output)
    save_json_to_unique_folder(output, chart_type=chart_type)
generate_dataset(prompt)

## Online Shopping Tracker Waterfall Chart

### <data>
[
  {
    "x": "Order Placed",
    "y": 1200,
    "measure": "relative"
  },
  {
    "x": "Order Processing",
    "y": -100,
    "measure": "relative"
  },
  {
    "x": "Shipping Delay",
    "y": -300,
    "measure": "relative"
  },
  {
    "x": "Delivery",
    "y": 80,
    "measure": "relative"
  },
  {
    "x": "Returns",
    "y": -200,
    "measure": "relative"
  },
  {
    "x": "Customer Support",
    "y": -50,
    "measure": "relative"
  },
  {
    "x": "Total Revenue",
    "y": 530,
    "measure": "total"
  }
]
</data>

### <config>
{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}
</config>

### <layout>
{
  "title": {
    "text": "Online Shopping Order Fulfillment Waterfall Chart",
    "font": {
      "size": 18
    }
  },
  "xaxis": {
    "title": "Order Stages",
    "tickangle": -45
  },
  "yaxis": {
    "title": "Value (in USD)",
    "tickprefix": "$"
  },
  "annotations": [
    {
      "x": "Shi

OHLC Charts

In [33]:
# Chart-type label for this section; generate_dataset forwards it as chart_type,
# where it becomes the prefix of the output folder name.
# NOTE(review): the value contains a space, so folders are named "OHLC Charts_<n>" - confirm this is intended.
type_of_chart = "OHLC Charts"

In [34]:
import re
import os
import json
import plotly.graph_objects as go
import pandas as pd


def save_json_to_unique_folder(input_string, chart_type='ohlc', base_folder='dataset'):
    """Extract the tagged sections of a model response, save them, and render an OHLC Chart.

    The response is expected to contain ``<data>``, ``<config>``, ``<layout>`` and
    ``<csv>`` sections. Everything is extracted, parsed and validated before any
    file is created, so a malformed response does not leave a half-populated folder.

    Args:
        input_string: Raw text returned by the model.
        chart_type: Prefix of the output folder name (``{chart_type}_{n}``).
        base_folder: Directory that holds all generated folders; created if missing.

    Raises:
        ValueError: If a section is missing, the JSON cannot be parsed, the data or
            layout is empty, or the data lacks the 'date', 'open', 'high', 'low'
            and 'close' fields.
    """
    # Pull every tagged section out of the response up front.
    sections = {
        tag: extract_json_section(input_string, tag)
        for tag in ('data', 'config', 'layout', 'csv')
    }
    # The csv section is checked too: writing a missing (None) section would fail later.
    missing = [tag for tag, text in sections.items() if not text]
    if missing:
        raise ValueError(
            f"One or more JSON or CSV sections were not found in the input string: {', '.join(missing)}"
        )

    # Parse each JSON section exactly once; parse_json raises ValueError on bad JSON.
    data = parse_json(sections['data'])
    config = parse_json(sections['config'])
    layout = parse_json(sections['layout'])

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # Convert data to a DataFrame for OHLC chart plotting
    df = pd.DataFrame(data)

    # OHLC chart requires 'date', 'open', 'high', 'low', 'close' fields
    if not all(col in df.columns for col in ['date', 'open', 'high', 'low', 'close']):
        raise ValueError("Missing 'date', 'open', 'high', 'low', or 'close' field in data for OHLC Chart.")

    # Create a base folder (e.g., dataset) if it doesn't exist
    os.makedirs(base_folder, exist_ok=True)

    # Start from the number of existing sub-folders + 1, then skip any name that is
    # already taken so an earlier sample is never silently overwritten.
    folder_number = sum(
        1 for name in os.listdir(base_folder)
        if os.path.isdir(os.path.join(base_folder, name))
    ) + 1
    unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    while os.path.exists(unique_folder):
        folder_number += 1
        unique_folder = os.path.join(base_folder, f"{chart_type}_{folder_number}")
    os.makedirs(unique_folder)

    # Save the parsed JSON sections as files in the unique folder
    for file_name, payload in (('data.json', data), ('config.json', config), ('layout.json', layout)):
        with open(os.path.join(unique_folder, file_name), 'w', encoding='utf-8') as json_file:
            json.dump(payload, json_file, indent=4)

    # Save the CSV data as a file in the unique folder
    with open(os.path.join(unique_folder, 'data.csv'), 'w', encoding='utf-8') as csv_file:
        csv_file.write(sections['csv'])

    # Plot OHLC Chart using Plotly
    fig = go.Figure(go.Ohlc(
        x=df['date'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close']
    ))

    # Apply layout settings from the extracted layout JSON
    fig.update_layout(**layout)

    # Save the figure as an image in the same folder
    # NOTE(review): static image export presumably needs an export engine (e.g. kaleido) installed - confirm.
    image_path = os.path.join(unique_folder, "ohlc_chart.png")
    fig.write_image(image_path)

    print(f"JSON files and OHLC Chart image successfully saved in folder: {unique_folder}")



In [39]:
# Draw a random theme and trend plus random row/column counts to parameterise the prompt.
# no_of_cols is unpacked but not interpolated anywhere in this template.
chart_theme, chart_trend, no_of_rows, no_of_cols = get_random_chart_theme_and_trend()

# Prompt template for the OHLC chart. It is an f-string, so the doubled
# braces ({{ }}) in the examples render as literal JSON braces.
# save_json_to_unique_folder extracts the <data>, <config>, <layout> and <csv>
# sections of the response by tag name.
# NOTE(review): the instructions ask for <description start>/<description end>
# tags while the example uses <description>...</description> - confirm which is intended.
prompt = f'''
You are an expert in generating data and creating JSON configurations compatible with Plotly for visualizing that data. You will receive several key characteristics about the data, and your task is to generate:
1. A `data.json` file that contains the data.
2. A `config.json` file that defines the configuration settings for Plotly.
3. A `layout.json` file that defines the layout for the chart.
4. A `data.csv` file that provides a CSV representation of the graph data.

Your output should be highly professional, accurate, and formatted as described below.

### Input Parameters:
1. **Theme of the Chart:** {chart_theme}
2. **Type of Chart:** OHLC
3. **Trends in the Data:** The trends in the data should include as many of the following as possible: {chart_trend}
4. **Data Diversity:** The data should be diverse and include several outliers.
5. **Data Dimensions:**
   - Number of Data Points: {no_of_rows}
   - Attributes: Date, Open, High, Low, Close

### Output Requirements:

1. **Data (data.json):**
   - Generate JSON data based on the provided theme, trends, and dimensions.
   - Ensure the data contains diverse values with relevant outliers for the OHLC chart.

2. **Configuration (config.json):**
   - Define the Plotly configuration for rendering the chart.
   - Include any relevant settings such as responsive behavior, scroll zooming, etc.

3. **Layout (layout.json):**
   - Define the layout of the chart including:
     - Title reflecting the chart's theme.
     - Axis labels for date (x-axis) and price (y-axis).
     - Annotations for key data points, especially outliers.
     - Margins, gridlines, and other formatting details.

4. **Chart Description:**
   - Provide a detailed description wrapped within <description start> and <description end> tags.
   - Focus on:
     - The chart’s theme.
     - The general and individual trends in the data.
     - The significance of price changes in the OHLC chart.
     - The presence and impact of outliers.

5. **CSV Representation (data.csv):**
   - Generate a CSV file that represents the graph data in tabular form.
   - Include relevant headers such as Date, Open, High, Low, Close, along with their corresponding values.

### Example Structure:

<data> - this must be enclosed in <data> tags
<data>
[
  {{"date": "2024-09-10", "open": 100, "high": 120, "low": 90, "close": 110}},
  {{"date": "2024-09-11", "open": 110, "high": 130, "low": 100, "close": 120}},
  {{"date": "2024-09-12", "open": 120, "high": 140, "low": 110, "close": 130}},
  {{"date": "2024-09-13", "open": 130, "high": 150, "low": 120, "close": 140}},
  {{"date": "2024-09-14", "open": 140, "high": 160, "low": 130, "close": 150}}
]
</data>

<config> - this must be enclosed in <config> tags
{{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false
}}
</config>

<layout> - this must be enclosed in <layout> tags
{{
  "title": {{
    "text": "OHLC Chart Example",
    "font": {{
      "size": 18
    }}
  }},
  "xaxis": {{
    "title": "Date"
  }},
  "yaxis": {{
    "title": "Price"
  }},
  "bargap": 0.3
}}
</layout>

<description>
This OHLC Chart visualizes the open, high, low, and close prices over time. The x-axis represents the dates, while the y-axis shows the price changes. The chart highlights price fluctuations across several days, including significant outliers that impact the overall trend.
</description>

<csv>
date,open,high,low,close
2024-09-10,100,120,90,110
2024-09-11,110,130,100,120
2024-09-12,120,140,110,130
2024-09-13,130,150,120,140
2024-09-14,140,160,130,150
</csv>

Make sure all the generated files are enclosed within the appropriate tags and follow the specified structure. Ensure that the JSON files are formatted correctly and the CSV file contains the necessary data for the chart.
The tags must be there without any modifications.
'''


In [41]:
def generate_dataset(prompt, chart_type=None):
    """Send ``prompt`` to Gemini, echo the raw response, and save the parsed artifacts.

    Args:
        prompt: Fully rendered prompt text passed to ``gemini_model``.
        chart_type: Label forwarded to ``save_json_to_unique_folder`` as its
            ``chart_type`` argument. When omitted, the notebook-level
            ``type_of_chart`` variable is used.

    Raises:
        NameError: If ``chart_type`` is omitted and ``type_of_chart`` has not
            been defined in the kernel.
    """
    if chart_type is None:
        # Fall back to the notebook global so existing one-argument calls keep working.
        chart_type = type_of_chart
    output = gemini_model(prompt)
    # Echo the raw model text so a failed extraction can be diagnosed from the cell output.
    print(output)
    save_json_to_unique_folder(output, chart_type=chart_type)
generate_dataset(prompt)

<data>
[
  {
    "date": "2024-09-10",
    "open": 150,
    "high": 160,
    "low": 140,
    "close": 145
  },
  {
    "date": "2024-09-11",
    "open": 145,
    "high": 150,
    "low": 130,
    "close": 135
  },
  {
    "date": "2024-09-12",
    "open": 135,
    "high": 140,
    "low": 120,
    "close": 125
  },
  {
    "date": "2024-09-13",
    "open": 125,
    "high": 130,
    "low": 110,
    "close": 115
  },
  {
    "date": "2024-09-14",
    "open": 115,
    "high": 120,
    "low": 100,
    "close": 105
  },
  {
    "date": "2024-09-15",
    "open": 105,
    "high": 110,
    "low": 90,
    "close": 95
  },
  {
    "date": "2024-09-16",
    "open": 95,
    "high": 100,
    "low": 80,
    "close": 85
  },
  {
    "date": "2024-09-17",
    "open": 85,
    "high": 90,
    "low": 70,
    "close": 75
  }
]
</data>

<config>
{
  "responsive": true,
  "displayModeBar": true,
  "displaylogo": false,
  "scrollZoom": true
}
</config>

<layout>
{
  "title": {
    "text": "Travel Itinerary Tim