In [1]:
# Required Libraries

import google.generativeai as genai
from openai import OpenAI
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import OpenAI
# from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Required Secret Key
from config import gemini_key, open_ai_key

In [3]:
def input_image_setup(file_loc):
    """Read an image file and wrap it as a one-element list of inline image parts.

    Parameters
    ----------
    file_loc : str or os.PathLike
        Location of the image on disk.

    Returns
    -------
    list[dict]
        A single dict with the keys ``"mime_type"`` and ``"data"`` (the raw
        bytes of the file).

    Raises
    ------
    FileNotFoundError
        If ``file_loc`` does not exist.
    """
    import mimetypes
    from pathlib import Path

    if not (img := Path(file_loc)).exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    # Derive the MIME type from the file extension so that PNG (or other)
    # images are not mislabelled as JPEG. Unknown or non-image extensions
    # keep the previous hard-coded value.
    guessed_type, _ = mimetypes.guess_type(img.name)
    if not guessed_type or not guessed_type.startswith("image/"):
        guessed_type = "image/jpeg"

    return [{"mime_type": guessed_type, "data": img.read_bytes()}]
def get_image_info(image_loc, prompt):
    """Send a chart image plus a question to the Gemini vision model and return the reply text.

    Parameters
    ----------
    image_loc : str or os.PathLike
        Path of the image, loaded through ``input_image_setup``.
    prompt : str
        The question / task description placed after the image in the request.

    Returns
    -------
    str
        ``response.text`` of the model reply, converted with ``str``.
    """
    genai.configure(api_key=gemini_key)

    # Generation settings passed to the model.
    sampling_settings = dict(
        temperature=0,
        top_p=1,
        top_k=32,
        max_output_tokens=4096,
    )
    vision_model = genai.GenerativeModel(
        model_name="gemini-pro-vision",
        generation_config=sampling_settings,
    )

    # Role-setting text that precedes the image in the request.
    role_text = """ You are an expert in data visualization and graph analysis, adept at interpreting graphical data and generating structured JSON configurations for Plotly"""

    # Request order: role text, then the image part, then the caller's prompt.
    image_part = input_image_setup(image_loc)[0]
    reply = vision_model.generate_content([role_text, image_part, prompt])
    return str(reply.text)

In [4]:
import plotly.graph_objs as go
import plotly.io as pio
import json
import re

def extract_json_section(input_string, tag):
    """Extract the text found between ``<tag>`` and ``</tag>``.

    Parameters
    ----------
    input_string : str or None
        Text to search (for example a model reply).
    tag : str
        Tag name without angle brackets; it is matched literally.

    Returns
    -------
    str or None
        The stripped content of the first matching section, or ``None`` when
        the input is empty/``None`` or no such section exists.
    """
    if not input_string:
        return None

    # Escape the tag so regex metacharacters in it cannot alter the pattern.
    safe_tag = re.escape(tag)
    # DOTALL lets the non-greedy group span multi-line JSON.
    match = re.search(f"<{safe_tag}>(.*?)</{safe_tag}>", input_string, re.DOTALL)
    return match.group(1).strip() if match else None

def parse_json(json_str):
    """Parse JSON text, tolerating a few common deviations from strict JSON.

    The attempts are made in this order and the first success is returned:

    1. the text as given (strict JSON);
    2. the text with every single quote replaced by a double quote;
    3. the text with ``//`` and ``/* */`` comments and trailing commas removed;
    4. both repairs combined.

    Parameters
    ----------
    json_str : str
        The JSON text to parse.

    Returns
    -------
    object
        The decoded value (dict, list, str, number, bool or ``None``).

    Raises
    ------
    ValueError
        If none of the attempts yields valid JSON. The message carries the
        error from the strict attempt, whose position refers to the input
        exactly as given.
    """
    def _strip_noise(text):
        """Drop comments and trailing commas while leaving string literals untouched."""
        string_lit = r'"(?:\\.|[^"\\])*"'
        # Matching string literals first keeps e.g. "http://..." intact.
        without_comments = re.sub(
            '(' + string_lit + r')|//[^\n]*|/\*.*?\*/',
            lambda m: m.group(1) or '',
            text,
            flags=re.DOTALL,
        )
        return re.sub(
            '(' + string_lit + r')|,(\s*[}\]])',
            lambda m: m.group(1) or m.group(2),
            without_comments,
        )

    # Each step is evaluated lazily so strict JSON never pays for the repairs.
    repairs = (
        lambda s: s,
        lambda s: s.replace("'", '"'),
        _strip_noise,
        lambda s: _strip_noise(s.replace("'", '"')),
    )

    first_error = None
    for repair in repairs:
        try:
            return json.loads(repair(json_str))
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e

    raise ValueError(f"Invalid JSON: {first_error}") from first_error

def plot_from_ai_output(input_string, output_path="new_image.png"):
    """Build, show and optionally save a Plotly figure from a tagged model reply.

    The reply is expected to contain ``<data>``, ``<layout>`` and (optionally)
    ``<config>`` sections holding JSON. Supported trace types are ``bar``,
    ``line``, ``scatter`` and ``pie``.

    Parameters
    ----------
    input_string : str
        Text containing the tagged JSON sections.
    output_path : str or None, optional
        Where to write a static image of the figure. Defaults to
        ``"new_image.png"`` (the previous fixed location); pass ``None`` to
        skip the export.

    Raises
    ------
    ValueError
        If the data or layout section is missing/empty, a trace entry is not
        a JSON object, or a trace has an unsupported ``type``.
    """
    def _style(trace_data, key, fields):
        """Return the listed style fields present under ``trace_data[key]``, or None."""
        section = trace_data.get(key)
        if not isinstance(section, dict):
            return None
        picked = {field: section[field] for field in fields if field in section}
        return picked or None

    # Extract JSON sections
    data_json = extract_json_section(input_string, "data")
    layout_json = extract_json_section(input_string, "layout")
    config_json = extract_json_section(input_string, "config")

    # Parse JSON strings with relaxed rules
    data = parse_json(data_json) if data_json else None
    layout = parse_json(layout_json) if layout_json else None
    config = parse_json(config_json) if config_json else None

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # A single trace given as an object (not wrapped in an array) is accepted.
    if isinstance(data, dict):
        data = [data]

    # Prepare traces for the plot
    traces = []
    for trace_data in data:
        if not isinstance(trace_data, dict):
            raise ValueError(
                f"Each trace must be a JSON object, got: {type(trace_data).__name__}"
            )

        trace_type = trace_data.get('type')
        # Only the fields actually supplied are forwarded, so a marker without
        # a colour or a line without a width no longer raises KeyError.
        marker = _style(trace_data, 'marker', ('color',))
        line = _style(trace_data, 'line', ('color', 'width'))

        if trace_type == 'bar':
            trace = go.Bar(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                name=trace_data.get('name', ''),
                marker=marker
            )
        elif trace_type == 'line':
            trace = go.Scatter(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                mode='lines',
                name=trace_data.get('name', ''),
                line=line,
                hoverinfo='text',
                text=trace_data.get('text')
            )
        elif trace_type == 'scatter':
            trace = go.Scatter(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                mode=trace_data.get('mode', 'markers'),
                name=trace_data.get('name', ''),
                marker=marker,
                line=line,
                hoverinfo='text',
                text=trace_data.get('text')
            )
        elif trace_type == 'pie':
            trace = go.Pie(
                labels=trace_data.get('labels', []),
                values=trace_data.get('values', []),
                name=trace_data.get('name', ''),
                textinfo=trace_data.get('textinfo', 'percent+label'),
                hoverinfo=trace_data.get('hoverinfo', 'label+percent+name')
            )
        else:
            raise ValueError(f"Unsupported trace type: {trace_type}")

        traces.append(trace)

    # Create figure with the extracted layout and data
    fig = go.Figure(data=traces, layout=layout)

    # Render the figure with the config
    pio.show(fig, config=config)

    # Save the figure as an image unless the caller opted out
    if output_path:
        fig.write_image(output_path)

In [5]:
import plotly.graph_objs as go
import plotly.io as pio
import json
import re

def extract_json_section(input_string, tag):
    """Extract the text found between ``<tag>`` and ``</tag>``.

    Parameters
    ----------
    input_string : str or None
        Text to search (for example a model reply).
    tag : str
        Tag name without angle brackets; it is matched literally.

    Returns
    -------
    str or None
        The stripped content of the first matching section, or ``None`` when
        the input is empty/``None`` or no such section exists.
    """
    if not input_string:
        return None

    # Escape the tag so regex metacharacters in it cannot alter the pattern.
    safe_tag = re.escape(tag)
    # DOTALL lets the non-greedy group span multi-line JSON.
    match = re.search(f"<{safe_tag}>(.*?)</{safe_tag}>", input_string, re.DOTALL)
    return match.group(1).strip() if match else None

def parse_json(json_str):
    """Parse JSON text, tolerating a few common deviations from strict JSON.

    The attempts are made in this order and the first success is returned:

    1. the text as given (strict JSON);
    2. the text with every single quote replaced by a double quote;
    3. the text with ``//`` and ``/* */`` comments and trailing commas removed;
    4. both repairs combined.

    Parameters
    ----------
    json_str : str
        The JSON text to parse.

    Returns
    -------
    object
        The decoded value (dict, list, str, number, bool or ``None``).

    Raises
    ------
    ValueError
        If none of the attempts yields valid JSON. The message carries the
        error from the strict attempt, whose position refers to the input
        exactly as given.
    """
    def _strip_noise(text):
        """Drop comments and trailing commas while leaving string literals untouched."""
        string_lit = r'"(?:\\.|[^"\\])*"'
        # Matching string literals first keeps e.g. "http://..." intact.
        without_comments = re.sub(
            '(' + string_lit + r')|//[^\n]*|/\*.*?\*/',
            lambda m: m.group(1) or '',
            text,
            flags=re.DOTALL,
        )
        return re.sub(
            '(' + string_lit + r')|,(\s*[}\]])',
            lambda m: m.group(1) or m.group(2),
            without_comments,
        )

    # Each step is evaluated lazily so strict JSON never pays for the repairs.
    repairs = (
        lambda s: s,
        lambda s: s.replace("'", '"'),
        _strip_noise,
        lambda s: _strip_noise(s.replace("'", '"')),
    )

    first_error = None
    for repair in repairs:
        try:
            return json.loads(repair(json_str))
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e

    raise ValueError(f"Invalid JSON: {first_error}") from first_error

def plot_from_ai_output_v2(input_string, output_path="new_image.png"):
    """Build, show and optionally save a Plotly figure from a tagged model reply.

    The reply is expected to contain ``<data>``, ``<layout>`` and (optionally)
    ``<config>`` sections holding JSON. Supported trace types are ``bar``
    (also ``stacked bar``), ``line``, ``scatter``, ``pie``, ``area``,
    ``scatter3d`` and ``surface``.

    Parameters
    ----------
    input_string : str
        Text containing the tagged JSON sections.
    output_path : str or None, optional
        Where to write a static image of the figure. Defaults to
        ``"new_image.png"`` (the previous fixed location); pass ``None`` to
        skip the export.

    Raises
    ------
    ValueError
        If the data or layout section is missing/empty, a trace entry is not
        a JSON object, or a trace has an unsupported ``type``.
    """
    def _style(trace_data, key, fields):
        """Return the listed style fields present under ``trace_data[key]``, or None."""
        section = trace_data.get(key)
        if not isinstance(section, dict):
            return None
        picked = {field: section[field] for field in fields if field in section}
        return picked or None

    # Extract JSON sections
    data_json = extract_json_section(input_string, "data")
    layout_json = extract_json_section(input_string, "layout")
    config_json = extract_json_section(input_string, "config")

    # Parse JSON strings with relaxed rules
    data = parse_json(data_json) if data_json else None
    layout = parse_json(layout_json) if layout_json else None
    config = parse_json(config_json) if config_json else None

    if not data or not layout:
        raise ValueError("Invalid or missing data or layout JSON.")

    # A single trace given as an object (not wrapped in an array) is accepted.
    if isinstance(data, dict):
        data = [data]

    # Prepare traces for the plot
    traces = []
    stack_bars = False
    for trace_data in data:
        if not isinstance(trace_data, dict):
            raise ValueError(
                f"Each trace must be a JSON object, got: {type(trace_data).__name__}"
            )

        trace_type = trace_data.get('type')
        # Only the fields actually supplied are forwarded, so a marker without
        # a colour or a line without a width no longer raises KeyError.
        marker = _style(trace_data, 'marker', ('color',))
        line = _style(trace_data, 'line', ('color', 'width'))

        if trace_type in ('bar', 'stacked bar'):
            trace = go.Bar(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                name=trace_data.get('name', ''),
                marker=marker,
                text=trace_data.get('text', ''),
                hoverinfo=trace_data.get('hoverinfo', 'x+y+name'),
                orientation=trace_data.get('orientation', 'v'),
                offsetgroup=trace_data.get('offsetgroup', None),
                base=trace_data.get('base', None)
            )
            # Stacking is a layout setting (barmode), not a Bar trace
            # property, so it is recorded here and applied to the layout below.
            if trace_type == 'stacked bar' or trace_data.get('stackgroup'):
                stack_bars = True

        elif trace_type == 'line':
            trace = go.Scatter(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                mode='lines',
                name=trace_data.get('name', ''),
                line=line,
                hoverinfo='text',
                text=trace_data.get('text')
            )

        elif trace_type == 'scatter':
            trace = go.Scatter(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                mode=trace_data.get('mode', 'markers'),
                name=trace_data.get('name', ''),
                marker=marker,
                line=line,
                # Area charts may arrive as scatter traces carrying a fill;
                # forward the fill settings so the area is actually drawn.
                fill=trace_data.get('fill'),
                fillcolor=trace_data.get('fillcolor'),
                stackgroup=trace_data.get('stackgroup'),
                hoverinfo='text',
                text=trace_data.get('text')
            )

        elif trace_type == 'pie':
            trace = go.Pie(
                labels=trace_data.get('labels', []),
                values=trace_data.get('values', []),
                name=trace_data.get('name', ''),
                textinfo=trace_data.get('textinfo', 'percent+label'),
                hoverinfo=trace_data.get('hoverinfo', 'label+percent+name')
            )

        elif trace_type == 'area':
            trace = go.Scatter(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                mode='lines',
                name=trace_data.get('name', ''),
                # 'tozeroy' stays the default; an explicit fill overrides it.
                fill=trace_data.get('fill', 'tozeroy'),
                fillcolor=trace_data.get('fillcolor'),
                line=line,
                hoverinfo='text',
                text=trace_data.get('text')
            )

        elif trace_type == 'scatter3d':
            trace = go.Scatter3d(
                x=trace_data.get('x', []),
                y=trace_data.get('y', []),
                z=trace_data.get('z', []),
                mode=trace_data.get('mode', 'markers'),
                name=trace_data.get('name', ''),
                marker=marker,
                line=line,
                hoverinfo='text',
                text=trace_data.get('text')
            )

        elif trace_type == 'surface':
            trace = go.Surface(
                z=trace_data.get('z', []),
                name=trace_data.get('name', ''),
                colorscale=trace_data.get('colorscale', 'Viridis'),
                hoverinfo=trace_data.get('hoverinfo', 'z+name')
            )

        else:
            raise ValueError(f"Unsupported trace type: {trace_type}")

        traces.append(trace)

    # Apply stacking on the layout, without overriding an explicit barmode.
    if stack_bars and isinstance(layout, dict) and 'barmode' not in layout:
        layout = {**layout, 'barmode': 'stack'}

    # Create figure with the extracted layout and data
    fig = go.Figure(data=traces, layout=layout)

    # Render the figure with the config
    pio.show(fig, config=config)

    # Save the figure as an image unless the caller opted out
    if output_path:
        fig.write_image(output_path)


In [6]:
prompt1 = '''
Task: Extract detailed information from a given graph image and generate JSON files (data.json, layout.json, and config.json) compatible with Plotly. 
These JSON files should capture all aspects of the graph, including data points, layout configuration, and display settings.

Instructions:
Graph Analysis:

Identify and categorize the plot types present in the graph (e.g., line, scatter, bar, pie).
Extract the data points for each plot (x-values, y-values, and any other relevant data such as labels for pie charts).
Note any multiple plots on the same figure and their configurations.

Generate data.json:

For each plot, include details such as:
type: The type of plot (e.g., scatter, bar, pie).
x and y: Arrays of data points for the x and y axes.
labels and values for pie charts.
mode: For line and scatter plots (e.g., markers, lines, or markers+lines).
name: Legend entry for the plot.
marker: Properties such as color, size, and symbol for markers.
line: Properties such as color, width, and dash style for lines.
text: Hover text for each data point.
hoverinfo: Information displayed on hover (e.g., x+y+text).
Generate layout.json:
Copy the same layout as the original image, same color, same font, same size, 
same position, same orientation.
Include details such as:
title: The title of the graph, including text, font properties (family, size, color).
xaxis and yaxis: Configuration including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
legend: Properties including orientation, x, y, xanchor, font.
margin: Values for l (left), r (right), b (bottom), t (top), pad.
plot_bgcolor and paper_bgcolor: Background colors of the plot area and paper.
Generate config.json:

Include configuration settings such as:
responsive: Whether the graph is responsive (boolean).
displayModeBar: Whether the mode bar is displayed (boolean).
modeBarButtonsToRemove: List of mode bar buttons to remove (e.g., ["toImage"]).
scrollZoom: Whether scrolling zoom is enabled (boolean).
Output Requirements:

data.json: Contains an array of plot traces with their respective properties.
layout.json: Defines the layout configuration of the graph.
config.json: Specifies display and interaction configurations.
Example Output:

data.json: 

[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  }
  // Additional plots if present
]
layout.json:

{
  "title": {
    "text": "Graph Title",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
config.json:

{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
Graph Image: [Attach or provide the image of the graph for analysis]'''

In [7]:
# Send one chart image together with prompt1 to the model and print the raw reply.
# NOTE: the path is relative; the output recorded for this cell is a
# FileNotFoundError raised by input_image_setup for this location.
image_loc = 'final Graphs/with_label.jpg'
info = get_image_info(image_loc, prompt1)
print(info)

FileNotFoundError: Could not find image: final Graphs\with_label.jpg

In [6]:
prompt2 = '''
Task: Extract detailed information from a given graph image and generate JSON files (data.json, layout.json, and config.json) compatible with Plotly. Use the specified delimiters to wrap each JSON section.

Instructions:

Graph Analysis:

Identify and categorize the plot types present in the graph (e.g., line, scatter, bar, pie, areaplot etc).
Extract the data points for each plot (x-values, y-values, and any other relevant data such as labels for pie charts).
Note any multiple plots on the same figure and their configurations.
Generate data.json:

For each plot, include details such as:
type: The type of plot (e.g., scatter, bar, pie).
x and y: Arrays of data points for the x and y axes.
labels and values for pie charts.
mode: For line and scatter plots (e.g., markers, lines, or markers+lines).
name: Legend entry for the plot.
marker: Properties such as color, size, and symbol for markers.
line: Properties such as color, width, and dash style for lines.
text: Hover text for each data point.
hoverinfo: Information displayed on hover (e.g., x+y+text).
Generate layout.json:
Copy the same layout as the original image, same color, same font, same size, same position, same orientation, same everything.

Include details such as:
title: The title of the graph, including text, font properties (family, size, color).
xaxis and yaxis: Configuration including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
legend: Properties including orientation, x, y, xanchor, font.
margin: Values for l (left), r (right), b (bottom), t (top), pad.
plot_bgcolor and paper_bgcolor: Background colors of the plot area and paper.
Generate config.json:

Include configuration settings such as:
responsive: Whether the graph is responsive (boolean).
displayModeBar: Whether the mode bar is displayed (boolean).
modeBarButtonsToRemove: List of mode bar buttons to remove (e.g., ["toImage"]).
scrollZoom: Whether scrolling zoom is enabled (boolean).
Output Format:

Wrap each JSON output in specific XML-like tags:
<data> ... </data>
<layout> ... </layout>
<config> ... </config>
Make sure to follow the JSON formatting guidelines:
Double Quotes for Keys and Strings: Ensure that all keys and string values in JSON are enclosed in double quotes (").
Example: Instead of "type": bar, it should be "type": "bar".
No Trailing Commas: Remove any trailing commas after the last element in arrays ([]) or objects ({}).
Example: Instead of "color": "rgba(55, 128, 191, 0.6)",, it should be "color": "rgba(55, 128, 191, 0.6)"
Example Output:

<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  }
  // Additional plots if present
]
</data>
<layout>
{
  "title": {
    "text": "Graph Title",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
'''

In [7]:
prompt2v2 = '''
Task: Extract detailed information from a given graph image and generate JSON files (data.json, layout.json, and config.json) compatible with Plotly. Use the specified delimiters to wrap each JSON section.

Instructions:

Graph Analysis:

1. Identify and categorize the plot types present in the graph:
   - Common 2D plots: `scatter`, `bar`, `pie`, `area`, `line`, `stacked bar`. Make sure you have correctly identified the chart type
   - Advanced 3D plots: `scatter3d`, `surface`.
2. Extract the data points for each plot:
   - For 2D plots: Extract `x-values`, `y-values`, and any other relevant data (e.g., `labels` for pie charts).
   - For 3D plots: Extract `x-values`, `y-values`, and `z-values`.
3. Note any multiple plots on the same figure and their configurations (e.g., colors, stack groups, fill areas).

Generate `data.json`:

For each plot, include details such as:
- **type**: The type of plot (e.g., `scatter`, `bar`, `pie`, `area`, `scatter3d`, `surface`).
- **x**, **y**, and **z** (if applicable): Arrays of data points for the `x`, `y`, and `z` axes.
- **labels** and **values** for pie charts.
- **mode**: For line and scatter plots (e.g., `markers`, `lines`, `markers+lines`).
- **name**: Legend entry for the plot.
- **marker**: Properties such as color, size, and symbol for markers.
- **line**: Properties such as color, width, and dash style for lines.
- **text**: Hover text for each data point.
- **hoverinfo**: Information displayed on hover (e.g., `x+y+text`).
- **fill**: For area charts, specify the fill (e.g., `tozeroy`).
- **stackgroup**: For stacked bar charts, specify the group for stacking.

Generate `layout.json`:

Include details such as:
- **title**: The title of the graph, including text, font properties (family, size, color).
- **xaxis** and **yaxis**: Configuration including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
- **zaxis**: Configuration for 3D plots including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
- **legend**: Properties including orientation, x, y, xanchor, font.
- **margin**: Values for l (left), r (right), b (bottom), t (top), pad.
- **plot_bgcolor** and **paper_bgcolor**: Background colors of the plot area and paper.
- **scene**: For 3D plots, configuration for the 3D scene, including camera angles, lighting, and aspect ratio.

Generate `config.json`:

Include configuration settings such as:
- **responsive**: Whether the graph is responsive (boolean).
- **displayModeBar**: Whether the mode bar is displayed (boolean).
- **modeBarButtonsToRemove**: List of mode bar buttons to remove (e.g., ["toImage"]).
- **scrollZoom**: Whether scrolling zoom is enabled (boolean).

Output Format:

Wrap each JSON output in specific XML-like tags:
- `<data> ... </data>`
- `<layout> ... </layout>`
- `<config> ... </config>`

Make sure to follow the JSON formatting guidelines:
- **Double Quotes for Keys and Strings**: Ensure that all keys and string values in JSON are enclosed in double quotes (").
  - Example: Instead of `"type": bar`, it should be `"type": "bar"`.
- **No Trailing Commas**: Remove any trailing commas after the last element in arrays ([]) or objects ({}).
  - Example: Instead of `"color": "rgba(55, 128, 191, 0.6)",`, it should be `"color": "rgba(55, 128, 191, 0.6)"`.

Example Output:

```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "area",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "bar",
    "x": ["A", "B", "C"],
    "y": [10, 15, 13],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [6, 7, 8],
    "mode": "lines",
    "name": "Area Chart",
    "fill": "tozeroy",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter3d",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "z": [7, 8, 9],
    "mode": "markers",
    "name": "3D Scatter Plot",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 5
    },
    "hoverinfo": "x+y+z+text"
  },
  {
    "type": "surface",
    "z": [
      [10, 10.625, 12.5, 15.625, 20],
      [5.625, 6.25, 8.125, 11.25, 15.625],
      [0, 1.25, 3.125, 6.25, 10.625]
    ],
    "name": "Surface Plot",
    "colorscale": "Viridis",
    "hoverinfo": "z+name"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Complex Graph Example",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff",
  "scene": {
    "xaxis": {"title": "X Axis"},
    "yaxis": {"title": "Y Axis"},
    "zaxis": {"title": "Z Axis"}
  }
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
'''

In [7]:
# Send one chart image together with prompt2v2 (which asks for <data>/<layout>/<config>
# tagged sections) to the model and print the raw reply.
# `info` is reused by the plotting call in the next cell.
image_loc = 'graphs/Area Plots/areaplot-2.png'
info = get_image_info(image_loc, prompt2v2)
print(info)

 ```xml
<data>
[
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [520, 560, 540, 580, 600, 570, 490, 510, 670, 590, 560, 630],
    "name": "Housing",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [400, 420, 410, 430, 390, 410, 440, 420, 380, 400, 410, 430],
    "name": "Food",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [200, 220, 210, 230, 190, 210, 240, 220, 180, 200, 210, 230],
    "name": "Utility",
    "marker": {
      "color": "rgba(255, 159, 64, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [100, 120, 110, 130

In [8]:
# Parse the tagged sections out of `info` and render them as a Plotly figure.
# NOTE(review): the reply printed by the previous cell appears to stop before
# `</data>`; if `info` really is cut off there, this call raises
# ValueError("Invalid or missing data or layout JSON.") — confirm.
plot_from_ai_output_v2(info)

In [9]:
prompt2v3 = ''' Task: Extract detailed information from a given graph image and generate JSON files (data.json, layout.json, and config.json) compatible with Plotly. Use the specified delimiters to wrap each JSON section.

Instructions:

Graph Analysis:

1. Identify and categorize the plot types present in the graph:
   - Common 2D plots: `scatter`, `bar`, `pie`, `area` (as `scatter` with `fill`), `line`, `stacked bar`.
   - Advanced 3D plots: `scatter3d`, `surface`.
2. Extract the data points for each plot:
   - For 2D plots: Extract `x-values`, `y-values`, and any other relevant data (e.g., `labels` for pie charts).
   - For 3D plots: Extract `x-values`, `y-values`, and `z-values`.
3. Note any multiple plots on the same figure and their configurations (e.g., colors, stack groups, fill areas).

Generate `data.json`:

For each plot, include details such as:
- **type**: The type of plot (e.g., `scatter`, `bar`, `pie`, `area`, `scatter3d`, `surface`).
  - For area charts, use `type: "scatter"` with `fill` specified.
- **x**, **y**, and **z** (if applicable): Arrays of data points for the `x`, `y`, and `z` axes.
- **labels** and **values** for pie charts.
- **mode**: For line and scatter plots (e.g., `markers`, `lines`, `markers+lines`).
- **name**: Legend entry for the plot.
- **marker**: Properties such as color, size, and symbol for markers.
- **line**: Properties such as color, width, and dash style for lines.
- **text**: Hover text for each data point.
- **hoverinfo**: Information displayed on hover (e.g., `x+y+text`).
- **fill**: For area charts, specify the fill (e.g., "tozeroy", "tonexty").
- **fillcolor**: The color used to fill the area under the line.
- **stackgroup**: For stacked bar charts, specify the group for stacking.

Generate `layout.json`:

Include details such as:
- **title**: The title of the graph, including text, font properties (family, size, color).
- **xaxis** and **yaxis**: Configuration including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
- **zaxis**: Configuration for 3D plots including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
- **legend**: Properties including orientation, x, y, xanchor, font.
- **margin**: Values for l (left), r (right), b (bottom), t (top), pad.
- **plot_bgcolor** and **paper_bgcolor**: Background colors of the plot area and paper.
- **scene**: For 3D plots, configuration for the 3D scene, including camera angles, lighting, and aspect ratio.

Generate `config.json`:

Include configuration settings such as:
- **responsive**: Whether the graph is responsive (boolean).
- **displayModeBar**: Whether the mode bar is displayed (boolean).
- **modeBarButtonsToRemove**: List of mode bar buttons to remove (e.g., ["toImage"]).
- **scrollZoom**: Whether scrolling zoom is enabled (boolean).

Output Format:

Wrap each JSON output in specific XML-like tags:
- `<data> ... </data>`
- `<layout> ... </layout>`
- `<config> ... </config>`

Make sure to follow the JSON formatting guidelines:
- **Double Quotes for Keys and Strings**: Ensure that all keys and string values in JSON are enclosed in double quotes (").
  - Example: Instead of `"type": bar`, it should be `"type": "bar"`.
- **No Trailing Commas**: Remove any trailing commas after the last element in arrays ([]) or objects ({}).
  - Example: Instead of `"color": "rgba(55, 128, 191, 0.6)",`, it should be `"color": "rgba(55, 128, 191, 0.6)"`.

Example Output:

```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "bar",
    "x": ["A", "B", "C"],
    "y": [10, 15, 13],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [6, 7, 8],
    "mode": "lines",
    "fill": "tozeroy",
    "name": "Area Chart",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    },
    "fillcolor": "rgba(54, 162, 235, 0.5)"
  },
  {
    "type": "scatter3d",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "z": [7, 8, 9],
    "mode": "markers",
    "name": "3D Scatter Plot",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 5
    },
    "hoverinfo": "x+y+z+text"
  },
  {
    "type": "surface",
    "z": [
      [10, 10.625, 12.5, 15.625, 20],
      [5.625, 6.25, 8.125, 11.25, 15.625],
      [0, 1.25, 3.125, 6.25, 10.625]
    ],
    "name": "Surface Plot",
    "colorscale": "Viridis",
    "hoverinfo": "z+name"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Complex Graph Example",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff",
  "scene": {
    "xaxis": {"title": "X Axis"},
    "yaxis": {"title": "Y Axis"},
    "zaxis": {"title": "Z Axis"}
  }
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
 '''

In [8]:
# Prompt v5: graph-image -> Plotly JSON extraction instructions.
# The example <layout> block must itself be valid JSON, otherwise the model is
# shown a malformed template (the "size" key used to be split across lines).
prompt2v5 =''' 
Task: Extract detailed information from a given graph image and generate JSON files (`data.json`, `layout.json`, and `config.json`) compatible with Plotly. Use the specified delimiters to wrap each JSON section.

### Instructions:

#### Graph Analysis:

1. **Identify and categorize the plot types** present in the graph:
   - **Common 2D plots**: `scatter`, `line`, `bar`, `pie`, `bubble`, `dot`, `area`, `horizontal bar`, `stacked bar`, `filled area`.
   - **Advanced 3D plots**: `scatter3d`, `surface`.
   - **Specialty plots**: `gantt`, `sunburst`, `table`, `sankey`, `treemap`, `webgl`, `svg`, `icicle`, `patterns`, `hatching`, `texture`, `dumbbell`.

2. **Extract data points** for each plot:
   - For **2D plots**: Extract `x-values`, `y-values`, and other relevant data (e.g., `labels` for pie charts).
   - For **3D plots**: Extract `x-values`, `y-values`, and `z-values`.
   - For **specialty plots**: Extract relevant properties like categories, labels, values, parents, etc.

3. **Identify multiple plots** on the same figure and their configurations (e.g., colors, stack groups, fill areas).

#### Generate `data.json`:

For each plot, include details such as:
- **type**: The type of plot (e.g., `scatter`, `line`, `bar`, `pie`, `bubble`, `dot`, `area`, `horizontal bar`, `stacked bar`, `filled area`, `scatter3d`, `surface`, `gantt`, `sunburst`, `table`, `sankey`, `treemap`, `webgl`, `svg`, `icicle`, `patterns`, `hatching`, `texture`, `dumbbell`).
- **x**, **y**, and **z** (if applicable): Arrays of data points for the `x`, `y`, and `z` axes.
- **labels** and **values** for pie and bubble charts.
- **parents** for hierarchical charts like `sunburst` and `treemap`.
- **mode**: For line and scatter plots (e.g., `markers`, `lines`, `markers+lines`).
- **name**: Legend entry for the plot.
- **marker**: Properties such as color, size, and symbol for markers.
- **line**: Properties such as color, width, and dash style for lines.
- **text**: Hover text for each data point.
- **hoverinfo**: Information displayed on hover (e.g., `x+y+text`).
- **fill**: For area charts, specify the fill (e.g., "tozeroy", "tonexty").
- **fillcolor**: The color used to fill the area under the line.
- **stackgroup**: For stacked bar charts, specify the group for stacking.

#### Generate `layout.json`:

Include details such as:
- **title**: The title of the graph, including text, font properties (family, size, color).
- **xaxis** and **yaxis**: Configuration including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
- **zaxis**: Configuration for 3D plots including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
- **legend**: Properties including orientation, x, y, xanchor, font.
- **margin**: Values for l (left), r (right), b (bottom), t (top), pad.
- **plot_bgcolor** and **paper_bgcolor**: Background colors of the plot area and paper.
- **scene**: For 3D plots, configuration for the 3D scene, including camera angles, lighting, and aspect ratio.
- **domain**: For hierarchical and network plots (like `sunburst`, `treemap`, `sankey`).

#### Generate `config.json`:

Include configuration settings such as:
- **responsive**: Whether the graph is responsive (boolean).
- **displayModeBar**: Whether the mode bar is displayed (boolean).
- **modeBarButtonsToRemove**: List of mode bar buttons to remove (e.g., ["toImage"]).
- **scrollZoom**: Whether scrolling zoom is enabled (boolean).

### Output Format:

Wrap each JSON output in specific XML-like tags:
- `<data> ... </data>`
- `<layout> ... </layout>`
- `<config> ... </config>`

Make sure to follow the JSON formatting guidelines:
- **Double Quotes for Keys and Strings**: Ensure that all keys and string values in JSON are enclosed in double quotes (`"`).
  - Example: Instead of `"type": bar`, it should be `"type": "bar"`.
- **No Trailing Commas**: Remove any trailing commas after the last element in arrays ([]) or objects ({}).
  - Example: Instead of `"color": "rgba(55, 128, 191, 0.6)",`, it should be `"color": "rgba(55, 128, 191, 0.6)"`.

### Example Output:

```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "bar",
    "x": ["A", "B", "C"],
    "y": [10, 15, 13],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [6, 7, 8],
    "mode": "lines",
    "fill": "tozeroy",
    "name": "Area Chart",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    },
    "fillcolor": "rgba(54, 162, 235, 0.5)"
  },
  {
    "type": "scatter3d",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "z": [7, 8, 9],
    "mode": "markers",
    "name": "3D Scatter Plot",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 5
    },
    "hoverinfo": "x+y+z+text"
  },
  {
    "type": "surface",
    "z": [
      [10, 10.625, 12.5, 15.625, 20],
      [5.625, 6.25, 8.125, 11.25, 15.625],
      [0, 1.25, 3.125, 6.25, 10.625]
    ],
    "name": "Surface Plot",
    "colorscale": "Viridis",
    "hoverinfo": "z+name"
  },
  {
    "type": "sunburst",
    "labels": ["A", "B", "C", "D"],
    "parents": ["", "A", "A", "C"],
    "values": [10, 20, 30, 40],
    "name": "Sunburst Chart"
  },
  {
    "type": "table",
    "header": {
      "values": [["Column 1"], ["Column 2"]],
      "align": "center",
      "line": {"width": 1, "color": "black"},
      "fill": {"color": "gray"},
      "font": {"family": "Arial", "size": 12, "color": "white"}
    },
    "cells": {
      "values": [[1, 2, 3], [4, 5, 6]],
      "align": "center",
      "line": {"width": 1, "color": "black"},
      "fill": {"color": ["white", "lightgray"]},
      "font": {"family": "Arial", "size": 12, "color": ["black"]}
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Complex Graph Example",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff",
  "scene": {
    "xaxis": {"title": "X Axis"},
    "yaxis": {"title": "Y Axis"},
    "zaxis": {"title": "Z Axis"}
  }
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
```
'''

In [9]:
# Send the area-plot image together with the prompt2v5 instructions to
# get_image_info and print the raw text reply.
image_loc = 'graphs/Area Plots/areaplot-2.png'
info = get_image_info(image_loc, prompt2v5)
print(info)

 ```xml
<data>
[
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [520, 550, 510, 540, 580, 550, 480, 500, 600, 520, 530, 670],
    "name": "Housing"
  },
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [400, 420, 380, 410, 450, 420, 350, 370, 480, 400, 410, 500],
    "name": "Food"
  },
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [200, 210, 190, 200, 220, 210, 180, 190, 240, 200, 210, 250],
    "name": "Utility"
  },
  {
    "type": "bar",
    "x": ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"],
    "y": [100, 110, 90, 100, 120, 110, 80, 90, 120, 100, 110, 130],
    "name": "Insurance"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Monthly Expenses",
    "font": {
      "family": "Arial, sans-serif",
      "

In [28]:
def convert_plotly_chart(data: str, type_from: str, type_to: str) -> str:
    """Ask the text model to rewrite a tagged Plotly configuration as another chart type.

    Args:
        data: Current chart configuration as text; it is embedded verbatim in
            the "Input" section of the prompt.
        type_from: Name of the chart type the configuration currently describes.
        type_to: Name of the chart type to convert to.

    Returns:
        Whatever ``gemini_model`` returns for the assembled prompt.
    """
    # Define the conversion prompt
    prompt = f'''
    Task: Convert a given Plotly chart configuration from {type_from} to {type_to}. Generate updated `data.json`, `layout.json`, and `config.json` files compatible with the new chart type. Use the specified delimiters to wrap each JSON section.

    Instructions:

    Input:
    You are provided with the current chart configuration in XML format:
    ```xml
    {data}
    ```

    Output:
    Convert the provided chart to {type_to} type while preserving as much relevant data and layout configuration as possible. Wrap the updated JSON outputs in XML-like tags:
    - `<data> ... </data>`
    - `<layout> ... </layout>`
    - `<config> ... </config>`

    Guidelines:
    1. **Preserve Data Integrity**: Keep the data points (`x`, `y`, `z`) intact and correctly map them to the new chart type.
    2. **Adjust Layout**: Update titles, axis labels, and any specific configurations to fit the new chart type.
    3. **Chart-Specific Adjustments**: Apply properties unique to the new chart type, such as `mode` for `scatter`, `colorscale` for `heatmap`, `fill` for `area`, etc.
    4. **Update Config**: Ensure configuration settings are suitable for the new chart type, maintaining features like responsiveness and display options.

    Example Conversion:

    Given a `bar` chart as input and converting to a `line` chart:

    **Input**:
    ```xml
    <data>
    [
      {{
        "type": "bar",
        "x": ["A", "B", "C"],
        "y": [10, 15, 12],
        "name": "Bar Chart"
      }}
    ]
    </data>
    <layout>
    {{
      "title": {{"text": "Bar Chart Example"}}
    }}
    </layout>
    <config>
    {{
      "responsive": true
    }}
    </config>
    ```

    **Output**:
    ```xml
    <data>
    [
      {{
        "type": "scatter",
        "x": ["A", "B", "C"],
        "y": [10, 15, 12],
        "mode": "lines+markers",
        "name": "Line Chart"
      }}
    ]
    </data>
    <layout>
    {{
      "title": {{"text": "Line Chart Example"}},
      "xaxis": {{"title": "Category"}},
      "yaxis": {{"title": "Values"}}
    }}
    </layout>
    <config>
    {{
      "responsive": true
    }}
    </config>
    ```

    Use the above instructions to convert the provided Plotly chart to {type_to}.
    '''

    # `data` is already interpolated into the prompt's "Input" section, so the
    # prompt alone is sent; prepending `data` again would submit the chart twice.
    return gemini_model(prompt)


In [29]:
# NOTE: this rebinds `info` to the converted reply, so re-running the cell feeds
# the previous conversion result back in rather than the original extraction.
info = convert_plotly_chart(info, 'area', 'pie')

In [30]:
# Show the raw converted reply string as the cell output.
info

'**<data> ... </data>**\n```xml\n<data>\n[\n  {\n    "type": "pie",\n    "values": [1500, 1200, 600, 300],\n    "labels": ["Housing", "Food", "Utility", "Insurance"],\n    "hole": 0.4,\n    "name": "Monthly Expenses",\n    "textinfo": "label+percent",\n    "textposition": "inside"\n  }\n]\n</data>\n```\n\n**<layout> ... </layout>**\n```xml\n<layout>\n{\n  "title": {\n    "text": "Monthly Expenses",\n    "font": {\n      "family": "Arial, sans-serif",\n      "size": 24,\n      "color": "#000000"\n    }\n  },\n  "showlegend": true,\n  "margin": {\n    "l": 60,\n    "r": 30,\n    "b": 60,\n    "t": 60\n  },\n  "plot_bgcolor": "#ffffff",\n  "paper_bgcolor": "#ffffff"\n}\n</layout>\n```\n\n**<config> ... </config>**\n```xml\n<config>\n{\n  "responsive": true,\n  "displayModeBar": true,\n  "modeBarButtonsToRemove": ["toImage"],\n  "scrollZoom": true\n}\n</config>\n```'

In [26]:
# Code to get information using Gemini model

def gemini_model(prompt):
    """Submit ``prompt`` to the "gemini-pro" model and return the reply text as a ``str``."""
    genai.configure(api_key=gemini_key)
    reply = genai.GenerativeModel(model_name="gemini-pro").generate_content(prompt)
    return str(reply.text)

In [31]:
# Plot the converted reply.
# NOTE(review): the reply displayed earlier starts with a markdown header that
# contains a literal `<data> ... </data>` — presumably the first tag match is not
# JSON, which would explain the ValueError below; confirm against
# plot_from_ai_output_v2.
plot_from_ai_output_v2(info)

ValueError: Invalid JSON: Expecting value: line 1 column 1 (char 0)

In [30]:
# Forward slashes keep the path portable and avoid the invalid "\A" / "\M"
# escape sequences of a backslash-separated, non-raw string literal.
image_loc = 'graphs/Area Plots/Matplotlib-with-Pandas-Area-Plot.png'
info = get_image_info(image_loc, prompt2v3)
print(info)

 ```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.2, 0.5, 1.1, 2.0, 1.8, 2.2, 1.5, 3.3],
    "name": "A"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.1, 0.3, 0.7, 1.2, 1.1, 1.5, 1.0, 2.5],
    "name": "B"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.3, 0.6, 1.3, 2.4, 2.2, 2.6, 1.8, 3.7],
    "name": "C"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.0, 0.2, 0.5, 1.0, 0.9, 1.2, 0.8, 2.0],
    "name": "D"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.4, 0.8, 1.5, 2.6, 2.4, 2.8, 2.0, 3.9],
    "name": "E"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Area Chart"
  },
  "xaxis": {
    "title": {
      "text": "Time"
    }
  },
  "yaxis": {
    "title": {
      "text": "Value"
    }
  },
  "legend": {
    "orientation": "h",
    "y": -0.5
  }
}
</layout>
<config>
{
  "responsive": true
}
</config>
```


In [31]:
# Plot the reply currently bound to `info`.
plot_from_ai_output_v2(info)

In [17]:
# Pie chart sample 1: extract with prompt2, print the raw reply, then plot it.
image_loc = 'pie/1.jpeg'
info2 = get_image_info(image_loc, prompt2)
print(info2)
plot_from_ai_output(info2)

 <data>
[
  {
    "type": "pie",
    "labels": ["Orange", "Mango", "Banana", "Apple"],
    "values": [25, 25, 25, 25]
  }
]
</data>
<layout>
{
  "title": {
    "text": "Fruit Market Share",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>


In [18]:
# Bar chart sample: extract with prompt2, print the raw reply, then plot it.
# NOTE(review): the printed reply below stops mid-JSON — presumably a truncated
# model response; confirm whether plotting can succeed on it.
image_loc = '1711725089159.png'
info1 = get_image_info(image_loc, prompt2)
print(info1)
plot_from_ai_output(info1)

 <data>
[
  {
    "type": "bar",
    "x": ["Africa", "East Asia", "South Asia", "Western Asia", "Latin America and the Caribbean"],
    "y": [-1.7, -0.8, -1.5, -1.3, -0.9],
    "name": "Developing regions",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Developing regions",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Region",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)",
    "tickangle": -45
  },
  "yaxis": {
    "title": {
      "text": "Percentage of 2019 GDP",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0,

In [19]:
# Pie chart sample 2: extract with prompt2, print the raw reply, then plot it.
# Rebinds the shared `info2` name used by the other sample cells.
image_loc = 'pie/2.jpeg'
info2 = get_image_info(image_loc, prompt2)
print(info2)
plot_from_ai_output(info2)

 <data>
[
  {
    "type": "pie",
    "labels": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5", "Task 6", "Task 7"],
    "values": [30, 120, 60, 120, 90, 30, 60],
    "hoverinfo": "label+value+percent",
    "textinfo": "label+value+percent",
    "marker": {
      "colors": ["rgba(255, 102, 102, 0.6)", "rgba(255, 153, 102, 0.6)", "rgba(255, 204, 102, 0.6)", "rgba(153, 255, 102, 0.6)", "rgba(102, 255, 153, 0.6)", "rgba(102, 255, 204, 0.6)", "rgba(102, 153, 255, 0.6)"]
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Daily Task Breakdown",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>
<conf

In [22]:
# Pie chart sample 3: extract with prompt2, print the raw reply, then plot it.
image_loc = 'pie/3.jpg'
info2 = get_image_info(image_loc, prompt2)
print(info2)
plot_from_ai_output(info2)

  <data>
[
  {
    "type": "pie",
    "labels": ["Bronchiectasis", "Capillaritis", "Exudative phase", "Interstitial edema", "Microthrombi", "Proliferative phase", "Vasculitis"],
    "values": [27, 22, 25, 10, 10, 4, 2]
  }
]
</data>
<layout>
{
  "title": {
    "text": "Distribution of Patterns",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>


In [24]:
# Grouped bar sample (2.2_image2): extract with prompt2, print the raw reply, then plot it.
image_loc = 'new graphs/bar/2.2_image2.png'
info2 = get_image_info(image_loc, prompt2)
print(info2)
plot_from_ai_output(info2)

 <data>
[
  {
    "type": "bar",
    "x": [
      "People living in poverty",
      "Immigrants",
      "Persons with disabilities",
      "Older persons",
      "Women",
      "Youth"
    ],
    "y": [
      144,
      137,
      148,
      152,
      162,
      155
    ],
    "name": "2020",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "People living in poverty",
      "Immigrants",
      "Persons with disabilities",
      "Older persons",
      "Women",
      "Youth"
    ],
    "y": [
      163,
      167,
      157,
      144,
      152,
      142
    ],
    "name": "2022",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)"
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Number of countries offering services for people in vulnerable situations that can be completed partially or fully online, 2020 and 2022 (Percentage change)",
    "font": {
      "family": "Arial, sans-serif",
      "size": 18,
      "col

In [25]:
# Grouped bar sample (2.2_image3): extract with prompt2, print the raw reply, then plot it.
image_loc = 'new graphs/bar/2.2_image3.png'
info2 = get_image_info(image_loc, prompt2)
print(info2)
plot_from_ai_output(info2)

 <data>
[
  {
    "type": "bar",
    "x": [
      "Youth", 
      "Women", 
      "Older People", 
      "Immigrants", 
      "Persons with Disabilities", 
      "People living in poverty"
    ],
    "y": [
      35, 
      41, 
      34, 
      30, 
      36, 
      31
    ],
    "name": "2018",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Youth", 
      "Women", 
      "Older People", 
      "Immigrants", 
      "Persons with Disabilities", 
      "People living in poverty"
    ],
    "y": [
      36, 
      33, 
      32, 
      27, 
      37, 
      22
    ],
    "name": "2020",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Youth", 
      "Women", 
      "Older People", 
      "Immigrants", 
      "Persons with Disabilities", 
      "People living in poverty"
    ],
    "y": [
      36, 
      42, 
      34, 
      41, 
      42, 
      31
    ],
    "name"

In [26]:
# Grouped bar sample (2.4_image1): extract with prompt2, print the raw reply, then plot it.
image_loc = 'new graphs/bar/2.4_image1.png'
info2 = get_image_info(image_loc, prompt2)
print(info2)
plot_from_ai_output(info2)

  <data>
[
  {
    "type": "bar",
    "x": [
      "High Income",
      "Upper Middle Income",
      "Lower Middle Income",
      "Low Income"
    ],
    "y": [
      24,
      26,
      23,
      13
    ],
    "name": "Number of Cities",
    "marker": {
      "color": [
        "rgba(55, 128, 191, 0.6)",
        "rgba(55, 128, 191, 0.6)",
        "rgba(55, 128, 191, 0.6)",
        "rgba(55, 128, 191, 0.6)"
      ]
    }
  },
  {
    "type": "bar",
    "x": [
      "High Income",
      "Upper Middle Income",
      "Lower Middle Income",
      "Low Income"
    ],
    "y": [
      9,
      5,
      14,
      5
    ],
    "name": "Very High LOSI",
    "marker": {
      "color": [
        "rgba(255, 127, 14, 0.6)",
        "rgba(255, 127, 14, 0.6)",
        "rgba(255, 127, 14, 0.6)",
        "rgba(255, 127, 14, 0.6)"
      ]
    }
  },
  {
    "type": "bar",
    "x": [
      "High Income",
      "Upper Middle Income",
      "Lower Middle Income",
      "Low Income"
    ],
    "y": [
      

In [27]:
# Line chart sample: extract with prompt2, print the raw reply, then plot it.
image_loc = 'new graphs/linechart_other1.jpg'
info2 = get_image_info(image_loc, prompt2)
print(info2)
plot_from_ai_output(info2)

 <data>
[
  {
    "type": "scatter",
    "x": [1972, 1974, 1976, 1978, 1980, 1982, 1984, 1986, 1988, 1990, 1992, 1994, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016],
    "y": [2.01, 2.03, 2.25, 2.23, 2.19, 2.19, 2.25, 2.25, 2.21, 2.21, 2.19, 2.19, 2.19, 2.21, 2.23, 2.23, 2.27, 2.29, 2.29, 2.19, 2.19, 2.17, 2.15],
    "mode": "lines",
    "name": "Happiness",
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 3
    }
  },
  {
    "type": "scatter",
    "x": [1972, 1974, 1976, 1978, 1980, 1982, 1984, 1986, 1988, 1990, 1992, 1994, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016],
    "y": [20000, 22000, 24000, 26000, 28000, 30000, 32000, 34000, 36000, 38000, 40000, 42000, 44000, 46000, 48000, 50000, 52000, 53000, 54000, 55000, 55000, 53000, 51000],
    "mode": "lines",
    "name": "GDP Per Capita",
    "line": {
      "color": "rgba(14, 127, 255, 1)",
      "width": 3
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Average Ha

In [10]:
import time
import json
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

# Function to update or create the documentation
def update_doc(doc_path, image_loc, prompt, data, layout, config, updated_data, updated_layout,updated_config, chart_path, performance_metrics):
    """Append one "Test Run" section to the Word document at ``doc_path`` and save it.

    If no document can be opened at ``doc_path`` a new one is started with a
    title heading. The section contains the source image with a caption, the
    prompt text, the extracted and updated JSON (pretty-printed), the updated
    chart image and a performance-metrics table.

    Args:
        doc_path: Path the document is loaded from and saved to.
        image_loc: Path of the source graph image to embed.
        prompt: Prompt text to record.
        data, layout, config: JSON-serialisable objects extracted from the image.
        updated_data, updated_layout, updated_config: JSON-serialisable objects
            after the update step.
        chart_path: Path of the rendered chart image to embed.
        performance_metrics: Iterable of rows; each row holds at most four
            values matching the table headers (values are converted with ``str``).
    """
    # Local import: lets the handler below catch only the error python-docx
    # raises when no package exists at the path, instead of a bare ``except``.
    from docx.opc.exceptions import PackageNotFoundError

    # Check if the document exists or create a new one
    try:
        document = Document(doc_path)
        document.add_page_break()
    except PackageNotFoundError:
        document = Document()
        document.add_heading('Research Project Documentation', level=1)
        document.add_page_break()

    # Add a new section with a title
    document.add_heading('Test Run ', level=1)

    # Add the image with caption
    _add_centered_picture(document, image_loc)
    caption = document.add_paragraph(f'Figure: {image_loc}')
    caption.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    # Add Prompt
    document.add_heading('Prompt Used', level=2)
    prompt_paragraph = document.add_paragraph(prompt)
    prompt_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

    # Add Extracted JSON
    document.add_heading('Extracted JSON', level=2)
    for heading, payload in (('data', data), ('layout', layout), ('config', config)):
        _add_json_block(document, heading, payload)

    # Add Updated JSON
    document.add_heading('Updated JSON', level=2)
    for heading, payload in (
        ('updated_data', updated_data),
        ('updated_layout', updated_layout),
        ('updated_config', updated_config),
    ):
        _add_json_block(document, heading, payload)

    # Add Updated Chart
    document.add_heading('Updated Chart', level=2)
    _add_centered_picture(document, chart_path)

    # Add Performance Metrics Table
    document.add_heading('Performance Metrics', level=2)
    table = document.add_table(rows=1, cols=4)
    table.style = 'Table Grid'

    # Define table headers
    headers = ["Task", "Model Used", "Description", "Time Taken (s)"]
    hdr_cells = table.rows[0].cells
    for i, header in enumerate(headers):
        hdr_cells[i].text = header
        header_paragraph = hdr_cells[i].paragraphs[0]
        header_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        # Bold is a run-level property; assigning it on the paragraph has no effect.
        for run in header_paragraph.runs:
            run.bold = True

    # Add metrics data to the table
    for metric in performance_metrics:
        row_cells = table.add_row().cells
        for i, value in enumerate(metric):
            # Cell text must be a string; convert so numeric metrics are accepted too.
            row_cells[i].text = str(value)
            row_cells[i].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    # Save the document
    document.save(doc_path)
    print(f"Document saved as {doc_path}")


def _add_json_block(document, heading, payload):
    """Add a level-3 heading followed by ``payload`` pretty-printed as JSON, left-aligned."""
    document.add_heading(heading, level=3)
    json_paragraph = document.add_paragraph(json.dumps(payload, indent=4))
    json_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


def _add_centered_picture(document, picture_path):
    """Insert the picture at 4.5 inches wide and centre the paragraph that holds it."""
    document.add_picture(picture_path, width=Inches(4.5))
    document.paragraphs[-1].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


In [8]:
def update_info(extracted_json):
    """Placeholder for the update step: hands ``extracted_json`` back untouched."""
    unchanged = extracted_json
    return unchanged

In [12]:
def _parse_plotly_sections(raw_output):
    """Parse the <data>, <layout> and <config> sections of a tagged response.

    Returns a ``(data, layout, config)`` tuple. A section for which
    ``extract_json_section`` returns a falsy value is reported as ``None``;
    a section that is present but not valid JSON makes ``json.loads`` raise
    ``json.JSONDecodeError``.
    """
    parsed = []
    for tag in ("data", "layout", "config"):
        section = extract_json_section(raw_output, tag)
        parsed.append(json.loads(section) if section else None)
    return tuple(parsed)


def trail_run(image_loc, prompt, doc_path='Research_Project_Documentation.docx'):
    """Run the image -> JSON -> plot -> document pipeline once and time each step.

    Args:
        image_loc: Path of the chart image handed to ``get_image_info``.
        prompt: Prompt text handed to ``get_image_info`` and recorded in the
            document.
        doc_path: Path passed to ``update_doc`` for the output document.
            Defaults to the file name that was previously hard-coded.

    Raises:
        json.JSONDecodeError: If any extracted section is not valid JSON.
            Other exceptions from the called helpers propagate unchanged.
    """
    # plot_from_ai_output_v2 is not given a path here; presumably it writes
    # this file itself — TODO confirm against its definition.
    chart_path = 'new_image.png'

    # Step 1: ask the vision model for the tagged JSON description of the image.
    # perf_counter is monotonic, so the interval cannot be skewed by clock changes.
    started = time.perf_counter()
    extracted_json = get_image_info(image_loc, prompt)
    time_info = time.perf_counter() - started
    print("Extracted Json:\n", extracted_json)
    data, layout, config = _parse_plotly_sections(extracted_json)

    # Step 2: update the extracted JSON and parse the result the same way.
    started = time.perf_counter()
    updated_json = update_info(extracted_json)
    time_update = time.perf_counter() - started
    print("Updated Json:\n", updated_json)
    updated_data, updated_layout, updated_config = _parse_plotly_sections(updated_json)

    # Step 3: render the updated JSON.
    started = time.perf_counter()
    plot_from_ai_output_v2(updated_json)
    time_plot = time.perf_counter() - started

    # One row per step: task, model label, description, elapsed seconds (as text).
    # The model labels are fixed strings, not read from the calls above.
    performance_metrics = [
        ["Processing Image to JSON", "Gemini Vision Pro (default)", "Time taken to convert the uploaded image to JSON format", f"{time_info:.2f}"],
        ["Updating JSON", "ChatGPT 3.5 (Default)", "Time taken to update the JSON with new data", f"{time_update:.2f}"],
        ["Plotting Updated JSON", "N/A", "Time taken to plot the updated JSON", f"{time_plot:.2f}"]
    ]

    # Write everything gathered above into the documentation file.
    update_doc(doc_path, image_loc, prompt, data, layout, config, updated_data, updated_layout, updated_config, chart_path, performance_metrics)

Trial Runs

In [42]:
# End-to-end run (extract JSON, plot, write the docx) on with_label.jpg using prompt2.
# Recorded output: a single trace named "Inflation Rate"; the document was saved.
trail_run('final Graphs/with_label.jpg', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "line",
    "x": ["Oct-21", "Nov-22", "Dec-21", "Jan-22", "Feb-22", "Mar-22", "Apr-22", "May-22", "June-22", "Jul-22"],
    "y": [0.85, 1.87, 4.05, 5.43, 5.85, 7.68, 8.31, 7.97, 7.75, 6.69],
    "mode": "lines",
    "name": "Inflation Rate",
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "All India year on year inflation Rate",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Month",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)",
    "tickangle": 45
  },
  "yaxis": {
    "title": {
      "text": "Inflation Rate",
      "font": {
        "family": "Arial, sans-serif",
      

Document saved as Research_Project_Documentation.docx


In [34]:
# Same image and prompt as the preceding cell; no output is recorded for this cell.
trail_run('final Graphs/with_label.jpg', prompt2)

In [44]:
# End-to-end run on bar/2.2_image2.png using prompt2.
# Recorded output: two bar traces named "2020" and "2022"; the document was saved.
trail_run('new graphs/bar/2.2_image2.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": [
      "People living in poverty",
      "Immigrants",
      "Persons with disabilities",
      "Older persons",
      "Women",
      "Youth"
    ],
    "y": [
      144,
      137,
      148,
      152,
      162,
      155
    ],
    "name": "2020",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "People living in poverty",
      "Immigrants",
      "Persons with disabilities",
      "Older persons",
      "Women",
      "Youth"
    ],
    "y": [
      163,
      167,
      157,
      144,
      152,
      142
    ],
    "name": "2022",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)"
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Number of countries offering services for people in vulnerable situations that can be completed partially or fully online, 2020 and 2022 (Percentage change)",
    "font": {
      "family": "Arial, sans-serif",
      "siz

Document saved as Research_Project_Documentation.docx


In [45]:
# End-to-end run on bar/2.2_image3.png using prompt2.
# Recorded output (visible part): bar traces "2018" and "2020" plus a third bar trace.
trail_run('new graphs/bar/2.2_image3.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": [
      "Youth", 
      "Women", 
      "Older People", 
      "Immigrants", 
      "Persons with Disabilities", 
      "People living in poverty"
    ],
    "y": [
      35, 
      41, 
      34, 
      30, 
      36, 
      31
    ],
    "name": "2018",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Youth", 
      "Women", 
      "Older People", 
      "Immigrants", 
      "Persons with Disabilities", 
      "People living in poverty"
    ],
    "y": [
      36, 
      33, 
      32, 
      27, 
      37, 
      22
    ],
    "name": "2020",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Youth", 
      "Women", 
      "Older People", 
      "Immigrants", 
      "Persons with Disabilities", 
      "People living in poverty"
    ],
    "y": [
      36, 
      42, 
      34, 
      41, 
      42, 
      31

Document saved as Research_Project_Documentation.docx


In [46]:
# End-to-end run on bar/2.2_image4.png using prompt2.
# Recorded output (visible part): bar traces "Fixed (wired) broadband",
# "Active mobile-broadband" and "Mobile cellular telephone".
trail_run('new graphs/bar/2.2_image4.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": [
      "Africa",
      "Oceania",
      "Asia",
      "Americas",
      "Europe",
      "Global average"
    ],
    "y": [
      -22,
      1,
      28,
      27,
      20,
      3
    ],
    "name": "Fixed (wired) broadband",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Africa",
      "Oceania",
      "Asia",
      "Americas",
      "Europe",
      "Global average"
    ],
    "y": [
      13,
      6,
      25,
      14,
      7,
      2
    ],
    "name": "Active mobile-broadband",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Africa",
      "Oceania",
      "Asia",
      "Americas",
      "Europe",
      "Global average"
    ],
    "y": [
      -2,
      -8,
      -11,
      -8,
      -5,
      -3
    ],
    "name": "Mobile cellular telephone",
    "marker": {
      "color": "rgba(255, 0, 0, 0.6)"


Document saved as Research_Project_Documentation.docx


In [47]:
# End-to-end run on bar/2.3_image1.png using prompt2.
# Recorded output: two bar traces named "2022" and "2020"; the document was saved.
trail_run('new graphs/bar/2.3_image1.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": [
      "All CS",
      "LDCs",
      "LDC/LLDCs",
      "LDCs/SIDS",
      "LLDCs",
      "SIDS"
    ],
    "y": [
      0.4736,
      0.35,
      0.3495,
      0.366,
      0.6379,
      0.5814
    ],
    "name": "2022",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "All CS",
      "LDCs",
      "LDC/LLDCs",
      "LDCs/SIDS",
      "LLDCs",
      "SIDS"
    ],
    "y": [
      0.4605,
      0.3387,
      0.3348,
      0.3500,
      0.4671,
      0.5255
    ],
    "name": "2020",
    "marker": {
      "color": "rgba(255, 128, 0, 0.6)"
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Average EGDI values for countries in special situations, 2020 and 2022",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Countries",
      "font": {
        "family": "Arial, san

Document saved as Research_Project_Documentation.docx


In [49]:
# End-to-end run on bar/2.4_image1.png using prompt2.
# Recorded output (visible part): bar traces "Number of Cities" and "Very High LOSI"
# plus a third bar trace.
trail_run('new graphs/bar/2.4_image1.png', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "bar",
    "x": [
      "High Income",
      "Upper Middle Income",
      "Lower Middle Income",
      "Low Income"
    ],
    "y": [
      24,
      26,
      23,
      13
    ],
    "name": "Number of Cities",
    "marker": {
      "color": [
        "rgba(55, 128, 191, 0.6)",
        "rgba(55, 128, 191, 0.6)",
        "rgba(55, 128, 191, 0.6)",
        "rgba(55, 128, 191, 0.6)"
      ]
    }
  },
  {
    "type": "bar",
    "x": [
      "High Income",
      "Upper Middle Income",
      "Lower Middle Income",
      "Low Income"
    ],
    "y": [
      9,
      5,
      14,
      5
    ],
    "name": "Very High LOSI",
    "marker": {
      "color": [
        "rgba(255, 127, 14, 0.6)",
        "rgba(255, 127, 14, 0.6)",
        "rgba(255, 127, 14, 0.6)",
        "rgba(255, 127, 14, 0.6)"
      ]
    }
  },
  {
    "type": "bar",
    "x": [
      "High Income",
      "Upper Middle Income",
      "Lower Middle Income",
      "Low Income"
    ],

Document saved as Research_Project_Documentation.docx


In [50]:
# End-to-end run on bar/2.6_image1.png using prompt2.
# Recorded output: a single bar trace; the document was saved.
trail_run('new graphs/bar/2.6_image1.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": [
      "Justice",
      "Environment",
      "Social protection",
      "Employment",
      "Education",
      "Health"
    ],
    "y": [
      60,
      72,
      30,
      40,
      35,
      70
    ],
    "name": "Number of countries with evidence of online consultations held in the preceding 12 months, by sector"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Number of countries with evidence of online consultations held in the preceding 12 months, by sector, 2014, 2016 and 2020",
    "font": {
      "family": "Arial, sans-serif",
      "size": 18,
      "color": "#333333"
    }
  },
  "xaxis": {
    "title": {
      "text": "Sector",
      "font": {
        "family": "Arial, sans-serif",
        "size": 14,
        "color": "#333333"
      }
    },
    "tickfont": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#333333"
    }
  },
  "yaxis": {
    "title": {
      "text": "Number of countries

Document saved as Research_Project_Documentation.docx


In [51]:
# End-to-end run on bar/Others1.png using prompt2.
# Recorded output: one bar trace ("EGDI 2022 average") and two scatter traces in
# "lines" mode ("Maximum", "Minimum"); the document was saved.
trail_run('new graphs/bar/Others1.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": ["193 Member States", "Africa", "Americas", "Asia", "Europe", "Oceania"],
    "y": [0.6102, 0.4054, 0.6438, 0.6493, 0.6256, 0.5081],
    "name": "EGDI 2022 average",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "scatter",
    "x": ["193 Member States", "Africa", "Americas", "Asia", "Europe", "Oceania"],
    "y": [0.9717, 0.0852, 0.9151, 0.9529, 0.8305, 0.9432],
    "mode": "lines",
    "name": "Maximum",
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": ["193 Member States", "Africa", "Americas", "Asia", "Europe", "Oceania"],
    "y": [0.0852, 0.7357, 0.2481, 0.2710, 0.3230, 0.3080],
    "mode": "lines",
    "name": "Minimum",
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Global and regional EGDI averages, 2022",
    "font": {
      "family"

Document saved as Research_Project_Documentation.docx


In [52]:
# End-to-end run on bar/Others2.png using prompt2.
# Recorded output (visible part): bar traces "EGDI", "OSI" and "TII".
trail_run('new graphs/bar/Others2.png', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "bar",
    "x": [
      "Low income",
      "Lower-middle income",
      "Upper-middle income",
      "High income",
      "All income groups"
    ],
    "y": [
      -2.8,
      -7.9,
      8.6,
      -0.7,
      -1.2
    ],
    "name": "EGDI",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Low income",
      "Lower-middle income",
      "Upper-middle income",
      "High income",
      "All income groups"
    ],
    "y": [
      6.4,
      7.3,
      12.3,
      3.9,
      5.3
    ],
    "name": "OSI",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": [
      "Low income",
      "Lower-middle income",
      "Upper-middle income",
      "High income",
      "All income groups"
    ],
    "y": [
      1.2,
      1.6,
      3.9,
      1.0,
      1.8
    ],
    "name": "TII",
    "marker": {
      "color": "rgba(44, 160, 101, 0.6)"
 

Document saved as Research_Project_Documentation.docx


In [53]:
# End-to-end run on bar/Others3.png using prompt2.
# NOTE: the recorded run failed with
#   JSONDecodeError: Expecting ',' delimiter: line 63 column 8 (char 1562)
# and no "Document saved" line was printed, so this image was not documented.
trail_run('new graphs/bar/Others3.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": [
      "Apply for business license",
      "Apply for building permit",
      "Apply for government vacancies online",
      "File company for business (e-certificate)",
      "Apply for personal tax card",
      "Apply for utilities (electricity, water)*",
      "Pay for driver's license",
      "Apply for value added Tax",
      "Submit land title (transfer, mortgage)",
      "Apply for environmental permit",
      "Apply for social protection programs",
      "Apply for visa",
      "Register a police vehicle",
      "Register death of address",
      "Submit change of address"
    ],
    "y": [
      167,
      151,
      142,
      135,
      143,
      126,
      145,
      132,
      130,
      133,
      115,
      131,
      97,
      82,
      76
    ],
    "name": "Number of countries",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "scatter",
    "x": [
      "Apply for busines

JSONDecodeError: Expecting ',' delimiter: line 63 column 8 (char 1562)

In [54]:
# End-to-end run on bar/Others4.png using prompt2.
# Recorded output: two bar traces named "e-Procurement Platform" and
# "Digital Invoicing"; the document was saved.
trail_run('new graphs/bar/Others4.png', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "bar",
    "x": ["Africa", "Americas", "Asia", "Europe", "Oceania"],
    "y": [19, 36, 76, 26, 14],
    "name": "e-Procurement Platform",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    }
  },
  {
    "type": "bar",
    "x": ["Africa", "Americas", "Asia", "Europe", "Oceania"],
    "y": [10, 26, 35, 3, 88],
    "name": "Digital Invoicing",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)"
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Number of countries with e-procurement platforms and digital invoicing capabilities, by region, 2022",
    "font": {
      "family": "Arial, sans-serif",
      "size": 18,
      "color": "#333333"
    }
  },
  "xaxis": {
    "title": {
      "text": "Region",
      "font": {
        "family": "Arial, sans-serif",
        "size": 14,
        "color": "#333333"
      }
    },
    "tickfont": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#333333"
    },
    "

Document saved as Research_Project_Documentation.docx


In [55]:
# End-to-end run on bar/Others5.png using prompt2.
# Recorded output (visible part): a bar trace over a list of online services.
trail_run('new graphs/bar/Others5.png', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "bar",
    "x": [
      "Register a business",
      "Apply for business license",
      "Apply for government vacancies online",
      "Apply for birth certificate",
      "File company/business tax online",
      "Apply for death certificate",
      "Apply for personal identity card",
      "Pay for utilities (electricity/gas*)",
      "Apply for marriage certificate",
      "Apply for driver's license",
      "Submit Value Added Tax",
      "Apply for land title registration",
      "Pay for utilities (water)",
      "Apply for environmental permit",
      "Apply for social protection programmes",
      "Pay fines",
      "Declare to police",
      "Register a motor vehicle",
      "Submit change of address"
    ],
    "y": [
      45,
      40,
      36,
      33,
      34,
      33,
      33,
      32,
      31,
      31,
      27,
      27,
      22,
      22,
      18,
      19,
      12,
      10,
      4
    ],
    "name": "Number of

Document saved as Research_Project_Documentation.docx


In [57]:
# End-to-end run on 1.1_image1.jpg using prompt2.
# Recorded output: two scatter traces ("Mobile Channel", "SMS Text Channel") in
# "lines+markers" mode; the document was saved.
trail_run('new graphs/1.1_image1.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [2008, 2010, 2012, 2014, 2016, 2018],
    "y": [10, 20, 30, 50, 80, 140],
    "mode": "lines+markers",
    "name": "Mobile Channel",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2008, 2010, 2012, 2014, 2016, 2018],
    "y": [20, 30, 30, 40, 50, 90],
    "mode": "lines+markers",
    "name": "SMS Text Channel",
    "marker": {
      "color": "rgba(255, 127, 14, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Countries offering SMS Text and Mobile Web/App Services",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Year",
      "font": {
        "family": "Arial, sans-s

Document saved as Research_Project_Documentation.docx


In [58]:
# End-to-end run on 1.2_image2.jpg using prompt2.
# Recorded output (visible part): traces typed "line" named "Europe", "Americas"
# and "Asia", plus a fourth trace.
trail_run('new graphs/1.2_image2.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "line",
    "x": [
      "2003",
      "2004",
      "2005",
      "2008",
      "2010",
      "2012"
    ],
    "y": [
      0.56,
      0.58,
      0.58,
      0.63,
      0.62,
      0.73
    ],
    "name": "Europe",
    "line": {
      "color": "rgb(0, 153, 117)",
      "width": 4
    }
  },
  {
    "type": "line",
    "x": [
      "2003",
      "2004",
      "2005",
      "2008",
      "2010",
      "2012"
    ],
    "y": [
      0.42,
      0.43,
      0.44,
      0.48,
      0.49,
      0.53
    ],
    "name": "Americas",
    "line": {
      "color": "rgb(255, 127, 14)",
      "width": 4
    }
  },
  {
    "type": "line",
    "x": [
      "2003",
      "2004",
      "2005",
      "2008",
      "2010",
      "2012"
    ],
    "y": [
      0.39,
      0.41,
      0.42,
      0.45,
      0.46,
      0.49
    ],
    "name": "Asia",
    "line": {
      "color": "rgb(44, 160, 44)",
      "width": 4
    }
  },
  {
    "type": "line",
    "x":

Document saved as Research_Project_Documentation.docx


In [59]:
# End-to-end run on 1.2_image3.jpg using prompt2.
# Recorded output (visible part): scatter traces; the first two are both named
# "Line Plot".
trail_run('new graphs/1.2_image3.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [
      "Europe",
      "Americas",
      "Asia",
      "World",
      "Oceania",
      "Africa"
    ],
    "y": [
      85,
      55,
      80,
      65,
      50,
      40
    ],
    "mode": "lines+markers",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [
      "Europe",
      "Americas",
      "Asia",
      "World",
      "Oceania",
      "Africa"
    ],
    "y": [
      90,
      60,
      75,
      85,
      20,
      10
    ],
    "mode": "lines+markers",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(255, 128, 0, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(255, 128, 0, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [
      "Europe",
      "Americas",
      "Asia",
      "World",
     

Document saved as Research_Project_Documentation.docx


In [60]:
# End-to-end run on 1.2_image4.jpg using prompt2.
# Recorded output: a single scatter trace named "Life satisfaction" with per-point
# hover text; the document was saved.
trail_run('new graphs/1.2_image4.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [
      "2013-03-31", 
      "2014-03-31", 
      "2015-03-31", 
      "2016-03-31", 
      "2017-03-31", 
      "2018-03-31"
    ],
    "y": [
      7.67, 
      7.95, 
      8.09, 
      8.03, 
      8.09, 
      8.33
    ],
    "mode": "lines+markers",
    "name": "Life satisfaction",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    },
    "text": [
      "Free long distance calls",
      "",
      "",
      "",
      "Gasolinazo (fuel prices)",
      "AMLO wins election"
    ],
    "hoverinfo": "text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Life Satisfaction in Mexico",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Year",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
       

Document saved as Research_Project_Documentation.docx


In [61]:
# End-to-end run on 1.3_image2.jpg using prompt2.
# Recorded output (visible part): scatter traces "2014 OSI", "2014 TII" and
# "2014 HCI", all three with identical x and y arrays.
trail_run('new graphs/1.3_image2.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "y": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "mode": "lines",
    "name": "2014 OSI",
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "y": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "mode": "lines",
    "name": "2014 TII",
    "line": {
      "color": "rgba(0, 176, 240, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "y": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "mode": "lines",
    "name": "2014 HCI",
    "line": {
      "color": "rgba(230, 97, 1, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.

Document saved as Research_Project_Documentation.docx


In [62]:
# Same image and prompt as the preceding cell; the visible part of the recorded
# output is identical to that cell's.
trail_run('new graphs/1.3_image2.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "y": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "mode": "lines",
    "name": "2014 OSI",
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "y": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "mode": "lines",
    "name": "2014 TII",
    "line": {
      "color": "rgba(0, 176, 240, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "y": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
    "mode": "lines",
    "name": "2014 HCI",
    "line": {
      "color": "rgba(230, 97, 1, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.

Document saved as Research_Project_Documentation.docx


In [64]:
# End-to-end run on 1.3_image1.jpg using prompt2.
# Recorded output (visible part): scatter traces "Europe", "Asia", "Americas" and
# "Oceania", plus a fifth trace.
trail_run('new graphs/1.3_image1.jpg', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "scatter",
    "x": [2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022],
    "y": [0.614, 0.606, 0.714, 0.699, 0.709, 0.736, 0.786, 0.811],
    "mode": "lines",
    "name": "Europe"
  },
  {
    "type": "scatter",
    "x": [2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022],
    "y": [0.496, 0.491, 0.512, 0.488, 0.492, 0.549, 0.598, 0.610],
    "mode": "lines",
    "name": "Asia"
  },
  {
    "type": "scatter",
    "x": [2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022],
    "y": [0.426, 0.418, 0.441, 0.472, 0.478, 0.504, 0.541, 0.567],
    "mode": "lines",
    "name": "Americas"
  },
  {
    "type": "scatter",
    "x": [2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022],
    "y": [0.296, 0.282, 0.306, 0.292, 0.312, 0.342, 0.385, 0.412],
    "mode": "lines",
    "name": "Oceania"
  },
  {
    "type": "scatter",
    "x": [2008, 2010, 2012, 2014, 2016, 2018, 2020, 2022],
    "y": [0.356, 0.349, 0.373, 0.361, 0.376, 0.403, 0.442, 0.465],
    "mode": "line

Document saved as Research_Project_Documentation.docx


In [65]:
# End-to-end run on 1.3_image3.jpg using prompt2.
# Recorded output (visible part): scatter traces "World", "Sub-Saharan Africa" and
# "Least developed countries: UN classification", plus a fourth trace.
trail_run('new graphs/1.3_image3.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
    "y": [12, 17, 22, 25, 27, 33, 42, 52, 58],
    "mode": "lines",
    "name": "World",
    "line": {
      "color": "rgba(0, 0, 0, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
    "y": [4, 6, 10, 15, 20, 25, 35, 45, 55],
    "mode": "lines",
    "name": "Sub-Saharan Africa",
    "line": {
      "color": "rgba(153, 153, 153, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
    "y": [1, 2, 3, 5, 8, 12, 18, 28, 38],
    "mode": "lines",
    "name": "Least developed countries: UN classification",
    "line": {
      "color": "rgba(204, 204, 204, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008],
    "y": [6, 9, 13, 18, 22, 28, 36, 46, 55],
    

Document saved as Research_Project_Documentation.docx


In [66]:
# End-to-end run on 1.3_image4.jpg using prompt2.
# Recorded output (visible part): scatter traces "Commitments to all economies" and
# "Disbursements to all economies", plus a third trace.
trail_run('new graphs/1.3_image4.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
    "y": [26.6, 31.3, 40.1, 40.3, 40.8, 40.1, 48.1, 53.9, 58.5, 59.6, 43.2, 49.2, 53.5, 50.5, 63.6],
    "mode": "lines",
    "name": "Commitments to all economies",
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 3
    }
  },
  {
    "type": "scatter",
    "x": [2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
    "y": [5.6, 9.2, 13.2, 14.9, 13.9, 13.6, 20.3, 23.1, 23.6, 25.2, 24.6, 24.8, 26.1, 23.3, 30.1],
    "mode": "lines",
    "name": "Disbursements to all economies",
    "line": {
      "color": "rgba(255, 0, 0, 1)",
      "width": 3
    }
  },
  {
    "type": "scatter",
    "x": [2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
    "y": [19.9, 25.1, 33.9, 33.9, 34.3, 33.6, 39.9, 44.5, 48.3, 49.4, 36.2, 41.6, 45.1, 42.3, 5

Document saved as Research_Project_Documentation.docx


In [68]:
# End-to-end run on 1.3_image5.jpg using prompt2.
# Recorded output (visible part): scatter traces for mobile-cellular subscriptions
# and for individuals using the Internet (both per 100 inhabitants).
trail_run('new graphs/1.3_image5.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [
      "2005",
      "2006",
      "2007",
      "2008",
      "2009",
      "2010",
      "2011",
      "2012",
      "2013",
      "2014",
      "2015",
      "2016"
    ],
    "y": [
      10,
      15,
      25,
      35,
      45,
      55,
      65,
      75,
      85,
      90,
      95,
      100
    ],
    "mode": "lines",
    "name": "Mobile-cellular telephone subscriptions (per 100 inhabitants)",
    "line": {
      "color": "rgba(55, 128, 191, 1)",
      "width": 3
    }
  },
  {
    "type": "scatter",
    "x": [
      "2005",
      "2006",
      "2007",
      "2008",
      "2009",
      "2010",
      "2011",
      "2012",
      "2013",
      "2014",
      "2015",
      "2016"
    ],
    "y": [
      5,
      10,
      15,
      20,
      30,
      40,
      50,
      60,
      70,
      80,
      90,
      100
    ],
    "mode": "lines",
    "name": "Individuals using the Internet (per 100 inhabitants)",
    

Document saved as Research_Project_Documentation.docx


In [69]:
# End-to-end run on 1.4_image1.jpg using prompt2.
# Recorded output (visible part): scatter traces "EPI e-Consultation" and
# "EPI e-Information" in "lines+markers" mode.
trail_run('new graphs/1.4_image1.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [
      "Low EPI",
      "Middle EPI",
      "High EPI",
      "Very High EPI"
    ],
    "y": [
      8.9,
      29,
      60.9,
      89.1
    ],
    "mode": "lines+markers",
    "name": "EPI e-Consultation",
    "marker": {
      "color": "rgba(255, 127, 14, 1)",
      "size": 8
    },
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 2
    },
    "text": [
      "0.3%",
      "3.0%",
      "10.7%",
      "54.4%"
    ],
    "hoverinfo": "text"
  },
  {
    "type": "scatter",
    "x": [
      "Low EPI",
      "Middle EPI",
      "High EPI",
      "Very High EPI"
    ],
    "y": [
      19.2,
      49.8,
      75.8,
      92.1
    ],
    "mode": "lines+markers",
    "name": "EPI e-Information",
    "marker": {
      "color": "rgba(255, 192, 0, 1)",
      "size": 8
    },
    "line": {
      "color": "rgba(255, 192, 0, 1)",
      "width": 2
    },
    "text": [
      "0.3%",
      "3.0%",
      "10.7%",


Document saved as Research_Project_Documentation.docx


In [70]:
# End-to-end run on 1.5_image1.jpg using prompt2.
# Recorded output (visible part): a scatter trace whose x labels run quarterly from
# "2015 Q1" to "2022 Q1".
trail_run('new graphs/1.5_image1.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [
      "2015 Q1",
      "2015 Q2",
      "2015 Q3",
      "2015 Q4",
      "2016 Q1",
      "2016 Q2",
      "2016 Q3",
      "2016 Q4",
      "2017 Q1",
      "2017 Q2",
      "2017 Q3",
      "2017 Q4",
      "2018 Q1",
      "2018 Q2",
      "2018 Q3",
      "2018 Q4",
      "2019 Q1",
      "2019 Q2",
      "2019 Q3",
      "2019 Q4",
      "2020 Q1",
      "2020 Q2",
      "2020 Q3",
      "2020 Q4",
      "2021 Q1",
      "2021 Q2",
      "2021 Q3",
      "2021 Q4",
      "2022 Q1"
    ],
    "y": [
      95.11,
      95.51,
      95.62,
      95.83,
      96.13,
      96.52,
      96.93,
      97.35,
      98.05,
      98.86,
      99.68,
      100.5,
      101.1,
      101.8,
      102.5,
      102.9,
      103.4,
      103.8,
      104.2,
      104.5,
      94.72,
      86.23,
      90.14,
      93.45,
      96.87,
      100.3,
      103.8,
      107.4,
      111.2
    ],
    "mode": "lines",
    "name": "Volume 

Document saved as Research_Project_Documentation.docx


In [71]:
# End-to-end run on 1.6_image1.jpg using prompt2.
# Recorded output: a single scatter trace named "Happiness Index"; the document
# was saved.
trail_run('new graphs/1.6_image1.jpg', prompt2)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [
      "Chinese",
      "German",
      "French",
      "Russian",
      "Italian",
      "Spanish",
      "English GB",
      "English US"
    ],
    "y": [
      45,
      40,
      35,
      30,
      25,
      20,
      15,
      10
    ],
    "mode": "lines+markers",
    "name": "Happiness Index",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Frequency of Occurrence of 'Happiness' Across Languages",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Language",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "

Document saved as Research_Project_Documentation.docx


In [6]:
# prompt3: instruction prompt asking the vision model to turn a chart image into three
# Plotly JSON sections wrapped in <data>, <layout> and <config> tags.
# The example sections below are kept strictly valid JSON (no comments, no trailing
# commas) because the model tends to imitate the example verbatim.
prompt3 = '''
Task: Extract detailed information from a given graph image and generate JSON files (data.json, layout.json, and config.json) compatible with Plotly. Use the specified delimiters to wrap each JSON section.

Instructions:

Graph Analysis:

1. Identify and categorize the plot types present in the graph (e.g., line, scatter, bar, pie).
2. Extract the data points for each plot (x-values, y-values, and any other relevant data such as labels for pie charts).
3. Note any multiple plots on the same figure and their configurations, ensuring all are included.
4. Verify that no data points or plots are missing from the extraction.

Generate data.json:

For each plot, include details such as:
- "type": The type of plot (e.g., scatter, bar, pie).
- "x" and "y": Arrays of data points for the x and y axes.
- "labels" and "values": For pie charts.
- "mode": For line and scatter plots (e.g., markers, lines, or markers+lines).
- "name": Legend entry for the plot.
- "marker": Properties such as color, size, and symbol for markers.
- "line": Properties such as color, width, and dash style for lines.
- "text": Hover text for each data point.
- "hoverinfo": Information displayed on hover (e.g., x+y+text).

Generate layout.json:

Include details such as:
- "title": The title of the graph, including text, font properties (family, size, color).
- "xaxis" and "yaxis": Configuration including title, range, showgrid, gridcolor, zeroline, zerolinecolor, showticklabels, tickangle, tickfont.
- "legend": Properties including orientation, x, y, xanchor, font.
- "margin": Values for l (left), r (right), b (bottom), t (top), pad.
- "plot_bgcolor" and "paper_bgcolor": Background colors of the plot area and paper.

Generate config.json:

Include configuration settings such as:
- "responsive": Whether the graph is responsive (boolean).
- "displayModeBar": Whether the mode bar is displayed (boolean).
- "modeBarButtonsToRemove": List of mode bar buttons to remove (e.g., ["toImage"]).
- "scrollZoom": Whether scrolling zoom is enabled (boolean).

Output Format:

Wrap each JSON output in specific XML-like tags:
- <data> ... </data>
- <layout> ... </layout>
- <config> ... </config>

Make sure to follow the JSON formatting guidelines:
- Double Quotes for Keys and Strings: Ensure that all keys and string values in JSON are enclosed in double quotes (").
  Example: Instead of "type": bar, it should be "type": "bar".
- No Trailing Commas: Remove any trailing commas after the last element in arrays ([]) or objects ({}).
  Example: Instead of "color": "rgba(55, 128, 191, 0.6)",, it should be "color": "rgba(55, 128, 191, 0.6)"
- No Comments: JSON does not allow comments. Do not put explanatory notes or comment markers inside any JSON section.

Example Output (add one object to the <data> array for each additional plot):

<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    },
    "text": ["Point 1", "Point 2", "Point 3"],
    "hoverinfo": "x+y+text"
  }
]
</data>

<layout>
{
  "title": {
    "text": "Graph Title",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60,
    "pad": 10
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>

<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
'''

In [7]:
# Required Secret Key
#
# SECURITY: API keys must not be hardcoded in the notebook. The literal keys that
# used to be assigned in this cell are exposed in the saved file and should be
# revoked and re-issued.
#
# Each key is read from an environment variable. When the variable is unset, any value
# already present in the notebook namespace (e.g. from `from config import ...`) is
# kept; otherwise the key is an empty string and the first API call will fail to
# authenticate.
import os

# Get your key: https://ai.google.dev/
gemini_key = os.environ.get("GEMINI_API_KEY") or globals().get("gemini_key", "")

# Get your key: https://platform.openai.com/signup
open_ai_key = os.environ.get("OPENAI_API_KEY") or globals().get("open_ai_key", "")

In [80]:
# Runs prompt2 against 'new graphs/1.7_image1.jpg'. The saved output of this cell is
# "DeadlineExceeded: 504 Deadline Exceeded", i.e. the recorded run did not complete.
trail_run('new graphs/1.7_image1.jpg', prompt2)

DeadlineExceeded: 504 Deadline Exceeded

In [79]:
# Runs prompt3 against the line-chart image. The saved output shows the extracted
# <data> JSON cut off mid-string, so check the full response before relying on it.
trail_run('new graphs/linechart_other1.jpg', prompt3)

Extracted Json:/n  <data>
[
  {
    "type": "scatter",
    "x": [1972, 1974, 1976, 1978, 1980, 1982, 1984, 1986, 1988, 1990, 1992, 1994, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016],
    "y": [2.01, 2.03, 2.25, 2.23, 2.19, 2.19, 2.25, 2.23, 2.21, 2.19, 2.19, 2.19, 2.19, 2.21, 2.23, 2.25, 2.29, 2.31, 2.33, 2.31, 2.29, 2.25, 2.21],
    "mode": "lines",
    "name": "Happiness",
    "line": {
      "color": "rgba(219, 64, 82, 1)",
      "width": 3
    },
    "hoverinfo": "x+y"
  },
  {
    "type": "scatter",
    "x": [1972, 1974, 1976, 1978, 1980, 1982, 1984, 1986, 1988, 1990, 1992, 1994, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016],
    "y": [20000, 22000, 25000, 28000, 30000, 32000, 35000, 38000, 40000, 42000, 44000, 46000, 48000, 50000, 52000, 53000, 54000, 55000, 56000, 57000, 58000, 59000, 60000],
    "mode": "lines",
    "name": "GDP Per Capita",
    "line": {
      "color": "rgba(55, 128, 191, 1)",
      "width": 3
    },
    "hoverinfo": "x

Document saved as Research_Project_Documentation.docx


In [12]:
# Run prompt3 over the six pie-chart images pie1.jpg ... pie6.jpg, one call per image.
for pie_number in range(1, 7):
    pie_image = f"pie{pie_number}.jpg"
    trail_run(pie_image, prompt3)

Extracted Json:/n  <data>
[
  {
    "type": "pie",
    "labels": ["Male", "Female"],
    "values": [88.99, 11.01],
    "hoverinfo": "label+value+percent",
    "textinfo": "label+value+percent",
    "textposition": "outside",
    "marker": {
      "colors": ["rgba(55, 128, 191, 0.6)", "rgba(255, 127, 14, 0.6)"]
    }
  }
]
</data>

<layout>
{
  "title": {
    "text": "Gender Disparities at the Top Level of E-Government",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60,
    "pad": 10
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>

<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": tr

Document saved as Research_Project_Documentation.docx
Extracted Json:/n  <data>
[
  {
    "type": "pie",
    "labels": ["Availability to provide feedback to improve accessibility/usability of e-services", "Evidence of user satisfaction evaluation", "Availability to provide usage statistics"],
    "values": [66, 47, 36],
    "textinfo": "label+percent",
    "hoverinfo": "label+value+percent",
    "marker": {
      "colors": ["rgba(158, 202, 109, 0.6)", "rgba(111, 176, 223, 0.6)", "rgba(75, 192, 192, 0.6)"]
    },
    "name": "2020"
  },
  {
    "type": "pie",
    "labels": ["Availability to provide feedback to improve accessibility/usability of e-services", "Evidence of user satisfaction evaluation", "Availability to provide usage statistics"],
    "values": [64, 42, 30],
    "textinfo": "label+percent",
    "hoverinfo": "label+value+percent",
    "marker": {
      "colors": ["rgba(158, 202, 109, 0.6)", "rgba(111, 176, 223, 0.6)", "rgba(75, 192, 192, 0.6)"]
    },
    "name": "2022"
  }

Document saved as Research_Project_Documentation.docx
Extracted Json:/n  <data>
[
  {
    "type": "pie",
    "labels": ["Africa", "Asia", "Americas", "Europe", "Oceania"],
    "values": [40, 24, 7, 37, 22],
    "hoverinfo": "label+value+percent",
    "textinfo": "label+value+percent",
    "textposition": "outside",
    "marker": {
      "colors": ["rgba(255, 159, 64, 0.6)", "rgba(255, 118, 118, 0.6)", "rgba(255, 204, 128, 0.6)", "rgba(255, 127, 14, 0.6)", "rgba(128, 0, 128, 0.6)"]
    }
  }
]
</data>

<layout>
{
  "title": {
    "text": "Number of countries with at least one service for vulnerable groups, by region",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60,
    "pa

Document saved as Research_Project_Documentation.docx
Extracted Json:/n   <data>
[
  {
    "type": "pie",
    "labels": ["Internet Users", "Mobile Subscribers", "Active Mobile Broadband Subscriptions", "Fixed Broadband Subscriptions"],
    "values": [25, 25, 25, 25],
    "hoverinfo": "label+value+percent"
  }
]
</data>

<layout>
{
  "title": {
    "text": "Telecommunication Infrastructure Index (TII) and its Components",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60,
    "pad": 10
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>

<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": t

Document saved as Research_Project_Documentation.docx
Extracted Json:/n  <data>
[
  {
    "type": "pie",
    "labels": ["Low", "Middle", "High", "Very High"],
    "values": [4, 27, 38, 31],
    "hoverinfo": "label+value+percent",
    "textinfo": "label+value+percent",
    "textposition": "inside",
    "name": "2020"
  },
  {
    "type": "pie",
    "labels": ["Low", "Middle", "High", "Very High"],
    "values": [4, 27, 73, 60],
    "hoverinfo": "label+value+percent",
    "textinfo": "label+value+percent",
    "textposition": "inside",
    "name": "2022"
  }
]
</data>

<layout>
{
  "title": {
    "text": "Number and Proportion of Countries within Each EGDI Grouping, 2020 and 2022",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin

Document saved as Research_Project_Documentation.docx
Extracted Json:/n  <data>
[
  {
    "type": "pie",
    "labels": ["Low", "Middle", "High", "Very high"],
    "values": [18, 31, 25, 26],
    "hoverinfo": "label+value+percent",
    "textinfo": "label+value+percent",
    "marker": {
      "colors": ["rgba(255, 255, 0, 0.6)", "rgba(128, 128, 128, 0.6)", "rgba(0, 128, 255, 0.6)", "rgba(191, 64, 191, 0.6)"]
    }
  }
]
</data>

<layout>
{
  "title": {
    "text": "LOSI 2022 levels for the 146 cities assessed",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60,
    "pad": 10
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>

<config>
{
  "responsive": 

Document saved as Research_Project_Documentation.docx


In [None]:
# Not executed in the saved notebook (the cell has no execution count). Same call as
# the earlier prompt3 run on linechart_other1.jpg.
trail_run('new graphs/linechart_other1.jpg', prompt3)

Trial for area charts

In [37]:
# Area-plot trial with prompt2v3. In the visible part of the saved output the traces
# come back as "type": "scatter" with no "fill" key.
trail_run('graphs/Area Plots/areaplot-2.png', prompt2v3)

Extracted Json:/n  ```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [400, 430, 550, 530, 580, 600, 650, 680, 660, 630, 600, 580],
    "name": "Housing",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [200, 220, 280, 250, 300, 320, 350, 380, 360, 330, 300, 280],
    "name": "Food",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [100, 120, 150, 130, 180, 200, 250, 280, 260, 230, 200, 180],
    "name": "Utility",
    "marker": {
      "color": "rgba(54, 162, 235, 1)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [150, 180, 230, 200, 250, 270, 300, 330, 310, 280, 250, 230],
    "name": "Insur

Document saved as Research_Project_Documentation.docx


In [39]:
# Same image with prompt2 for comparison. The saved output again shows "scatter"
# traces ("mode": "lines") without a "fill" key.
trail_run('graphs/Area Plots/areaplot-2.png', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [400, 430, 550, 560, 600, 610, 650, 660, 630, 600, 580, 500],
    "mode": "lines",
    "name": "Housing"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [200, 220, 280, 290, 330, 300, 350, 360, 330, 300, 280, 200],
    "mode": "lines",
    "name": "Food"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [100, 120, 190, 200, 220, 210, 250, 260, 230, 200, 180, 100],
    "mode": "lines",
    "name": "Utility"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [150, 180, 130, 150, 120, 140, 100, 120, 140, 120, 150, 180],
    "mode": "lines",
    "name": "Insurance"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Monthly Expenses",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "

Document saved as Research_Project_Documentation.docx


In [40]:
# Area-plot trial with prompt2. Forward slashes keep the path portable across
# platforms and avoid the invalid "\A" / "\M" escape sequences that the backslash
# form relied on.
trail_run('graphs/Area Plots/Matplotlib-with-Pandas-Area-Plot.png', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6],
    "mode": "lines",
    "name": "A"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [1.8, 1.6, 1.4, 1.2, 1.0, 0.8, 0.6, 0.4],
    "mode": "lines",
    "name": "B"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2],
    "mode": "lines",
    "name": "C"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [1.4, 1.2, 1.0, 0.8, 0.6, 0.4, 0.2, 0.0],
    "mode": "lines",
    "name": "D"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [2.4, 2.2, 2.0, 1.8, 1.6, 1.4, 1.2, 1.0],
    "mode": "lines",
    "name": "E"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Area Chart"
  },
  "xaxis": {
    "title": {
      "text": "Time"
    }
  },
  "yaxis": {
    "title": {
      "text": "Value"
    }
  },
  "legend": 

Document saved as Research_Project_Documentation.docx


In [41]:
# Area-plot trial with prompt2v2. Forward slashes keep the path portable across
# platforms and avoid the invalid "\A" / "\M" escape sequences that the backslash
# form relied on.
trail_run('graphs/Area Plots/Matplotlib-with-Pandas-Area-Plot.png', prompt2v2)

Extracted Json:/n  ```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.2, 0.5, 1.1, 2.0, 1.8, 2.2, 1.5, 3.3],
    "name": "A"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.1, 0.3, 0.7, 1.5, 1.3, 1.7, 1.1, 2.7],
    "name": "B"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.3, 0.6, 1.3, 2.2, 2.0, 2.4, 1.7, 3.5],
    "name": "C"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.4, 0.8, 1.5, 2.4, 2.2, 2.6, 1.9, 3.7],
    "name": "D"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.5, 1.0, 1.7, 2.6, 2.4, 2.8, 2.1, 3.9],
    "name": "E"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Area Chart"
  },
  "xaxis": {
    "title": {
      "text": "Time"
    }
  },
  "yaxis": {
    "title": {
      "text": "Value"
    }
  },
  "legend": {
    "orientation": "h",
    "y": -0.5
  }
}
</layout>
<config>
{
  "responsive": true
}
</config>

Document saved as Research_Project_Documentation.docx


In [42]:
# Area-plot trial with prompt2v3. Forward slashes keep the path portable across
# platforms and avoid the invalid "\A" / "\M" escape sequences that the backslash
# form relied on.
trail_run('graphs/Area Plots/Matplotlib-with-Pandas-Area-Plot.png', prompt2v3)

Extracted Json:/n  ```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.2, 0.5, 1.1, 2.0, 1.8, 2.2, 1.5, 3.3],
    "name": "A"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.1, 0.3, 0.7, 1.2, 1.1, 1.5, 1.0, 2.5],
    "name": "B"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.3, 0.6, 1.3, 2.4, 2.2, 2.6, 1.8, 3.7],
    "name": "C"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.0, 0.2, 0.5, 1.0, 0.9, 1.2, 0.8, 2.0],
    "name": "D"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8],
    "y": [0.4, 0.8, 1.5, 2.6, 2.4, 2.8, 2.0, 3.9],
    "name": "E"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Area Chart"
  },
  "xaxis": {
    "title": {
      "text": "Time"
    }
  },
  "yaxis": {
    "title": {
      "text": "Value"
    }
  },
  "legend": {
    "orientation": "h",
    "y": -0.5
  }
}
</layout>
<config>
{
  "responsive": true
}
</config>

Document saved as Research_Project_Documentation.docx


trail_run('graphs\Area Plots\Matplotlib-with-Pandas-Area-Plot.png', prompt2v2)

In [43]:
# Stacked-area trial with prompt2v2. Forward slashes keep the path portable across
# platforms and avoid the invalid "\A" / "\s" escape sequences that the backslash
# form relied on.
trail_run('graphs/Area Plots/stacked-area-color.png', prompt2v2)

Extracted Json:/n  ```xml
<data>
[
  {
    "type": "scatter",
    "x": [1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960],
    "y": [760, 830, 900, 950, 1020, 1100, 1150, 1220, 1250, 1300, 1350],
    "mode": "lines",
    "name": "Line Plot",
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "bar",
    "x": [1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960],
    "y": [100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960],
    "y": [400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900],
    "mode": "lines",
    "name": "Area Chart",
    "fill": "tozeroy",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Comp

Document saved as Research_Project_Documentation.docx


In [44]:
# Stacked-area trial with prompt2v3. Forward slashes keep the path portable across
# platforms and avoid the invalid "\A" / "\s" escape sequences that the backslash
# form relied on.
trail_run('graphs/Area Plots/stacked-area-color.png', prompt2v3)

Extracted Json:/n  ```xml
<data>
[
  {
    "type": "scatter",
    "x": [1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960],
    "y": [760, 830, 910, 1020, 1100, 1260, 1420, 1510, 1670, 1780, 1900],
    "mode": "lines",
    "name": "Line Plot",
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "bar",
    "x": [1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960],
    "y": [100, 150, 220, 300, 350, 420, 510, 580, 670, 730, 820],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960],
    "y": [200, 250, 330, 420, 480, 550, 640, 710, 800, 860, 950],
    "mode": "lines",
    "fill": "tozeroy",
    "name": "Area Chart",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    },
    "fillcolor": "rgba(54, 162, 235, 0.5)"
  }
]
</da

Document saved as Research_Project_Documentation.docx


In [14]:
# prompt2v4: extended extraction prompt covering area, stacked, 3D-scatter and surface
# charts in addition to the basic plot types. The model is asked to answer with three
# JSON sections wrapped in <data>, <layout> and <config> tags.
#
# NOTE(review): the prompt asks for "type": "area" in the Graph Analysis section and in
# the second example trace, but asks for "type": "scatter" plus "fill" in the data.json
# section. Current Plotly trace types do not appear to include "area", so confirm which
# form plot_from_ai_output_v2 expects before resolving the contradiction.
prompt2v4 = '''
Task: Extract detailed information from a given graph image and generate JSON files (`data.json`, `layout.json`, and `config.json`) compatible with Plotly. Use the specified delimiters to wrap each JSON section.

### Instructions:

#### Graph Analysis:

1. **Identify and categorize** the plot types present in the graph. Include both simple and complex plots:
   - Simple plots: `scatter`, `bar`, `pie`, `line`,`area`
   - Complex plots: `stacked bar`, `scatter3d`, `surface`.
   Confirm the presence of each plot type.
   Make sure if its area plot, it is identified as "type": "area".

2. **Extract data points** for each plot type:
   - For `scatter`, `line`, and `area` plots: Extract `x` and `y` values.
   - For `bar` and `stacked bar` plots: Extract `x` (categories) and `y` values.
   - For `pie` charts: Extract `labels` and `values`.
   - For `scatter3d` and `surface` plots: Extract `x`, `y`, and `z` values.

3. **Note any multiple plots** on the same figure and their configurations:
   - For stacked plots, identify the stacking order and group.
   - For area charts, ensure `fill` is specified correctly.

#### Generate `data.json`:

For each plot, include details such as:
- **type**: The type of plot (e.g., `"scatter"`, `"bar"`, `"pie"`, `"area"`, `"scatter3d"`, `"surface"`).
  - For area charts, use `"type": "scatter"` and specify `"fill"`.
- **x**, **y**, and **z** (if applicable): Arrays of data points for the `x`, `y`, and `z` axes.
- **labels** and **values** for pie charts.
- **mode**: For line and scatter plots (e.g., `"markers"`, `"lines"`, `"markers+lines"`).
- **name**: Legend entry for the plot.
- **marker**: Properties such as color, size, and symbol for markers.
- **line**: Properties such as color, width, and dash style for lines.
- **text**: Hover text for each data point.
- **hoverinfo**: Information displayed on hover (e.g., `"x+y+text"`).
- **fill**: For area charts, specify the fill (e.g., `"tozeroy"`, `"tonexty"`).
- **fillcolor**: The color used to fill the area under the line.
- **stackgroup**: For stacked area charts (scatter traces with `fill`), specify the group for stacking. For stacked bar charts, set `"barmode": "stack"` in `layout.json` instead.

#### Generate `layout.json`:

Include details such as:
- **title**: The title of the graph, including text and font properties.
- **xaxis** and **yaxis**: Configuration including title, range, grid properties, and tick settings.
- **zaxis**: Configuration for 3D plots including title, range, and grid properties.
- **legend**: Properties including orientation and positioning.
- **margin**: Values for left, right, bottom, top, and padding.
- **plot_bgcolor** and **paper_bgcolor**: Background colors of the plot area and paper.
- **scene**: For 3D plots, configuration for the 3D scene, including camera angles, lighting, and aspect ratio.

#### Generate `config.json`:

Include configuration settings such as:
- **responsive**: Whether the graph is responsive.
- **displayModeBar**: Whether the mode bar is displayed.
- **modeBarButtonsToRemove**: List of mode bar buttons to remove.
- **scrollZoom**: Whether scrolling zoom is enabled.

### Output Format:

Wrap each JSON output in specific XML-like tags:
- `<data> ... </data>`
- `<layout> ... </layout>`
- `<config> ... </config>`

Make sure to follow the JSON formatting guidelines:
- **Double Quotes for Keys and Strings**: Ensure all keys and string values in JSON are enclosed in double quotes.
- **No Trailing Commas**: Remove any trailing commas after the last element in arrays or objects.

### Example Output:

```xml
<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "area",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [400, 430, 550, 620, 580, 600, 630, 700, 670, 630, 600, 500],
    "name": "Housing",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    }
  },

  {
    "type": "bar",
    "x": ["A", "B", "C"],
    "y": [10, 15, 13],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [6, 7, 8],
    "mode": "lines",
    "fill": "tozeroy",
    "name": "Area Chart",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    },
    "fillcolor": "rgba(54, 162, 235, 0.5)"
  },
  {
    "type": "scatter3d",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "z": [7, 8, 9],
    "mode": "markers",
    "name": "3D Scatter Plot",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 5
    },
    "hoverinfo": "x+y+z+text"
  },
  {
    "type": "surface",
    "z": [
      [10, 10.625, 12.5, 15.625, 20],
      [5.625, 6.25, 8.125, 11.25, 15.625],
      [0, 1.25, 3.125, 6.25, 10.625]
    ],
    "name": "Surface Plot",
    "colorscale": "Viridis",
    "hoverinfo": "z+name"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Complex Graph Example",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff",
  "scene": {
    "xaxis": {"title": "X Axis"},
    "yaxis": {"title": "Y Axis"},
    "zaxis": {"title": "Z Axis"}
  }
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
```
'''

In [77]:
# Later re-run of prompt2 on areaplot-2.png. In this saved output the x values come
# back as month names ("Jan" ... "Dec") and the traces carry explicit line colours.
trail_run('graphs/Area Plots/areaplot-2.png', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "scatter",
    "x": [
      "Jan",
      "Feb",
      "Mar",
      "Apr",
      "May",
      "Jun",
      "Jul",
      "Aug",
      "Sep",
      "Oct",
      "Nov",
      "Dec"
    ],
    "y": [
      400,
      430,
      440,
      470,
      500,
      450,
      430,
      540,
      480,
      520,
      560,
      600
    ],
    "mode": "lines",
    "name": "Housing",
    "line": {
      "color": "rgba(55, 128, 191, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [
      "Jan",
      "Feb",
      "Mar",
      "Apr",
      "May",
      "Jun",
      "Jul",
      "Aug",
      "Sep",
      "Oct",
      "Nov",
      "Dec"
    ],
    "y": [
      200,
      220,
      230,
      250,
      280,
      230,
      250,
      290,
      220,
      240,
      250,
      270
    ],
    "mode": "lines",
    "name": "Food",
    "line": {
      "color": "rgba(255, 127, 14, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter

Document saved as Research_Project_Documentation.docx


In [58]:
# info: hand-assembled model response for the "Monthly Expenses" area chart, used to
# exercise the plotting step without calling the model. It holds exactly one <data>,
# one <layout> and one <config> section. The payload was previously pasted twice
# (joined by a stray code fence and an "Updated Json" log line), which left the
# tag-based extraction ambiguous.
info = ''' <data>
[
  {
    "type": "area",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [400, 430, 550, 620, 580, 600, 630, 700, 670, 630, 600, 500],
    "name": "Housing",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    }
  },
  {
    "type": "area",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [200, 220, 280, 310, 250, 280, 320, 350, 330, 310, 280, 200],
    "name": "Food",
    "marker": {
      "color": "rgba(255, 99, 132, 0.8)"
    },
    "line": {
      "color": "rgba(255, 99, 132, 1)",
      "width": 2
    }
  },
  {
    "type": "area",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [100, 110, 150, 180, 130, 150, 180, 200, 180, 150, 130, 100],
    "name": "Utility",
    "marker": {
      "color": "rgba(54, 162, 235, 0.8)"
    },
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    }
  },
  {
    "type": "area",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [150, 180, 200, 250, 220, 200, 250, 280, 250, 200, 180, 150],
    "name": "Insurance",
    "marker": {
      "color": "rgba(75, 192, 192, 0.8)"
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Monthly Expenses",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Months",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Amount ($)",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
'''

In [59]:
# Feeds the hand-assembled `info` payload to plot_from_ai_output_v2 (defined elsewhere
# in the notebook). No output is recorded for this cell in the saved notebook.
plot_from_ai_output_v2(info)

Trial for plotting different charts

In [74]:
# prompt2 on areaplot-2.png again. The saved output shows four "scatter" line traces
# and a layout section that is cut off inside "xaxis".
trail_run('graphs/Area Plots/areaplot-2.png', prompt2)

Extracted Json:/n   <data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [400, 430, 550, 560, 600, 610, 650, 660, 630, 600, 580, 500],
    "mode": "lines",
    "name": "Housing"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [200, 220, 280, 290, 330, 300, 350, 360, 330, 300, 280, 200],
    "mode": "lines",
    "name": "Food"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [100, 120, 190, 200, 220, 210, 250, 260, 230, 200, 180, 100],
    "mode": "lines",
    "name": "Utility"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [150, 180, 130, 150, 120, 140, 100, 120, 140, 120, 150, 180],
    "mode": "lines",
    "name": "Insurance"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Monthly Expenses",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "

Document saved as Research_Project_Documentation.docx


In [48]:
# areachart: minimal hand-written payload in the <data>/<layout>/<config> tagged
# format, with a single trace declared as "type": "area". It is passed to
# plot_from_ai_output_v2 in the next cell.
# NOTE(review): "area" does not appear to be a current Plotly trace type; presumably
# plot_from_ai_output_v2 maps it to a filled scatter trace. Confirm against that function.
areachart = ''' <data>
[
  {
    "type": "area",
    "x": [1, 2, 3, 4, 5],
    "y": [10, 15, 13, 17, 12],
    "name": "Area Chart",
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    },
    "text": ["Point 1", "Point 2", "Point 3", "Point 4", "Point 5"]
  }
]
</data>
<layout>
{
  "title": {
    "text": "Area Chart Example"
  },
  "xaxis": {
    "title": "X Axis"
  },
  "yaxis": {
    "title": "Y Axis"
  }
}
</layout>
<config>
{
  "responsive": true
}
</config>
'''

In [49]:
# Feeds the minimal `areachart` payload to plot_from_ai_output_v2 (defined elsewhere
# in the notebook). No output is recorded for this cell in the saved notebook.
plot_from_ai_output_v2(areachart)

In [None]:
# Not executed in the saved notebook (the cell has no execution count). Same call as
# the earlier prompt2 runs on areaplot-2.png.
trail_run('graphs/Area Plots/areaplot-2.png', prompt2)

In [16]:
import os
import csv
import json
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

def _add_centered_picture(document, picture_path):
    """Append the image at ``picture_path`` (4.5in wide) and centre its paragraph."""
    document.add_picture(picture_path, width=Inches(4.5))
    # add_picture puts the image in a new trailing paragraph; centre that one.
    document.paragraphs[-1].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


def _add_csv_table(document, table_path, heading_prefix):
    """Append the CSV file at ``table_path`` as a 'Table Grid' table under a level-2 heading."""
    with open(table_path, 'r', newline='', encoding='utf-8') as csvfile:
        rows = list(csv.reader(csvfile))

    # An empty file yields no rows (or only empty rows); max() over an empty
    # sequence would raise, and a zero-column table is meaningless, so skip it.
    num_cols = max((len(row) for row in rows), default=0)
    if num_cols == 0:
        return

    document.add_paragraph('')  # Empty paragraph for spacing
    document.add_heading(f'{heading_prefix}: {os.path.basename(table_path)}', level=2)
    table = document.add_table(rows=len(rows), cols=num_cols)
    table.style = 'Table Grid'

    for r, row in enumerate(rows):
        for c, value in enumerate(row):
            cell = table.cell(r, c)
            cell.text = value
            cell.paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


def _add_json_section(document, heading, payload):
    """Append ``payload`` as indented JSON text under a level-3 ``heading``."""
    document.add_heading(heading, level=3)
    paragraph = document.add_paragraph(json.dumps(payload, indent=4))
    paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT


def update_doc_v2(doc_path, image_path, csv_path, prompt, data, layout, config, updated_data, updated_layout, updated_config, chart_path, performance_metrics, extracted_csv):
    """Append one 'Test Run' section to the Word report at ``doc_path`` and save it.

    The section contains, in order: the source chart image, the original and
    extracted CSV tables (each only if its path is given and exists), the
    prompt, the extracted and updated JSON specs, the re-plotted chart and a
    performance-metrics table.

    Args:
        doc_path: Path of the .docx report. If the file does not exist a new
            document titled 'ChartQA Dataset' is started; otherwise the
            section is appended after a page break.
        image_path: Path of the source chart image.
        csv_path: Optional path of the original data CSV.
        prompt: Prompt text to record.
        data, layout, config: JSON-serialisable objects from the first pass.
        updated_data, updated_layout, updated_config: JSON-serialisable
            objects from the update pass.
        chart_path: Path of the re-plotted chart image. If it is missing a
            short note is written instead of the picture.
        performance_metrics: Iterable of rows, each holding the four values
            for the columns Task / Model Used / Description / Time Taken (s).
        extracted_csv: Optional path of the extracted data CSV.

    Raises:
        Whatever python-docx raises if ``doc_path`` exists but cannot be
        opened. The previous bare ``except`` treated that case as "missing"
        and then overwrote the existing report on save.
    """
    # Open the existing report, or start a new one only when it is truly absent.
    if os.path.exists(doc_path):
        document = Document(doc_path)
        document.add_page_break()
    else:
        document = Document()
        document.add_heading('ChartQA Dataset', level=1)
        document.add_page_break()

    # Add a new section with a title
    document.add_heading('Test Run', level=1)

    # Add the source image with caption
    _add_centered_picture(document, image_path)
    caption = document.add_paragraph(f'Figure: {os.path.basename(image_path)}')
    caption.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    # Add the original and extracted CSV tables when available
    if csv_path and os.path.exists(csv_path):
        _add_csv_table(document, csv_path, 'Original Data')
    if extracted_csv and os.path.exists(extracted_csv):
        _add_csv_table(document, extracted_csv, 'Extracted Data')

    # Add Prompt
    document.add_heading('Prompt Used', level=2)
    prompt_paragraph = document.add_paragraph(prompt)
    prompt_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT

    # Add Extracted JSON
    document.add_heading('Extracted JSON', level=2)
    _add_json_section(document, 'data', data)
    _add_json_section(document, 'layout', layout)
    _add_json_section(document, 'config', config)

    # Add Updated JSON
    document.add_heading('Updated JSON', level=2)
    _add_json_section(document, 'updated_data', updated_data)
    _add_json_section(document, 'updated_layout', updated_layout)
    _add_json_section(document, 'updated_config', updated_config)

    # Add Updated Chart (a missing image should not lose the rest of the report)
    document.add_heading('Updated Chart', level=2)
    if chart_path and os.path.exists(chart_path):
        _add_centered_picture(document, chart_path)
    else:
        document.add_paragraph(f'Chart image not found: {chart_path}')

    # Add Performance Metrics Table
    document.add_heading('Performance Metrics', level=2)
    table = document.add_table(rows=1, cols=4)
    table.style = 'Table Grid'

    # Define table headers
    headers = ["Task", "Model Used", "Description", "Time Taken (s)"]
    hdr_cells = table.rows[0].cells
    for i, header in enumerate(headers):
        hdr_cells[i].text = header
        header_paragraph = hdr_cells[i].paragraphs[0]
        header_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        # Bold is a run-level property in python-docx; assigning `.bold` on the
        # paragraph object has no effect on the rendered text.
        for run in header_paragraph.runs:
            run.bold = True

    # Add metrics data to the table
    for metric in performance_metrics:
        row_cells = table.add_row().cells
        for i, value in enumerate(metric):
            # Cell text must be a string; accept numeric timings as well.
            row_cells[i].text = str(value)
            row_cells[i].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    # Save the document
    document.save(doc_path)
    print(f"Document saved as {doc_path}")


In [29]:
import time
import json
import os
import csv
from docx import Document

def trail_run_v2(image_loc, prompt, csv_path):
    chart_path = 'new_image.png'
    
    # Extract base name of the image without extension
    image_name = os.path.splitext(os.path.basename(image_loc))[0]
    
    # Create the target directory based on the image name
    json_folder = f'D:/Research Internship/chartQA dataset/ChartQA Dataset/test/jsons/{image_name}'
    os.makedirs(json_folder, exist_ok=True)
    
    # Extracted CSV directory
    csv_folder = 'D:/Research Internship/chartQA dataset/ChartQA Dataset/test/extracted_tables'
    os.makedirs(csv_folder, exist_ok=True)
    extracted_csv_path = os.path.join(csv_folder, f'{image_name}.csv')
    
    # Placeholder functions for simulation
    start_time = time.time()
    extracted_json = get_image_info(image_loc, prompt)
    time_info = time.time() - start_time
    print("Extracted Json:\n", extracted_json)
    
    data_json = extract_json_section(extracted_json, "data")
    layout_json = extract_json_section(extracted_json, "layout")
    config_json = extract_json_section(extracted_json, "config")
    csv_data = extract_json_section(extracted_json, "csv")
    
    # Convert JSON strings to Python dictionaries
    data = json.loads(data_json) if data_json else None
    layout = json.loads(layout_json) if layout_json else None
    config = json.loads(config_json) if config_json else None
    
    # Save extracted JSON files
    with open(os.path.join(json_folder, 'data.json'), 'w') as f:
        json.dump(data, f, indent=4)
    with open(os.path.join(json_folder, 'layout.json'), 'w') as f:
        json.dump(layout, f, indent=4)
    with open(os.path.join(json_folder, 'config.json'), 'w') as f:
        json.dump(config, f, indent=4)
    
    # Save extracted CSV data
    if csv_data:
        with open(extracted_csv_path, 'w', newline='') as f:
            csv_writer = csv.writer(f)
            csv_reader = csv.reader(csv_data.splitlines())
            csv_writer.writerows(csv_reader)
    
    start_time = time.time()
    updated_json = update_info(extracted_json)
    time_update = time.time() - start_time
    print("Updated Json:\n", updated_json)
    
    updated_data_json = extract_json_section(updated_json, "data")
    updated_layout_json = extract_json_section(updated_json, "layout")
    updated_config_json = extract_json_section(updated_json, "config")
    
    # Convert JSON strings to Python dictionaries
    updated_data = json.loads(updated_data_json) if updated_data_json else None
    updated_layout = json.loads(updated_layout_json) if updated_layout_json else None
    updated_config = json.loads(updated_config_json) if updated_config_json else None
    
    # Save updated JSON files
    with open(os.path.join(json_folder, 'updated_data.json'), 'w') as f:
        json.dump(updated_data, f, indent=4)
    with open(os.path.join(json_folder, 'updated_layout.json'), 'w') as f:
        json.dump(updated_layout, f, indent=4)
    with open(os.path.join(json_folder, 'updated_config.json'), 'w') as f:
        json.dump(updated_config, f, indent=4)
    
    # Simulate saving the plot image
    start_time = time.time()
    plot_from_ai_output_v2(updated_json)
    time_plot = time.time() - start_time
    
    # Define performance metrics
    performance_metrics = [
        ["Processing Image to JSON", "Gemini Vision Pro (default)", "Time taken to convert the uploaded image to JSON format", f"{time_info:.2f}"],
        ["Updating JSON", "ChatGPT 3.5 (Default)", "Time taken to update the JSON with new data", f"{time_update:.2f}"],
        ["Plotting Updated JSON", "N/A", "Time taken to plot the updated JSON", f"{time_plot:.2f}"]
    ]
    
    # Call the update_doc function with CSV data
    doc_path = 'ChartQA_prompt_test.docx'
    latex_path = 'ChartQA_prompt_test.tex'

    update_doc_v2(doc_path, image_loc, csv_path, prompt, data, layout, config, updated_data, updated_layout, updated_config, chart_path, performance_metrics, extracted_csv=extracted_csv_path)
    # generate_latex_doc_v2(latex_path, image_loc, csv_path, prompt, data, layout, config, updated_data, updated_layout, updated_config, chart_path, performance_metrics, extracted_csv=extracted_csv_path)


In [18]:
# Prompt v6: asks for Plotly-compatible data/layout/config JSON plus a CSV table, each
# wrapped in XML-like tags. Area charts are requested as scatter traces with a fill,
# consistently in both the instructions and the example output.
prompt2v6 = '''
Task: Extract detailed information from a given graph image and generate JSON files (`data.json`, `layout.json`, and `config.json`) compatible with Plotly. Use the specified delimiters to wrap each JSON section.

### Instructions:

#### Graph Analysis:

1. **Identify and categorize** the plot types present in the graph. Include both simple and complex plots:
   - Simple plots: `scatter`, `bar`, `pie`, `line`, `area`
   - Complex plots: `stacked bar`, `scatter3d`, `surface`.
   Confirm the presence of each plot type. Plotly has no `line` or `area` trace type: encode line plots as `"type": "scatter"` with a `"mode"`, and area plots as `"type": "scatter"` with a `"fill"`.

2. **Extract data points** for each plot type:
   - For `scatter`, `line`, and `area` plots: Extract `x` and `y` values.
   - For `bar` and `stacked bar` plots: Extract `x` (categories) and `y` values.
   - For `pie` charts: Extract `labels` and `values`.
   - For `scatter3d` and `surface` plots: Extract `x`, `y`, and `z` values.

3. **Note any multiple plots** on the same figure and their configurations:
   - For stacked plots, identify the stacking order and group.
   - For area charts, ensure `fill` is specified correctly.

#### Generate `data.json`:

For each plot, include details such as:
- **type**: The Plotly trace type (e.g., `"scatter"`, `"bar"`, `"pie"`, `"scatter3d"`, `"surface"`).
  - For area charts, use `"type": "scatter"` and specify `"fill"`.
- **x**, **y**, and **z** (if applicable): Arrays of data points for the `x`, `y`, and `z` axes.
- **labels** and **values** for pie charts.
- **mode**: For line and scatter plots (e.g., `"markers"`, `"lines"`, `"markers+lines"`).
- **name**: Legend entry for the plot.
- **marker**: Properties such as color, size, and symbol for markers.
- **line**: Properties such as color, width, and dash style for lines.
- **text**: Hover text for each data point.
- **hoverinfo**: Information displayed on hover (e.g., `"x+y+text"`).
- **fill**: For area charts, specify the fill (e.g., `"tozeroy"`, `"tonexty"`).
- **fillcolor**: The color used to fill the area under the line.
- **stackgroup**: For stacked area charts, specify the group for stacking.

#### Generate `layout.json`:

Include details such as:
- **title**: The title of the graph, including text and font properties.
- **xaxis** and **yaxis**: Configuration including title, range, grid properties, and tick settings.
- **zaxis**: For 3D plots only, nested inside `scene` (there is no top-level `zaxis`): title, range, and grid properties.
- **barmode**: For stacked bar charts, set `"barmode": "stack"`.
- **legend**: Properties including orientation and positioning.
- **margin**: Values for left, right, bottom, top, and padding.
- **plot_bgcolor** and **paper_bgcolor**: Background colors of the plot area and paper.
- **scene**: For 3D plots, configuration for the 3D scene, including camera angles, lighting, and aspect ratio.

#### Generate `config.json`:

Include configuration settings such as:
- **responsive**: Whether the graph is responsive.
- **displayModeBar**: Whether the mode bar is displayed.
- **modeBarButtonsToRemove**: List of mode bar buttons to remove.
- **scrollZoom**: Whether scrolling zoom is enabled.

#### Generate `<csv>`:

Include CSV data formatted as follows:
```csv
x,y
1,4
2,5
3,6
```
Replace x,y, 1,4, 2,5, and 3,6 with actual data extracted from the graph image.

Output Format:
Wrap each JSON output in specific XML-like tags:

<data> ... </data>
<layout> ... </layout>
<config> ... </config>
<csv> ... </csv>
Make sure to follow the JSON formatting guidelines:

Double Quotes for Keys and Strings: Ensure all keys and string values in JSON are enclosed in double quotes.
No Trailing Commas: Remove any trailing commas after the last element in arrays or objects.
Example Output:

<data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [400, 430, 550, 620, 580, 600, 630, 700, 670, 630, 600, 500],
    "mode": "lines",
    "fill": "tozeroy",
    "name": "Housing",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    },
    "fillcolor": "rgba(255, 159, 64, 0.5)"
  },

  {
    "type": "bar",
    "x": ["A", "B", "C"],
    "y": [10, 15, 13],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [6, 7, 8],
    "mode": "lines",
    "fill": "tozeroy",
    "name": "Area Chart",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    },
    "fillcolor": "rgba(54, 162, 235, 0.5)"
  },
  {
    "type": "scatter3d",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "z": [7, 8, 9],
    "mode": "markers",
    "name": "3D Scatter Plot",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 5
    },
    "hoverinfo": "x+y+z+text"
  },
  {
    "type": "surface",
    "z": [
      [10, 10.625, 12.5, 15.625, 20],
      [5.625, 6.25, 8.125, 11.25, 15.625],
      [0, 1.25, 3.125, 6.25, 10.625]
    ],
    "name": "Surface Plot",
    "colorscale": "Viridis",
    "hoverinfo": "z+name"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Complex Graph Example",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "X Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Y Axis",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff",
  "scene": {
    "xaxis": {"title": "X Axis"},
    "yaxis": {"title": "Y Axis"},
    "zaxis": {"title": "Z Axis"}
  }
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtonsToRemove": ["toImage"],
  "scrollZoom": true
}
</config>
<csv>
x,y
1,4
2,5
3,6
</csv>'''

In [28]:
# Example usage: run the single-image pipeline on chart 34 and its table CSV.
# NOTE(review): these inputs come from the train/ split, while trail_run_v2 writes its
# JSON and extracted-CSV outputs under test/ folders — confirm that is intended.
image_loc = 'D:/Research Internship/chartQA dataset/ChartQA Dataset/train/png/34.png'
csv_path = 'D:/Research Internship/chartQA dataset/ChartQA Dataset/train/tables/34.csv'

trail_run_v2(image_loc, prompt2v6, csv_path)


Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2000, 2004, 2008, 2012, 2016, 2020],
    "y": [50, 67, 63, 63, 74, 83],
    "mode": "lines+markers",
    "name": "Presidential Election",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 8
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [2000, 2004, 2008, 2012, 2016, 2020],
    "y": [44, 29, 32, 34, 22, 16],
    "mode": "lines+markers",
    "name": "Doesn't Matter",
    "marker": {
      "color": "rgba(54, 162, 235, 0.8)",
      "size": 8
    },
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Voters Saying it Matters",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Year",
      "font": {
  

Document saved as ChartQA_prompt_test.docx
LaTeX document saved as ChartQA_prompt_test.tex


In [31]:
import os
import glob

def process_all_images(png_folder, csv_folder, prompt, continue_on_error=False):
    """
    Run ``trail_run_v2`` on every PNG in ``png_folder`` that has a same-named CSV.

    Images are processed in sorted filename order so repeated runs append to
    the report in the same order. Images without a matching CSV are skipped
    and counted in the final summary line.

    Args:
        png_folder (str): Path to the folder containing PNG images.
        csv_folder (str): Path to the folder containing CSV files.
        prompt (str): The prompt to use for processing.
        continue_on_error (bool): If False (default, the original behaviour),
            the first exception raised by ``trail_run_v2`` propagates and stops
            the batch. If True, the failure is printed and the batch continues
            with the next image.

    Returns:
        None. Progress and a final summary are printed.
    """
    # glob returns files in filesystem-dependent order; sort for determinism.
    png_files = sorted(glob.glob(os.path.join(png_folder, '*.png')))

    processed = 0
    skipped = []
    failed = []

    for png_file in png_files:
        # Extract the base name without extension
        base_name = os.path.splitext(os.path.basename(png_file))[0]

        # Corresponding CSV file path
        csv_file = os.path.join(csv_folder, f"{base_name}.csv")

        # Only process if the CSV file exists
        if not os.path.exists(csv_file):
            skipped.append(png_file)
            continue

        print(f"Processing {png_file} with {csv_file}")
        try:
            trail_run_v2(png_file, prompt, csv_file)
        except Exception as exc:
            if not continue_on_error:
                raise
            # One bad model response should not discard the rest of the batch.
            failed.append(png_file)
            print(f"Failed to process {png_file}: {exc!r}")
            continue
        processed += 1

    print(f"Processed {processed} image(s); skipped {len(skipped)} without a CSV; {len(failed)} failed")

# Example usage: run the pipeline over every PNG in the test split that has a
# same-named CSV in the tables folder, using prompt2v6.
png_folder = 'D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png'
csv_folder = 'D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables'

process_all_images(png_folder, csv_folder, prompt2v6)


Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\00339007006077.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\00339007006077.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Haiti",
      "Libya",
      "Morocco",
      "Lebanon",
      "Colombia"
    ],
    "y": [
      6.12,
      5.32,
      5.11,
      4.5,
      1.45
    ],
    "name": "Countries",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Share of Children who are Wasted, 2010",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Countries",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": 

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\00795994017065.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\00795994017065.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012],
    "y": [56.3, 55.9, 55.5, 55.1, 54.7, 54.3, 53.9, 53.4],
    "name": "Myanmar",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012],
    "y": [29.1, 28.7, 28.3, 28.1, 27.9, 27.7, 27.5, 27.3],
    "name": "Zambia",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012],
    "y": [20.2,

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\01001540004402.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\01001540004402.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Finland",
      "Georgia",
      "Western Asia"
    ],
    "y": [
      175.09,
      79.84,
      69.62
    ],
    "name": "Daily Meat Consumption"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Daily Meat Consumption Per Person, 1997",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Daily Meat Con

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\01499440003158.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\01499440003158.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017],
    "y": [0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 1],
    "mode": "lines+markers",
    "name": "Slovenia",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 8
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017],
    "y": [0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96, 0.96],
    "mode": "lines+markers",
    "name": "Albania",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\01729694006399.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\01729694006399.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Ecuador",
      "China",
      "Ireland",
      "Armenia",
      "Israel"
    ],
    "y": [
      0.02,
      0.02,
      0.01,
      0,
      0
    ],
    "name": "Natural Disaster Deaths as a Share of Total Deaths, 2014",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Deaths from natural disasters as a share of total deaths, 2014",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\02267499005481.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\02267499005481.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [
      2005,
      2006,
      2007,
      2008,
      2009,
      2010,
      2011,
      2012,
      2013,
      2014,
      2015,
      2016
    ],
    "y": [
      20.1,
      20.4,
      20.8,
      20.9,
      19.8,
      19.3,
      20.3,
      18.8,
      20.2,
      20.5,
      25.2,
      24.1
    ],
    "mode": "lines+markers",
    "name": "Binge Drinking",
    "marker": {
      "color": "rgba(153, 142, 255, 0.8)",
      "size": 8
    },
    "line": {
      "color": "rgba(153, 142, 255, 1)",
      "width": 2
    },
    "hoverinfo": "text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Share of drinkers who 'binged' on heaviest day of drinking in last week, United Kingdom, 2005 to 2016

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\02534409005100.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\02534409005100.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Heart disease",
      "Cancers",
      "Stroke",
      "Accidents",
      "Pneumonia and influenza",
      "Road accidents",
      "Diabetes",
      "Suicide",
      "Tuberculosis"
    ],
    "y": [
      371.7,
      155.3,
      104.7,
      58.1,
      32.5,
      27.1,
      17.7,
      10.9,
      3.9
    ],
    "name": "Number of Deaths",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Death Rates Through the 20th Century, United States, 1966",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Cause 

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\03672594001226.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\03672594001226.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "North America",
      "Philippines",
      "Croatia"
    ],
    "y": [
      3245,
      1846.5,
      0
    ],
    "name": "Countries"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Installed Geothermal Energy Capacity, 2005",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Countries",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Capacity (MW)",
      

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\04214944001005.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\04214944001005.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Obesity",
      "High blood sugar",
      "Smoking",
      "Air pollution (outdoor & indoor)",
      "High cholesterol",
      "Outdoor air pollution",
      "Diet low in vegetables",
      "Indoor air pollution",
      "Diet low in fruits",
      "Secondhand smoke",
      "Drug use",
      "Iron deficiency",
      "Diet high in salt",
      "Child wasting",
      "Low physical activity",
      "Vitamin A deficiency",
      "Unsafe water source",
      "Unsafe sanitation",
      "Non-exclusive breastfeeding",
      "Child stunting",
      "Zinc deficiency"
    ],
    "y": [
      1001,
      879,
      676,
      474,
      360,
      328,
      177,
      169,
      168,
      144,
      133,
 

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\04675954001679.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\04675954001679.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Cayman Islands",
      "Belize"
    ],
    "y": [
      95.45,
      7.21
    ],
    "name": "Teachers Trained"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Share of Teachers in Pre-Primary Education who are Trained, 2004",
    "font": {
      "family": "Arial, sans-serif",
      "size": 20,
      "color": "#333333"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 16,
        "color": "#333333"
      }
    },
    "tickfont": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#333333"
    }
  },
  "yaxis": {
    "title": {
      "text": "Percentage",
      "font": {
        "family": 

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\04960398003706.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\04960398003706.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011],
    "y": [1.51, 1.53, 1.55, 1.56, 1.53, 1.52, 1.54, 1.55, 1.57, 1.76, 1.61, 1.64, 1.69, 1.61, 1.78, 1.63, 1.67, 1.69, 1.71, 1.68, 1.83],
    "mode": "lines",
    "name": "Chicken meat yield per animal, 1991 to 2011",
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010],
    "y": [1.57, 1.61, 1.64, 1.69, 1.61, 1.78, 1.63, 1.67, 1.69, 1.71, 1.68],
    "mode": "markers",
    "name": "Bahrain",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\05114418009630.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\05114418009630.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "United States Virgin Islands",
      "Gabon",
      "Southern Sub-Saharan Africa",
      "Tonga"
    ],
    "y": [
      0.13,
      0.13,
      0.07,
      0.01
    ],
    "name": "Death Rate",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Death rates from cocaine overdoses, 2011",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\05705464003774.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\05705464003774.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "United Kingdom",
      "Colombia",
      "Mauritius"
    ],
    "y": [
      0.3,
      0.1,
      0.06
    ],
    "name": "Government Expenditure on Pre-Primary Education as Share of GDP, 2005"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Government Expenditure on Pre-Primary Education as Share of GDP, 2005",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\05810070001466.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\05810070001466.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Europe",
      "France",
      "Argentina"
    ],
    "y": [
      2.16,
      1.93,
      0.67
    ],
    "name": "Countries",
    "marker": {
      "color": [
        "rgba(54, 162, 235, 0.6)",
        "rgba(255, 159, 64, 0.6)",
        "rgba(255, 99, 132, 0.6)"
      ]
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Rapeseed yields, 1976",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Countries",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\06236926002285.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\06236926002285.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Unsafe sex",
      "Alcohol use",
      "High blood pressure",
      "Smoking",
      "High body-mass index (obesity)",
      "High blood sugar",
      "Diet low in fruits",
      "Drug use",
      "Diet low in vegetables",
      "Outdoor air pollution",
      "Household air pollution",
      "Unsafe water source",
      "Secondhand smoke",
      "Iron deficiency",
      "Poor sanitation",
      "No access to handwashing facility",
      "Low physical activity",
      "Low bone mineral density"
    ],
    "y": [
      1.31,
      880756,
      619328,
      597653,
      409812,
      334864,
      316783,
      226833,
      200275,
      194601,
      181256,
      122777,
      105318,
      

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\07019431002493.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\07019431002493.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Mali",
      "Denmark",
      "Kenya"
    ],
    "y": [
      27.5,
      18.3,
      17.3
    ],
    "name": "Mortality Rate"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Mortality from non-communicable diseases, 2000",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title": {
      "text": "Mortality Rate (%)",
      "f

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\08263936005626.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\08263936005626.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Lamb & Mutton",
      "Beef (beef herd)",
      "Cheese",
      "Milk",
      "Beef (dairy herd)",
      "Pig Meat",
      "Nuts",
      "Other Pulses",
      "Poultry Meat",
      "Eggs",
      "Grains",
      "Fish (farmed)",
      "Groundnuts",
      "Peas",
      "Tofu (soybeans)",
      "Prawns (farmed)"
    ],
    "y": [
      184.8,
      163.6,
      39.8,
      27.1,
      21.9,
      10.7,
      7.9,
      7.3,
      7.1,
      5.7,
      4.6,
      3.7,
      3.5,
      3.4,
      2.2,
      2
    ],
    "name": "Land Use",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Land use per 100 gra

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\08524901006324.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\08524901006324.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [
      2010,
      2011,
      2012,
      2013,
      2014
    ],
    "y": [
      1900,
      2200,
      2350,
      2500,
      2800
    ],
    "mode": "lines+markers",
    "name": "Number of unsheltered homeless people in England, 2010 to 2014",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Number of unsheltered homeless people in England, 2010 to 2014",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Year",
      "font": {
        "fam

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\08546788003698.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\08546788003698.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Serbia and Montenegro",
      "South America",
      "India",
      "Liberia",
      "Rwanda"
    ],
    "y": [
      4.26,
      3.19,
      2.42,
      1.12,
      0.91
    ],
    "name": "Cereal Yield, 2001 (tonnes per hectare)"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Cereal Yield, 2001 (tonnes per hectare)",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "tickangle": -45
  },
  "yaxis": {
    "title": {
      "text": "Cereal Yield (tonnes per hectare)",

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\08686631003296.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\08686631003296.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Cuba",
      "Nicaragua"
    ],
    "y": [
      3.52,
      0.28
    ],
    "name": "Government expenditure on secondary education as share of GDP, 2006"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Government expenditure on secondary education as share of GDP, 2006",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
    "zeroline": true,
    "zerolinecolor": "rgba(0, 0, 0, 0.1)"
  },
  "yaxis": {
    "title

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10099.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10099.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": ["Sep 2015", "Jul 2015"],
    "y": [49, 45],
    "name": "Disapprove",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "bar",
    "x": ["Sep 2015", "Jul 2015"],
    "y": [21, 33],
    "name": "Approve",
    "marker": {
      "color": "rgba(54, 162, 235, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Declining Support for Iran Nuclear Deal",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Time",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "s

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10146.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10146.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2002, 2007, 2009, 2011, 2013, 2015],
    "y": [79, 66, 68, 76, 69, 72],
    "mode": "lines+markers",
    "name": "Dissatisfied",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "line": {
      "color": "rgba(255, 99, 132, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [2002, 2007, 2009, 2011, 2013, 2015],
    "y": [16, 30, 20, 22, 29, 27],
    "mode": "lines+markers",
    "name": "Satisfied",
    "marker": {
      "color": "rgba(54, 162, 235, 0.6)"
    },
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Dissatisfaction with Mexico's Directio

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10160.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10160.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Read a message such as text or email",
      "Took a photo or video",
      "Sent a message such as text or email",
      "Received an incoming call",
      "Checked to see if you've received any alerts",
      "Placed a call",
      "Used an app",
      "Searched or browsed the web"
    ],
    "y": [
      61,
      58,
      52,
      52,
      34,
      33,
      29,
      25
    ],
    "name": "Social Activity",
    "marker": {
      "color": "rgba(55, 128, 191, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Cell Phone Usage During Social Activities",
    "font": {
      "family": "Arial, sans-serif",
      "size": 20,
      "color": "#000000"
    }
  },
  "xaxis":

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10222.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10222.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": ["The Daily Show", "The Colbert Report", "Anderson Cooper 360", "NBC Nightly News", "The Rachel Maddow Show", "The O'Reilly Factor"],
    "y": [36, 33, 47, 52, 53, 54],
    "name": "Median Age of The Daily Show Viewers Lower than Other TV News Programs",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Median Age of The Daily Show Viewers Lower than Other TV News Programs",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "TV News Program",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10227.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10227.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Total",
      "White",
      "Black",
      "Hispanic",
      "Republican",
      "Independent",
      "Democrat"
    ],
    "y": [
      34,
      38,
      28,
      52,
      49,
      37,
      19
    ],
    "name": "Party Divide Wider Than Racial Gap in Views of Removing Confederate Flag",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Party Divide Wider Than Racial Gap in Views of Removing Confederate Flag",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Party",
      "font": {
        "family": "Arial, sans-serif

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10365965014074.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10365965014074.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017],
    "y": [48.86,53.19,50.66,44.63,46.96,46.13,38.66,38.66,38.66,40.37,37.69,49.34,53.31,60.34],
    "mode": "lines+markers",
    "name": "Belize",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 5
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017],
    "y": [44.63,55.37,49.34,53.19,43.92,37.69,37.69,37.69,37.69,37.69,49.34,49.34,41.56,44.63],
    "mode": "lines+markers",
    "name": "Papua New Guinea",
    "marker": {
      "color": 

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10476815004500.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10476815004500.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010],
    "y": [83.673, 86.567, 86.111, 80.056, 78.085, 77.586, 78.333, 77.167, 80.278, 83.056, 83.056, 83.056, 83.056, 86.966, 88.525, 88.525, 88.525, 88.525, 88.525, 88.525, 90.218],
    "mode": "lines",
    "name": "Albania",
    "line": {
      "color": "rgba(0, 176, 255, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010],
    "y": [83.673, 86.567, 86.111, 80.056, 78.085, 77.586, 78.333, 77.167, 80.278, 83.056, 83.056, 83.056, 83.056,

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10505.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10505.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Relations w/ China",
      "Relations w/ U.S.",
      "Relations w/ Ukraine",
      "Relations w/ EU",
      "Energy policy",
      "Economy",
      "Corruption"
    ],
    "y": [
      90,
      85,
      83,
      82,
      73,
      70,
      62
    ],
    "name": "Approval Ratings",
    "orientation": "h",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Russians Overwhelmingly Support Putin's Foreign and Domestic Policies",
    "font": {
      "family": "Arial, sans-serif",
      "size": 20,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Approval Percentage",
      "font": {
        "

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10529.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10529.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2013, 2014, 2015],
    "y": [51, 23, 15],
    "name": "US",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [2013, 2014, 2015],
    "y": [63, 39, 31],
    "name": "EU",
    "marker": {
      "color": "rgba(54, 162, 235, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Russian Ratings of U.S., E.U. Collapse",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Year",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": 

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10593.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10593.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [1, 2, 3],
    "y": [4, 5, 6],
    "mode": "markers+lines",
    "name": "Line Plot",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  },
  {
    "type": "area",
    "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    "y": [400, 430, 550, 620, 580, 600, 630, 700, 670, 630, 600, 500],
    "name": "Housing",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    },
    "fill": "tozeroy"
  },

  {
    "type": "bar",
    "x": ["A", "B", "C"],
    "y": [10, 15, 13],
    "name": "Bar Chart",
    "marker": {
      "color": "rgba(255, 99

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10600.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10600.csv
Extracted Json:
  <data>
[
  {
    "type": "pie",
    "labels": ["Approve", "Disapprove", "Don't Know"],
    "values": [58, 39, 3],
    "hoverinfo": "label+percent+value"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Support for Obama's Plan to Delay Troop Pullout",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>
<config>
{
  "responsive": true,
  "displayModeBar": true,
  "modeBarButtons

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10669853002985.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10669853002985.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Burkina Faso",
      "Cambodia"
    ],
    "y": [
      75,
      70
    ],
    "name": "Countries",
    "marker": {
      "color": [
        "rgba(55, 128, 191, 0.6)",
        "rgba(255, 159, 64, 0.6)"
      ]
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Share of pregnant women who receive antiretroviral therapy, 2014",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Countries",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1)",
  

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10688218006639.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10688218006639.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017],
    "y": [48.6, 49.8, 50.1, 51.2, 53.4, 53.3, 52.1, 51.3, 50.6, 49.2, 48.3, 47.2, 46.2, 45.3, 44.2, 43.1, 41.9, 40.8, 40.1, 38.3, 36.6, 34.9, 33.3, 31.9, 30.4, 29.1, 27.9],
    "mode": "lines",
    "name": "Share of the labor force employed in agriculture, 1991 to 2017",
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Share of the labor force employed in agriculture, 1991 to 2017",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "colo

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\10715.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\10715.csv
Extracted Json:
  <data>
[
  {
    "type": "pie",
    "labels": ["Strong ties with U.S.", "Equally close ties with U.S. and Russia", "Strong ties with Russia", "Don't know"],
    "values": [57, 21, 15, 7]
  }
]
</data>
<layout>
{
  "title": {
    "text": "Germans: Majority Prefer Strong Ties with U.S.",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "legend": {
    "orientation": "h",
    "x": 0.5,
    "xanchor": "center",
    "y": -0.2,
    "font": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#000000"
    }
  },
  "margin": {
    "l": 60,
    "r": 30,
    "b": 60,
    "t": 60
  },
  "plot_bgcolor": "#ffffff",
  "paper_bgcolor": "#ffffff"
}
</layout>
<config>
{
  "responsive": true,
  "display

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\11086.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\11086.csv
Extracted Json:
  <data>
[
  {
    "type": "pie",
    "labels": ["Support Active Engagement", "Focus on Scientific Work"],
    "values": [87, 13],
    "hoverinfo": "label+percent+value"
  },
  {
    "type": "bar",
    "x": ["Strongly Support", "Support", "Oppose", "Strongly Oppose"],
    "y": [22, 52, 19, 7],
    "name": "Support for Active Engagement in Public Policy Debates",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Most Scientists Support Active Engagement in Public Policy Debates",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Support Level",
      "font": {
        "family":

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\11128.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\11128.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "U.S.",
      "Germany",
      "Italy",
      "Greece",
      "Poland",
      "France",
      "Spain",
      "UK"
    ],
    "y": [
      43,
      63,
      63,
      62,
      62,
      54,
      48,
      37
    ],
    "name": "Success is out of control",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "European Millennials Say Success Is Out of Their Hands",
    "font": {
      "family": "Arial, sans-serif",
      "size": 20,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 14,
        "color": "#000000"
    

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\11234.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\11234.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Using landline phone",
      "Calling on your cell phone",
      "Sending text messages",
      "Sending email",
      "Using chat or IM",
      "Using social media sites"
    ],
    "y": [
      [16, 51, 19, 12],
      [9, 43, 29, 17],
      [7, 32, 37, 22],
      [5, 35, 36, 21],
      [4, 25, 36, 32],
      [2, 14, 28, 53]
    ],
    "name": "Communication Method",
    "hoverinfo": "text",
    "text": [
      "Very secure: 16<br>Somewhat secure: 51<br>Not very secure: 19<br>Not at all secure: 12",
      "Very secure: 9<br>Somewhat secure: 43<br>Not very secure: 29<br>Not at all secure: 17",
      "Very secure: 7<br>Somewhat secure: 32<br>Not very secure: 37<br>Not at all secure: 22",
      "Very secure: 5<br>S

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\11495956000050.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\11495956000050.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2004, 2006, 2008, 2010, 2012, 2014],
    "y": [4.3, 4.15, 4.05, 3.95, 3.85, 3.75],
    "name": "Eritrea",
    "line": {
      "color": "rgba(255, 99, 132, 1)",
      "width": 2
    }
  },
  {
    "type": "scatter",
    "x": [2004, 2006, 2008, 2010, 2012, 2014],
    "y": [2.75, 2.85, 2.95, 3.05, 3.15, 3.25],
    "name": "North America",
    "line": {
      "color": "rgba(54, 162, 235, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "The disease burden from alcohol use disorders, 2004 to 2014",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Year",
      "font": {
        "

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\11627839005738.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\11627839005738.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014],
    "y": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    "mode": "lines",
    "name": "North Korea",
    "line": {
      "color": "rgba(255, 99, 132, 1)",
      "width": 2
    }
  }
]
</data>
<layout>
{
  "title": {
    "text": "Death rates from conflict and terrorism, 2000 to 2014",
    "font": {
      "family": "Arial, sans-serif",
      "size": 24,
      "color": "#000000"
    }
  },
  "xaxis": {
    "title": {
      "text": "Year",
      "font": {
        "family": "Arial, sans-serif",
        "size": 18,
        "color": "#000000"
      }
    },
    "showgrid": true,
    "gridcolor": "rgba(0, 0, 0, 0.1

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\11680.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\11680.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [1969, 1974, 1979, 1984, 1989, 1994, 1999, 2004, 2009, 2014],
    "y": [84, 66, 30, 22, 81, 75, 63, 60, 41, 45],
    "mode": "lines+markers",
    "name": "Legal",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)",
      "size": 8
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [1969, 1974, 1979, 1984, 1989, 1994, 1999, 2004, 2009, 2014],
    "y": [12, 33, 60, 70, 16, 21, 31, 32, 52, 55],
    "mode": "lines+markers",
    "name": "Illegal",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)",
      "size": 8
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    },
    "hoverin

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\11832048004655.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\11832048004655.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "Lithuania",
      "Bolivia"
    ],
    "y": [
      19,
      9.3
    ],
    "name": "Commercial Bank Branches"
  }
]
</data>
<layout>
{
  "title": {
    "text": "Number of Commercial Bank Branches per 100,000 Adults in 2011",
    "font": {
      "family": "Arial, sans-serif",
      "size": 18,
      "color": "#333333"
    }
  },
  "xaxis": {
    "title": {
      "text": "Country",
      "font": {
        "family": "Arial, sans-serif",
        "size": 14,
        "color": "#333333"
      }
    },
    "tickfont": {
      "family": "Arial, sans-serif",
      "size": 12,
      "color": "#333333"
    }
  },
  "yaxis": {
    "title": {
      "text": "Number of Branches per 100,000 Adults",
      "fon

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\1201.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\1201.csv
Extracted Json:
  <data>
[
  {
    "type": "bar",
    "x": [
      "18-29",
      "30-49",
      "50-64",
      "65+",
      "All adults"
    ],
    "y": [
      26,
      34,
      43,
      49,
      38
    ],
    "name": "Personal Health",
    "marker": {
      "color": "rgba(255, 99, 132, 0.6)"
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "bar",
    "x": [
      "18-29",
      "30-49",
      "50-64",
      "65+",
      "All adults"
    ],
    "y": [
      40,
      43,
      46,
      32,
      41
    ],
    "name": "Personal Finances",
    "marker": {
      "color": "rgba(54, 162, 235, 0.6)"
    },
    "hoverinfo": "x+y+text"
  }
]
</data>
<layout>
{
  "title": {
    "text": "COVID-19 Impact on Personal Health and Finances",
    "font": {
      "family": "Arial, san

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\12051.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\12051.csv
Extracted Json:
  <data>
[
  {
    "type": "scatter",
    "x": [2009, 2010, 2011, 2012, 2013, 2014],
    "y": [48, 53, 57, 59, 38, 51],
    "name": "India",
    "marker": {
      "color": "rgba(255, 159, 64, 0.8)"
    },
    "line": {
      "color": "rgba(255, 159, 64, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [2009, 2010, 2011, 2012, 2013, 2014],
    "y": [32, 23, 19, 23, 33, 25],
    "name": "Taliban",
    "marker": {
      "color": "rgba(75, 192, 192, 0.6)"
    },
    "line": {
      "color": "rgba(75, 192, 192, 1)",
      "width": 2
    },
    "hoverinfo": "x+y+text"
  },
  {
    "type": "scatter",
    "x": [2009, 2010, 2011, 2012, 2013, 2014],
    "y": [4, 3, 5, 4, 4, 2],
    "name": "Al-Qaeda",
    "marker": {
      "c

Document saved as ChartQA_prompt_test.docx
Processing D:/Research Internship/chartQA dataset/ChartQA Dataset/test/png\12097783003404.png with D:/Research Internship/chartQA dataset/ChartQA Dataset/test/tables\12097783003404.csv


KeyboardInterrupt: 

In [20]:
import os
import csv
import json

def generate_latex_doc_v2(doc_path, image_path, csv_path, prompt, data, layout, config, updated_data, updated_layout, updated_config, chart_path, performance_metrics, extracted_csv):
    """Write a LaTeX report describing one chart-extraction test run.

    The report contains, in order: the source image, the original CSV table
    (if ``csv_path`` exists), the extracted CSV table (if ``extracted_csv``
    exists), the prompt, the extracted and updated JSON objects, the updated
    chart image, and a table of performance metrics.

    Args:
        doc_path: Path of the ``.tex`` file to write (overwritten if present).
        image_path: Path of the source chart image.
        csv_path: Path of the original CSV; the section is skipped when the
            value is falsy or the file does not exist.
        prompt: Prompt text, reproduced verbatim.
        data, layout, config: JSON-serializable objects shown under
            "Extracted JSON".
        updated_data, updated_layout, updated_config: JSON-serializable
            objects shown under "Updated JSON".
        chart_path: Path of the regenerated chart image.
        performance_metrics: Iterable of rows, each an iterable of four cells
            (task, model, description, time); cells may be any type and are
            converted with ``str``. ``None`` is treated as no rows.
        extracted_csv: Path of the extracted CSV; skipped like ``csv_path``.

    Returns:
        None. The document is written to ``doc_path`` and a confirmation
        line is printed.
    """
    # Characters that are special in LaTeX text mode, with their escapes.
    # Escaping is done per character so an inserted backslash is never
    # escaped a second time.
    latex_escapes = {
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    }

    def escape_latex(value):
        # Escape arbitrary text for use in normal (non-verbatim) LaTeX text.
        return "".join(latex_escapes.get(ch, ch) for ch in str(value))

    def latex_path(path):
        # LaTeX treats a backslash as the start of a control sequence, so
        # Windows separators must become forward slashes.
        return str(path).replace('\\', '/')

    def verbatim_text(value):
        # Inside verbatim every character prints literally, so no escaping is
        # wanted; only the terminator sequence itself must not appear.
        return str(value).replace(r'\end{verbatim}', r'\end {verbatim}')

    def format_json_to_latex(json_obj):
        # Pretty-print JSON for a verbatim block (no backslash escaping).
        return verbatim_text(json.dumps(json_obj, indent=4))

    def csv_to_latex_table(csv_path):
        # Convert the CSV file at csv_path to a LaTeX tabular.
        with open(csv_path, 'r', newline='', encoding='utf-8') as csvfile:
            rows = list(csv.reader(csvfile))
        n_cols = max((len(row) for row in rows), default=0)
        if n_cols == 0:
            # An empty tabular column spec is invalid LaTeX.
            return "\\emph{(no data)}\n"
        latex_table = "\\begin{tabular}{%s}\n" % "|".join(["c"] * n_cols)
        for row in rows:
            # Pad short rows so every line has the declared column count.
            cells = [escape_latex(cell) for cell in row] + [""] * (n_cols - len(row))
            latex_table += " & ".join(cells) + " \\\\\n"
        latex_table += "\\end{tabular}\n"
        return latex_table

    def csv_section(title, path):
        # Section with a width-limited table, or "" when the file is absent.
        if not (path and os.path.exists(path)):
            return ""
        return r"""\section*{%s: %s}
\begin{center}
\begin{adjustbox}{max width=\textwidth}
%s
\end{adjustbox}
\end{center}
""" % (title, escape_latex(os.path.basename(latex_path(path))), csv_to_latex_table(path))

    def json_section(title, named_objects):
        # Section holding one verbatim JSON subsection per (name, object).
        section = "\\section*{%s}\n" % title
        for name, obj in named_objects:
            section += "\\subsection*{%s}\n\\begin{verbatim}\n%s\n\\end{verbatim}\n" % (
                escape_latex(name), format_json_to_latex(obj))
        return section

    def figure_block(path, caption_prefix):
        # Figure environment; the caption shows the escaped file name.
        tex_path = latex_path(path)
        return r"""\begin{figure}[h!]
\centering
\includegraphics[width=0.7\textwidth]{%s}
\caption{%s: %s}
\end{figure}
""" % (tex_path, caption_prefix, escape_latex(os.path.basename(tex_path)))

    # Start the LaTeX document
    latex_content = r"""\documentclass{article}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{longtable}
\usepackage{adjustbox}
\usepackage{geometry}
\geometry{a4paper, margin=1in}
\title{Research Project Documentation}
\begin{document}
\maketitle
"""

    # Add image
    latex_content += "\\section*{Test Run}\n" + figure_block(image_path, "Figure")

    # Add Original and Extracted CSV tables (each skipped if unavailable)
    latex_content += csv_section("Original Data", csv_path)
    latex_content += csv_section("Extracted Data", extracted_csv)

    # Add Prompt (verbatim, so the text is inserted unescaped)
    latex_content += r"""\section*{Prompt Used}
\begin{verbatim}
%s
\end{verbatim}
""" % verbatim_text(prompt)

    # Add Extracted JSON
    latex_content += json_section("Extracted JSON", [
        ("data", data),
        ("layout", layout),
        ("config", config),
    ])

    # Add Updated JSON
    latex_content += json_section("Updated JSON", [
        ("updated_data", updated_data),
        ("updated_layout", updated_layout),
        ("updated_config", updated_config),
    ])

    # Add Updated Chart
    latex_content += "\\section*{Updated Chart}\n" + figure_block(chart_path, "Updated Chart")

    # Add Performance Metrics
    latex_content += r"""\section*{Performance Metrics}
\begin{longtable}{|c|c|c|c|}
\hline
Task & Model Used & Description & Time Taken (s) \\
\hline
\endfirsthead
\hline
Task & Model Used & Description & Time Taken (s) \\
\hline
\endhead
\hline
\endfoot
"""

    for metric in performance_metrics or []:
        # Cells may be numbers (e.g. elapsed seconds), so stringify each one.
        latex_content += " & ".join(escape_latex(cell) for cell in metric) + " \\\\\n"
        latex_content += "\\hline\n"

    latex_content += r"""\end{longtable}
\end{document}
"""

    # Save LaTeX document
    with open(doc_path, 'w', encoding='utf-8') as f:
        f.write(latex_content)

    print(f"LaTeX document saved as {doc_path}")

# Example usage
# generate_latex_doc_v2('path/to/output.tex', 'path/to/image.png', 'path/to/original.csv', 'Prompt text here', 
#                       data_json, layout_json, config_json, updated_data_json, updated_layout_json, updated_config_json, 
#                       'path/to/updated_chart.png', performance_metrics_list, 'path/to/extracted.csv')
