# Care Copilot MVP

## Key Features
- User prompt -> chart creation
- Automatic (LLM generated) or manual chart type and names (currently only bar charts and pie charts)

## Future Improvements
- Better object creation workflow
- Error reporting features
- Create a more fluid flow, i.e. feedback between the different layers of the program

In [1]:
import os
import sys

# Location of the carecognitics_backend checkout. Set the
# CARECOGNITICS_BACKEND environment variable to run the notebook on another
# machine; the default is the original developer path, so existing setups
# keep working unchanged.
BACKEND_ROOT = os.environ.get(
    'CARECOGNITICS_BACKEND', 'f:\\Users\\hassa\\carecognitics_backend'
)

# Make the project's packages importable. The membership check keeps
# re-runs of this cell from appending duplicate entries.
if BACKEND_ROOT not in sys.path:
    sys.path.append(BACKEND_ROOT)

In [2]:
from supersetapiclient.client import SupersetAPIClient
from security.nl_dml_security_layer import NL_SECURITY_LAYER
from llm.sql_generator import invoke_full_chain
from visualization_recommendation_system.visual_recommender import visual_recommender 



## Manual dashboard building using the charts workflow

In [3]:
import os

# Connection settings are read from the environment so real credentials
# never have to be saved in the notebook. The fallbacks are the previous
# hard-coded local-development values, so behaviour is unchanged when the
# variables are not set.
sc = SupersetAPIClient(
    base_url=os.environ.get('SUPERSET_BASE_URL', 'http://localhost:8088'),
    username=os.environ.get('SUPERSET_USERNAME', 'admin'),
    password=os.environ.get('SUPERSET_PASSWORD', 'admin'),
)

### create new dashboard (optional)

In [5]:
# Start a fresh dashboard that later charts can be attached to.
new_dashboard_name = 'Global Track sales Deep Dive v2'
sc.create_dashboard(name=new_dashboard_name)

### NL input

In [6]:
# Natural-language request to turn into a chart. Named `user_prompt` so the
# builtin `input()` is not shadowed for the rest of the session.
user_prompt = "make me a pie chart for the top 5 most popular rock artists in the USA"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection (it used
# to be printed only). Raise instead, so the following cells cannot go on to
# unpack a `final_output` left over from an earlier run, or hit a NameError
# on a fresh kernel.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

# Run the LLM chain; later cells unpack `final_output`.
final_output = invoke_full_chain(user_prompt)

In [7]:
# Unpack the five values produced by the LLM chain, in order.
(
    sql_query,
    dataset_name,
    chart_type,
    viz_type,
    slice_name,
) = final_output

In [8]:
# Print each generated field followed by a divider line. The leading space
# on the first two labels is kept so the output matches the original.
divider = "------------------------------"
summary_fields = (
    (" query", sql_query),
    (" dataset_name", dataset_name),
    ("chart type", chart_type),
    ("viz_type", viz_type),
    ("slice_name", slice_name),
)
for field_label, field_value in summary_fields:
    print(f"{field_label}: {field_value}\n{divider}")

 query: SELECT a.name AS artist_name, SUM(i.total) AS total_sales FROM artist a JOIN album al ON a.artist_id = al.artist_id JOIN track t ON al.album_id = t.album_id JOIN genre g ON t.genre_id = g.genre_id JOIN invoice_line il ON t.track_id = il.track_id JOIN invoice i ON il.invoice_id = i.invoice_id WHERE g.name = 'Rock' GROUP BY a.name ORDER BY total_sales DESC LIMIT 5;
------------------------------
 dataset_name: Rock_Artists_Sales
------------------------------
chart type: Pie Chart
------------------------------
viz_type: pie
------------------------------
slice_name: Rock Royalty: Top 5 Artists in the USA
------------------------------


### create new dataset from LLM generated SQL

In [9]:
# Create a dataset from the LLM-generated SQL under the suggested name.
sc.create_dataset(table_name=dataset_name, sql=sql_query)


dataset has been created with name Rock_Artists_Sales


### choose a chart type manually (optional)

In [9]:
# Instantiate the project's visual recommender for the optional manual
# chart-type step.
vr = visual_recommender()

In [10]:
# Parse the generated SQL into the features the recommender works with
# (the displayed result has 'agg', 'agg_by' and 'groupby' keys).
feature_dict = vr.parse_sql(sql=sql_query)
feature_dict

{'agg': 'SUM', 'agg_by': 'total', 'groupby': 'billing_country'}

In [9]:
# viz_type=vr.generate_visual_recommendations(sql=sql_query, return_type='best')

### create the chart

In [10]:
# Create the chart from the LLM-suggested title and viz type; no dashboard
# ids are passed at this point.
sc.create_chart(slice_name=slice_name, viz_type=viz_type, dashboard_ids=None)

In [11]:
# Display the dashboard ids held by the client (presumably those belonging
# to the logged-in user — confirm against the client implementation).
sc.dashboards.user_dashboard_ids

[31]

### repeat for different inputs 

#### In this example we get an error: "UK" does not appear in the dataset, so the query returns empty data with no columns, hence the IndexError

In [12]:
# Named `user_prompt` so the builtin `input()` is not shadowed.
user_prompt = "make me a pie chart for the top 5 most popular genres in the UK"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the lines below would build a
# dataset and chart from the previous cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output

# Create the dataset from the generated SQL, then the chart on top of it.
sc.create_dataset(sql=sql_query, table_name=dataset_name)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=None)

dataset has been created with name UK_Music_Genre_Sales


IndexError: list index out of range

In [13]:
# Inspect the SQL generated for the failing prompt (note the
# billing_country = 'UK' filter).
sql_query

"SELECT genre.name, SUM(invoice_line.quantity) AS total_sales,     (SUM(invoice_line.quantity) / (SELECT SUM(quantity) FROM invoice_line WHERE invoice_id IN (SELECT invoice_id FROM invoice WHERE billing_country = 'UK'))) * 100 AS percentage FROM invoice_line JOIN track ON invoice_line.track_id = track.track_id JOIN genre ON track.genre_id = genre.genre_id JOIN invoice ON invoice_line.invoice_id = invoice.invoice_id WHERE invoice.billing_country = 'UK' GROUP BY genre.genre_id ORDER BY total_sales DESC LIMIT 5;"

#### Now that we use the correct country name ("United Kingdom" instead of "UK"), we get the right output

In [14]:
# Named `user_prompt` so the builtin `input()` is not shadowed.
user_prompt = "make me a pie chart for the top 5 most popular genres in the United Kingdom"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the lines below would build a
# dataset and chart from the previous cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output

# Create the dataset from the generated SQL, then the chart on top of it.
sc.create_dataset(sql=sql_query, table_name=dataset_name)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=None)

dataset has been created with name UK_Music_Genre_Popularity


In [15]:
# Named `user_prompt` so the builtin `input()` is not shadowed.
user_prompt = "make me a bar chart for the most popular artists globally"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the lines below would build a
# dataset and chart from the previous cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output

# Create the dataset from the generated SQL, then the chart on top of it.
sc.create_dataset(sql=sql_query, table_name=dataset_name)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=None)

dataset has been created with name global_music_sales_by_artist


In [16]:
# Named `user_prompt` so the builtin `input()` is not shadowed.
user_prompt = "make me a bar chart for sales revenue of the artist 'Lost' globally"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the lines below would build a
# dataset and chart from the previous cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output

# Create the dataset from the generated SQL, then the chart on top of it.
sc.create_dataset(sql=sql_query, table_name=dataset_name)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=None)

dataset has been created with name lost_sales_revenue


KeyError: 'id'

In [4]:
# Named `user_prompt` so the builtin `input()` is not shadowed.
user_prompt = "make me a bar chart for global sales revenue of the artist 'Lost'"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the lines below would build a
# dataset and chart from the previous cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output

# Create the dataset, then attach the chart to dashboard 31 (the id shown
# earlier by `sc.dashboards.user_dashboard_ids`).
sc.create_dataset(sql=sql_query, table_name=dataset_name, verbose=True)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=[31])

dataset has been created with name Global_Sales_Revenue_Lost


In [5]:
# Inspect the SQL generated for the 'Lost' revenue prompt.
sql_query

'SELECT      EXTRACT(YEAR FROM invoice_date) AS "Year",     SUM(total) AS "Global Sales Revenue for Lost" FROM invoice JOIN customer ON invoice.customer_id = customer.customer_id JOIN invoice_line ON invoice.invoice_id = invoice_line.invoice_id JOIN track ON invoice_line.track_id = track.track_id JOIN album ON track.album_id = album.album_id JOIN artist ON album.artist_id = artist.artist_id WHERE artist.name = \'Lost\' GROUP BY EXTRACT(YEAR FROM invoice_date) ORDER BY "Year";'

In [6]:
# Named `user_prompt` so the builtin `input()` is not shadowed. The prompt
# text (including its trailing space) is kept exactly as before.
user_prompt = "make a pie chart for the genres with the longest duration "
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the lines below would build a
# dataset and chart from the previous cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output

# Create the dataset, then attach the chart to dashboard 31 (the id shown
# earlier by `sc.dashboards.user_dashboard_ids`).
sc.create_dataset(sql=sql_query, table_name=dataset_name, verbose=True)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=[31])

dataset has been created with name Music_Genre_Duration


In [4]:
# Named `user_prompt` so the builtin `input()` is not shadowed.
user_prompt = "create a timeseries chart for tracks sales in the USA"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the lines below would build a
# dataset and chart from the previous cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output

# Create the dataset, then attach the chart to dashboard 31 (the id shown
# earlier by `sc.dashboards.user_dashboard_ids`).
sc.create_dataset(sql=sql_query, table_name=dataset_name, verbose=False)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=[31],
                verbose=True)

dataset has been created with name track_sales_usa


In [6]:
# Display every value the chain produced for the timeseries prompt.
sql_query, dataset_name, chart_type, viz_type, slice_name

("SELECT     date_trunc('month', i.invoice_date) AS month,     SUM(il.quantity) AS total_tracks_sold FROM invoice i JOIN invoice_line il ON i.invoice_id = il.invoice_id WHERE i.billing_country = 'USA' GROUP BY month ORDER BY month;",
 'track_sales_usa',
 'Timeseries',
 'echarts_timeseries_line',
 '"US Track Sales Over Time"')

In [5]:
# Named `user_prompt` so the builtin `input()` is not shadowed.
user_prompt = "create a table for the names and sales revenue of all genres"
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the progress messages below would
# report success and a dataset and chart would be built from the previous
# cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

print('generating SQL'+'\n'+'-'*20)
final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output
print('SQL generated'+'\n'+'-'*20)

print('creating dataset'+'\n'+'-'*20)
sc.create_dataset(sql=sql_query, table_name=dataset_name, verbose=False)

# Attach the chart to dashboard 31 (the id shown earlier by
# `sc.dashboards.user_dashboard_ids`).
print('creating chart'+'\n'+'-'*20)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=[31],
                verbose=True)

generating SQL
*20
SQL generated
*20
creating dataset
*20
dataset has been created with name music_sales_data
creating chart
*20


In [4]:
# Named `user_prompt` so the builtin `input()` is not shadowed. The prompt
# text (including its trailing space) is kept exactly as before.
user_prompt = "create a scatter plot of the revenue of each artist and the genre they are associated with, only return two columns "
cleaner = NL_SECURITY_LAYER(user_prompt)
protection_output = cleaner.protect_input()

# A truthy result from the security layer is treated as a rejection. Raise
# instead of only printing it: otherwise the progress messages below would
# report success and a dataset and chart would be built from the previous
# cell's `final_output`.
if protection_output:
    raise ValueError(f"Prompt rejected by the security layer: {protection_output}")

print('generating SQL'+'\n'+'-'*20)
final_output = invoke_full_chain(user_prompt)
sql_query, dataset_name, chart_type, viz_type, slice_name = final_output
print('SQL generated'+'\n'+'-'*20)

print('creating dataset'+'\n'+'-'*20)
sc.create_dataset(sql=sql_query, table_name=dataset_name, verbose=False)

# Attach the chart to dashboard 31 (the id shown earlier by
# `sc.dashboards.user_dashboard_ids`).
print('creating chart'+'\n'+'-'*20)
sc.create_chart(slice_name=slice_name,
                viz_type=viz_type,
                dashboard_ids=[31],
                verbose=True)

generating SQL
--------------------
SQL generated
--------------------
creating dataset
--------------------
dataset has been created with name Music_Revenue_By_Genre
creating chart
--------------------


In [5]:
# Display every value the chain produced for the scatter-plot prompt.
sql_query, dataset_name, chart_type, viz_type, slice_name

('SELECT     g.name AS Genre,     SUM(il.unit_price * il.quantity) AS Revenue FROM genre g JOIN track t ON g.genre_id = t.genre_id JOIN invoice_line il ON t.track_id = il.track_id JOIN invoice i ON il.invoice_id = i.invoice_id GROUP BY g.name;',
 'Music_Revenue_By_Genre',
 'Scatter Plot',
 'echarts_timeseries_scatter',
 '"Revenue by Genre"')