In [71]:
import os
from pathlib import Path
from typing import Dict, List, Optional, Any, Union
import json
import traceback


import cudf
import pandas as pd
import cuxfilter as cxf

from cuxfilter import DataFrame, charts
# from cuxfilter.charts.datashader import line

In [8]:
def _check_gpu() -> bool:
    try:
        test = cudf.DataFrame({'a': [1]})
        return True
    except:
        return False

use_gpu = _check_gpu()
use_gpu

True

In [17]:
def load_data(filepath: str) -> Dict[str, Any]:
    """Load data and create cuxfilter.DataFrame."""
    try:
        filepath = Path(filepath)
        if not filepath.exists():
            return {"success": False, "error": f"File not found"}
        
        if filepath.suffix == '.csv':
            df = cudf.read_csv(str(filepath))
        elif filepath.suffix == '.parquet':
            df = cudf.read_parquet(str(filepath))
        elif filepath.suffix == '.json':
            df = cudf.read_json(str(filepath))
        else:
            return {"success": False, "error": f"Unsupported type"}
        
        cxf_df = cxf.DataFrame.from_dataframe(df)
        
        return df, cxf_df, {
            "success": True,
            "filepath": str(filepath),
            "shape": df.shape,
            "columns": list(df.columns),
            "using_gpu": use_gpu
        }
    except Exception as e:
        return {"success": False, "error": str(e)}

df, cxf_df, dic = load_data("./data/nyc_taxi_wide.parquet")

In [19]:
df.head()

Unnamed: 0,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_x,pickup_y,dropoff_x,dropoff_y,fare_amount,tip_amount,dropoff_hour,pickup_hour
0,2015-01-15 19:05:39,2015-01-15 19:23:42,1,1.59,-8236963.0,4975552.5,-8234835.5,4975627.0,12.0,3.25,19,19
1,2015-01-10 20:33:38,2015-01-10 20:53:28,1,3.3,-8237826.0,4971752.5,-8237020.5,4976875.0,14.5,2.0,20,20
2,2015-01-10 20:33:38,2015-01-10 20:43:41,1,1.8,-8233561.5,4983296.5,-8232279.0,4986477.0,9.5,0.0,20,20
3,2015-01-10 20:33:39,2015-01-10 20:35:31,1,0.5,-8238654.0,4970221.0,-8238124.0,4971127.0,3.5,0.0,20,20
4,2015-01-10 20:33:39,2015-01-10 20:52:58,1,3.0,-8234433.5,4977363.0,-8238107.5,4974457.0,15.0,0.0,20,20


In [21]:
cxf_df.data.head()

Unnamed: 0,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_x,pickup_y,dropoff_x,dropoff_y,fare_amount,tip_amount,dropoff_hour,pickup_hour
0,2015-01-15 19:05:39,2015-01-15 19:23:42,1,1.59,-8236963.0,4975552.5,-8234835.5,4975627.0,12.0,3.25,19,19
1,2015-01-10 20:33:38,2015-01-10 20:53:28,1,3.3,-8237826.0,4971752.5,-8237020.5,4976875.0,14.5,2.0,20,20
2,2015-01-10 20:33:38,2015-01-10 20:43:41,1,1.8,-8233561.5,4983296.5,-8232279.0,4986477.0,9.5,0.0,20,20
3,2015-01-10 20:33:39,2015-01-10 20:35:31,1,0.5,-8238654.0,4970221.0,-8238124.0,4971127.0,3.5,0.0,20,20
4,2015-01-10 20:33:39,2015-01-10 20:52:58,1,3.0,-8234433.5,4977363.0,-8238107.5,4974457.0,15.0,0.0,20,20


In [22]:
dic

{'success': True,
 'filepath': 'data/nyc_taxi_wide.parquet',
 'shape': (11842094, 12),
 'columns': ['tpep_pickup_datetime',
  'tpep_dropoff_datetime',
  'passenger_count',
  'trip_distance',
  'pickup_x',
  'pickup_y',
  'dropoff_x',
  'dropoff_y',
  'fare_amount',
  'tip_amount',
  'dropoff_hour',
  'pickup_hour'],
 'using_gpu': True}

In [24]:
def get_data_info(df) -> Dict[str, Any]:
    """Get info about loaded data."""
    if df is None:
        return {"success": False, "error": "No data"}
    return {
        "success": True,
        "shape": df.shape,
        "columns": list(df.columns)
    }

info = get_data_info(df)
info

{'success': True,
 'shape': (11842094, 12),
 'columns': ['tpep_pickup_datetime',
  'tpep_dropoff_datetime',
  'passenger_count',
  'trip_distance',
  'pickup_x',
  'pickup_y',
  'dropoff_x',
  'dropoff_y',
  'fare_amount',
  'tip_amount',
  'dropoff_hour',
  'pickup_hour']}

In [46]:
cxf_df.data.head()

Unnamed: 0,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_x,pickup_y,dropoff_x,dropoff_y,fare_amount,tip_amount,dropoff_hour,pickup_hour
0,2015-01-15 19:05:39,2015-01-15 19:23:42,1,1.59,-8236963.0,4975552.5,-8234835.5,4975627.0,12.0,3.25,19,19
1,2015-01-10 20:33:38,2015-01-10 20:53:28,1,3.3,-8237826.0,4971752.5,-8237020.5,4976875.0,14.5,2.0,20,20
2,2015-01-10 20:33:38,2015-01-10 20:43:41,1,1.8,-8233561.5,4983296.5,-8232279.0,4986477.0,9.5,0.0,20,20
3,2015-01-10 20:33:39,2015-01-10 20:35:31,1,0.5,-8238654.0,4970221.0,-8238124.0,4971127.0,3.5,0.0,20,20
4,2015-01-10 20:33:39,2015-01-10 20:52:58,1,3.0,-8234433.5,4977363.0,-8238107.5,4974457.0,15.0,0.0,20,20


In [47]:
type(cxf_df)

cuxfilter.dataframe.DataFrame

In [72]:
def create_bar_chart(x: str, y: Optional[str] = None, aggregate_fn: str = "count", title: str = "") -> Any:
    """cuxfilter.charts.bar()"""
    return cxf.charts.bar(x=x, y=y, aggregate_fn=aggregate_fn, title=title) if y else cxf.charts.bar(x=x, title=title)

def create_line_chart(x: str, y: str, color: Optional[str] = None, title: str = "") -> Any:
    """cuxfilter.charts.line()"""
    return cxf.charts.datashader.line(x=x, y=y, color=color, title=title) if color else cxf.charts.datashader.line(x=x, y=y, title=title)

def create_scatter_chart(x: str, y: str, color: Optional[str] = None, size: Optional[str] = None, title: str = "") -> Any:
    """cuxfilter.charts.scatter()"""
    kwargs = {"x": x, "y": y, "title": title}
    if color: kwargs["color"] = color
    if size: kwargs["size"] = size
    return cxf.charts.scatter(**kwargs)

def create_stacked_lines_chart(x: str, y: Union[str, List[str]], title: str = "") -> Any:
    """cuxfilter.charts.stacked_lines()"""
    return cxf.charts.stacked_lines(x=x, y=y, title=title)

def create_heatmap(x: str, y: str, aggregate_fn: str = "count", title: str = "") -> Any:
    """cuxfilter.charts.heatmap()"""
    return cxf.charts.heatmap(x=x, y=y, aggregate_fn=aggregate_fn, title=title)

def create_range_slider(x: str, title: str = "") -> Any:
    """cuxfilter.charts.range_slider()"""
    return cxf.charts.range_slider(x=x, title=title)

def create_date_range_slider(x: str, title: str = "") -> Any:
    """cuxfilter.charts.date_range_slider()"""
    return cxf.charts.date_range_slider(x=x, title=title)

def create_float_slider(x: str, title: str = "") -> Any:
    """cuxfilter.charts.float_slider()"""
    return cxf.charts.float_slider(x=x, title=title, step_size=0.5)

def create_int_slider(x: str, title: str = "") -> Any:
    """cuxfilter.charts.int_slider()"""
    return cxf.charts.int_slider(x=x, title=title)

def create_drop_down(x: str, title: str = "") -> Any:
    """cuxfilter.charts.dropdown()"""
    return cxf.charts.drop_down(x=x, title=title)

def create_multi_select(x: str, title: str = "") -> Any:
    """cuxfilter.charts.multi_select()"""
    return cxf.charts.multi_select(x=x, title=title)

def create_number_display(x: str, aggregate_fn: str = "mean", title: str = "") -> Any:
    """cuxfilter.charts.number()"""
    return cxf.charts.number(expression=x, aggregate_fn=aggregate_fn, title=title)

def create_view_dataframe(x: list, title: str = "") -> Any:
    """cuxfilter.charts.view_dataframe([column names])"""
    return cxf.charts.view_dataframe(x)








def get_layout(layout_type: str, num_charts: int = None) -> Any:
    """Get cuxfilter.layouts object."""
    if layout_type == "auto" and num_charts:
        if num_charts == 1: return cxf.layouts.single_feature
        elif num_charts == 2: return cxf.layouts.double_feature
        elif num_charts == 3: return cxf.layouts.triple_feature
        else: return cxf.layouts.quad_feature
    
    layouts = {
        "single_feature": cxf.layouts.single_feature,
        "double_feature": cxf.layouts.double_feature,
        "triple_feature": cxf.layouts.triple_feature,
        "quad_feature": cxf.layouts.quad_feature
    }
    return layouts.get(layout_type, cxf.layouts.double_feature)

def get_theme(theme_name: str) -> Any:
    """Get cuxfilter.themes object."""
    themes = {
        "rapids_dark": cxf.themes.rapids_dark,
        "rapids": cxf.themes.rapids,
        "dark": cxf.themes.dark,
        "light": cxf.themes.default
    }
    return themes.get(theme_name, cxf.themes.rapids_dark)

def create_dashboard(charts: List[Any], layout_type: str = "auto", theme_name: str = "rapids_dark", title: str = "Dashboard") -> Dict[str, Any]:
    """Create cuxfilter.DashBoard."""
    try:
        if not cxf_df or not charts:
            return {"success": False, "error": "No data or charts"}
        
        layout = get_layout(layout_type, len(charts))
        theme = get_theme(theme_name)
        
        active_dashboard = cxf_df.dashboard(
            charts=charts, layout=layout, theme=theme, title=title
        )
        
        # dashboard_counter += 1
        return active_dashboard
        # {
        #     "success": True,
        #     "dashboard_id": f"dashboard_{dashboard_counter}",
        #     "num_charts": len(charts)
        # }
    except Exception as e:
        return {"success": False, "error": str(e)}

In [73]:
bar_chart = create_bar_chart(x="passenger_count", aggregate_fn = "count", title = "bar chart of passenger count")
line_chart = create_line_chart(x="trip_distance", y = "fare_amount", title = "line chart of trip distance")
scatter_chart = create_scatter_chart(x="trip_distance", y = "fare_amount", title = "scatter chart of trip distance")
stacked_lines_chart = create_stacked_lines_chart(x="tpep_pickup_datetime", y = "passenger_count", title = "stacked lines chart of trip distance")


ValueError: y must be a list of atleast one column name

In [57]:
d = create_dashboard(charts = [bar_chart, line_chart, scatter_chart])

In [58]:
d.app()