In [None]:
# -- 1: IMPORTS --
import pandas as pd            # For data loading and cleaning
import plotly.express as px     # For creating the charts
import plotly.graph_objects as go # For advanced charts like the Pyramid
from dash import Dash, dcc, html # (Optional) If you build the final dashboard later

# -- 2. LOAD DATA --
# Read the participant table from 'datavis_test.csv'; the path is relative to
# the directory the notebook kernel was started in.
df = pd.read_csv(filepath_or_buffer="datavis_test.csv")

# Preview the first five rows as a sanity check that the file parsed as expected
df.head(n=5)

# 1: Sunburst Chart

In [None]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px

# 1. Create the Dash application object that hosts the sunburst dashboard
app = Dash(__name__)

# 2. Page layout: a heading, a labelled dropdown that selects the nesting
#    order of the hierarchy, and the graph whose figure the callback supplies.
#    Each option's 'value' is a short code that the callback looks up.
hierarchy_options = [
    {'label': 'Country > Gender > Education', 'value': 'CGE'},
    {'label': 'Education > Country > Gender', 'value': 'ECG'},
    {'label': 'Gender > Education > Country', 'value': 'GEC'},
]

hierarchy_dropdown = dcc.Dropdown(
    id='hierarchy-dropdown',
    options=hierarchy_options,
    value='CGE',       # hierarchy selected when the page first loads
    clearable=False,   # the user cannot clear the selection
)

app.layout = html.Div([
    html.H2("Participant Data Dashboard"),
    html.Label("View Hierarchy By: "),
    hierarchy_dropdown,
    dcc.Graph(id='sunburst-graph'),  # target of the callback's Output
])

# 3. Callback: rebuild the sunburst whenever the dropdown value changes
@app.callback(
    Output('sunburst-graph', 'figure'),
    Input('hierarchy-dropdown', 'value')
)
def update_hierarchy(selected_path):
    """Build the sunburst figure for the hierarchy chosen in the dropdown.

    Args:
        selected_path: One of the dropdown codes 'CGE', 'ECG' or 'GEC'.

    Returns:
        A Plotly Express sunburst figure drawn from the notebook-level ``df``.

    Raises:
        KeyError: If ``selected_path`` is not one of the known codes.
    """
    # Each code maps to the column order handed to px.sunburst as `path`
    # (hierarchy levels listed from root to leaves).
    levels_by_code = dict(
        CGE=['intv_BirthCount', 'intv_Gender', 'intv_Edu'],
        ECG=['intv_Edu', 'intv_BirthCount', 'intv_Gender'],
        GEC=['intv_Gender', 'intv_Edu', 'intv_BirthCount'],
    )
    levels = levels_by_code[selected_path]
    chart_title = "Hierarchy: " + " -> ".join(levels)

    # NOTE(review): sector sizes are taken from 'intv_Age', not from the number
    # of participants -- confirm that this is the intended measure.
    sunburst = px.sunburst(
        df,
        path=levels,
        values='intv_Age',
        color='intv_BirthCount',
        title=chart_title,
    )

    # Keep only a top margin so the title has room; the rest is flush.
    sunburst.update_layout(margin=dict(t=40, l=0, r=0, b=0))
    return sunburst

# 4. Start the Dash server (Jupyter-specific invocation)
# jupyter_mode='inline' asks Dash to display the app in this cell's output;
# the server is started on port 8051 with debug mode switched on.
app.run(
    port=8051,
    jupyter_mode='inline',
    debug=True,
)

# 2: Population Pyramid

In [None]:
# -- 1. Prepare Hover Data --
# Extra per-participant columns attached to every bar. The hover templates
# refer to them by position as %{customdata[i]}; positions are looked up by
# column name below so the templates stay correct if this list is reordered.
hover_columns = ['intv_Gender', 'intv_Edu', 'intv_BirthCount',
                 'intv_Age', 'intv_YearsSpa', 'intv_YearsEngl']
hover_info = df[hover_columns]


def _hover_ref(column):
    """Return the hovertemplate placeholder for ``column`` of ``hover_info``."""
    return "%{customdata[" + str(hover_columns.index(column)) + "]}"


def _language_bar(language, years_column, mirrored=False):
    """Build the horizontal bar trace for one language.

    Args:
        language: Trace name; also the label of the years line in the hover box.
        years_column: Column of ``df`` holding the years for this language.
        mirrored: When True the bar lengths are multiplied by -1 so the bars
            extend to the left of zero.

    Returns:
        A ``go.Bar`` trace with one bar per row of ``df``.
    """
    years = df[years_column]
    return go.Bar(
        y=df['intv_Id'],
        x=years * -1 if mirrored else years,
        name=language,
        orientation='h',
        customdata=hover_info,  # extra columns made available to the template
        # The years line reads the original column through customdata rather
        # than %{x}, so mirrored (negative) bars still show a positive number.
        hovertemplate=(
            "<b>ID: %{y}</b><br>"
            + "Age: " + _hover_ref('intv_Age') + "<br>"
            + language + ": " + _hover_ref(years_column) + " years<br>"
            + "Gender: " + _hover_ref('intv_Gender') + "<br>"
            + "Education: " + _hover_ref('intv_Edu') + "<br>"
            + "<extra></extra>"  # removes the secondary box
        ),
    )


# -- 2. Create the graph using 'go' (plotly.graph_objects) --
fig = go.Figure()

# Spanish on the left side (negative x), English on the right side
fig.add_trace(_language_bar('Spanish', 'intv_YearsSpa', mirrored=True))
fig.add_trace(_language_bar('English', 'intv_YearsEngl'))

# -- 3. Clean up the Layout --
fig.update_layout(
    title='Bilingual Profile by Participant',
    barmode='relative',
    xaxis=dict(
        # Tick labels are written without a sign so both halves read as
        # positive years. NOTE(review): ticks are fixed at +/-60; bars longer
        # than that are still drawn but get no tick label.
        tickvals=[-60, -40, -20, 0, 20, 40, 60],
        ticktext=['60', '40', '20', '0', '20', '40', '60'],
        title='Years of Experience'
    ),
    yaxis=dict(title='Participant ID')
)

fig.show()

# 3: Language Dominance Graph

In [None]:
# 1. Create the scatter plot
# Columns shown in the hover box. They are passed as `custom_data`, which pins
# their order in each trace's customdata, so the template below can look up
# every position by column name instead of relying on hand-counted indices.
scatter_hover_columns = ['intv_Id', 'intv_Gender', 'intv_Edu',
                         'intv_Age', 'intv_BirthCount']


def _scatter_ref(column):
    """Return the hovertemplate placeholder for ``column`` of the customdata."""
    return "%{customdata[" + str(scatter_hover_columns.index(column)) + "]}"


fig_scatter = px.scatter(
    df,
    x="intv_YearsSpa",
    y="intv_YearsEngl",
    color="intv_BirthCount",
    size="intv_Age",
    hover_name="intv_Id",
    custom_data=scatter_hover_columns,
    title="Language Dominance: Spanish vs. English Years",
    labels={
        "intv_YearsSpa": "Years of Spanish",
        "intv_YearsEngl": "Years of English",
        "intv_BirthCount": "Birth Country",
        "intv_Age": "Age",
        "intv_Id": "ID",
        "intv_Gender": "Gender",
        "intv_Edu": "Education"
    },
)

# 2. Replace the default hover box with a fixed, list-like layout.
#    x and y are read straight from the point; everything else from customdata.
fig_scatter.update_traces(
    hovertemplate=(
        "<b>ID: " + _scatter_ref('intv_Id') + "</b><br>"
        + "Birth Country: " + _scatter_ref('intv_BirthCount') + "<br>"
        + "Age: " + _scatter_ref('intv_Age') + "<br>"
        + "Education: " + _scatter_ref('intv_Edu') + "<br>"
        + "Gender: " + _scatter_ref('intv_Gender') + "<br>"
        + "Years of English: %{y}<br>"
        + "Years of Spanish: %{x}<br>"
        + "<extra></extra>"
    )
)

# 3. Add the diagonal balance line (equal years in both languages).
#    It reaches at least 60, and further when the data go beyond that, so the
#    line never stops short of the points. 60 is listed first so that a NaN
#    maximum (all-missing column) falls back to 60.
diagonal_end = float(max(60, df["intv_YearsSpa"].max(), df["intv_YearsEngl"].max()))
fig_scatter.add_shape(type="line", x0=0, y0=0, x1=diagonal_end, y1=diagonal_end,
                      line=dict(color="MediumPurple", dash="dot"))

fig_scatter.show()