In [145]:
import pandas as pd, numpy as np
from ipyvizzu import Chart, Data, Config, DisplayTarget, ChartProperty, Style
from ipyvizzustory import Story, Slide, Step

# Example `.store()` call

In [146]:
# Fetch the ipyvizzu sample music dataset and wrap it in a Data container
MUSIC_CSV_URL = "https://ipyvizzu.vizzuhq.com/0.17/assets/data/music_data.csv"
sample_df = pd.read_csv(MUSIC_CSV_URL)

data = Data()
data.add_df(sample_df)

# Small demo chart used by the `.store()` walkthrough cells
chart = Chart(width="480px", height="240px", display=DisplayTarget.ACTUAL)

In [147]:
# Starting state for the demo: Popularity and Kinds on y, Genres on x,
# markers colored by Kinds and labelled with Popularity
initial_channels = {
    "y": {"set": ["Popularity", "Kinds"]},
    "x": {"set": ["Genres"]},
    "color": {"set": ["Kinds"]},
    "label": {"set": ["Popularity"]},
}
chart.animate(data, Config({"channels": initial_channels}))

In [148]:
# Switch the chart to the "stretch" alignment, then capture the resulting state
stretch_config = Config({"align": "stretch"})
chart.animate(stretch_config)
snapshot = chart.store()

In [149]:
# Return to the stored snapshot with a zero-length animation
chart.animate(snapshot, duration=0)

# Attach Kinds to x, detach it from y, and switch alignment off.
# List-style spellings of the same two entries:
#   "x": {"attach": ["Kinds"]}
#   "y": {"detach": ["Kinds"]}
regroup_config = Config(
    {
        "channels": {
            "x": {"attach": "Kinds"},
            "y": {"detach": "Kinds"},
        },
        "align": "none",
    }
)
chart.animate(regroup_config)

# Capture this second state for later reuse
animation = chart.store()

In [150]:
# Return to the second stored state with a zero-length animation
chart.animate(animation, duration=0)

# Shorthand channel config: a bare list/string in place of {"set": [...]}
#   "y": ["Kinds", "Popularity"]  <->  "y": {"set": ["Kinds", "Popularity"]}
#   "x": "Genres"                 <->  "x": {"set": ["Genres"]}
shorthand_channels = {
    "y": ["Kinds", "Popularity"],
    "x": "Genres",
}
chart.animate(Config({"channels": shorthand_channels}))

In [151]:
# Animate (with default timing) to the state previously captured in `animation` via chart.store()
chart.animate(animation)

# Read in Data

Reading in the `graphing_table.csv` file from the data folder. See `master_agg.ipynb` for more information. 

In [152]:
# Load the graphing table; the relative path is resolved against the kernel's working directory
df = pd.read_csv('../../data/graphing_table.csv')

In [153]:
# All metric columns should be numeric types.
# This is a visual check only: inspect the displayed dtypes (nothing is asserted here).
df.dtypes

uni                           object
school                        object
course                        object
year                           int64
Places                       float64
GPA                          float64
RP                           float64
employment_rate_overall      float64
employment_rate_ft_perm      float64
basic_monthly_mean           float64
basic_monthly_median         float64
gross_monthly_mean           float64
gross_monthly_median         float64
gross_mthly_25_percentile    float64
gross_mthly_75_percentile    float64
university                    object
dtype: object

# Story Charting

1. User selects course & university


**Admissions Criteria**

*slide*
- Display line chart of historical admission criteria (RP)
    - Include 25th percentile, median and 75th percentile entry requirements for that UNIVERSITY

*slide*
- Display line chart of historical admission criteria (RP)
    - Include other degrees in that university / school(?). Use a computed column for the coloring.

**Admissions vs Salaries**

*slide*
- Display yearly movement of RP vs median GES (could show movement by label?) for this course ONLY. 

*slide*
- Display scatter plot of RP vs median income for this course against other similar courses (try using school?). Tag other courses by color. Will require a separate column.

*Then several slides for the other income metrics*(?)

**Employment Stats** 
For employment stats, we want a transition from a bar chart to a stacked bar chart, like the one [here](https://ipyvizzu.vizzuhq.com/latest/examples/analytical_operations/compare/column_groupped_1/). 

Could do so for degrees in the same school?


**nth slide**
Show full-time employment rate for this degree using polar coordinates?

[Technical Details]
Need to construct a new initial dataframe on demand when a course + uni combination is selected
- Create new column denoting "selected" vs "other" courses. Use for coloring purposes
- Add new rows for aggregated views (used for first slide & more)

# Data Generation

Constructing the necessary dataframe

In [154]:
# Selection under examination: one degree at one university
course = 'Arts (Hons)'
uni = 'NUS'

# Look up the school this course belongs to at this university.
# `.item()` raises unless exactly one distinct school matches.
is_selected_course = (df['course'] == course) & (df['uni'] == uni)
school = df.loc[is_selected_course, 'school'].drop_duplicates().item()

# Working copy of the main table, ordered chronologically
input_df = df.copy().sort_values('year')

## Admission Criteria (For RP)

In [155]:
# Build per-university summary rows (median, 25th and 75th percentile of every
# metric, per year) and append them to `input_df`, flagged with summary_row == 1.

# Metrics that get summarised per university and year
metrics = ['Places', 'GPA', 'RP', 'employment_rate_overall', 'employment_rate_ft_perm',
           'basic_monthly_mean', 'basic_monthly_median', 'gross_monthly_mean',
           'gross_monthly_median', 'gross_mthly_25_percentile', 'gross_mthly_75_percentile']

# (quantile, course-label suffix) pairs, in the order the rows are appended
summary_quantiles = [(0.5, 'Median'), (0.25, '25th Percentile'), (0.75, '75th Percentile')]

# If this cell is run again, drop the summary rows it produced last time so they
# are neither duplicated nor folded into the freshly computed quantiles
if 'summary_row' in input_df.columns:
    input_df = input_df.loc[input_df['summary_row'] != 1].copy()

# Every row present at this point is a real course row
input_df['summary_row'] = 0

# What years is there information on for the selected course?
course_years = input_df.loc[(input_df['course'] == course)
                            & (input_df['uni'] == uni), 'year']
min_year, max_year = course_years.min().item(), course_years.max().item()

# Collect the summary tables for EVERY university, then concatenate once
summary_tables = []
for uni_tmp in df.uni.unique().tolist():

    # Records of that university within the selected course's year range
    uni_rows = input_df.loc[(input_df['uni'] == uni_tmp)
                            & (input_df['year'].between(min_year, max_year))]
    yearly_metrics = uni_rows.groupby('year')[metrics]

    for quantile, label in summary_quantiles:
        # One row per year holding the requested quantile of each metric
        quantile_table = yearly_metrics.quantile(quantile).reset_index()
        quantile_table['uni'] = uni_tmp
        quantile_table['course'] = f'{uni_tmp} {label}'
        summary_tables.append(quantile_table)

# Join back to input_df (skipped when there is no university to summarise)
if summary_tables:
    stats_table = pd.concat(summary_tables, ignore_index=True)
    stats_table['summary_row'] = 1
    input_df = pd.concat([input_df, stats_table], ignore_index=True)

# Track if a row is a course row or not (the inverse of summary_row)
input_df['course_row'] = (input_df['summary_row'] == 0).astype(int)

In [94]:
# Sanity check on the combined table's (rows, columns); the saved output shows 866 rows
print(input_df.shape) # 866

(866, 17)


# Salary Comparisons

## Final Transformations

In [156]:
# Computed color column: lets a scatter plot single out the selected degree.
# Masks are built first, then applied in order of increasing priority.
is_selected_degree = (
    (input_df['course'] == course)
    & (input_df['school'] == school)
    & (input_df['uni'] == uni)
)
selected_uni_summaries = [
    f'{uni} 25th Percentile',
    f'{uni} Median',
    f'{uni} 75th Percentile',
]
is_selected_uni_summary = input_df['course'].isin(selected_uni_summaries)

# Default label for every row
input_df['color_label'] = 'others'
# The selected degree is labelled with its own course name
input_df.loc[is_selected_degree, 'color_label'] = course
# Summary rows of the selected university carry no label
input_df.loc[is_selected_uni_summary, 'color_label'] = np.nan

# Order chronologically once more, then store the year as a STRING
input_df = input_df.sort_values('year')
input_df['year'] = input_df['year'].astype(int).astype(str)

# Plotting

In [157]:
# New Data container holding the prepared table.
# Note: this rebinds `data`, which earlier held the music sample frame.
data = Data()
data.add_df(input_df)

In [160]:
# Canvas size for the story
dimensions = dict(width='800px', height='300px')

# Story built on the prepared data (a Style configuration can be added as well)
story = Story(data=data)
story.set_size(width=dimensions['width'], height=dimensions['height'])

# Turn tooltips on so users can mouse over
story.set_feature('tooltip', True)

In [161]:
# Opening slide: course rows only, drawn as circles colored by university
course_rows_only = Data.filter("record['course_row']==1")
start_config = Config({
    'color': 'uni',
    'size': 'course_row',
    'geometry': 'circle',
})

start_slide = Slide()
start_slide.add_step(Step(course_rows_only, start_config))

story.add_slide(start_slide)

In [144]:
# Experiment: a starting state showing all the courses in the abstract, as circles?
# Note: this rebinds `chart`, replacing the demo chart created earlier.
chart = Chart()
abstract_config = Config({
    'color': 'uni',
    'size': 'course',
    'geometry': 'circle',
})
chart.animate(data, abstract_config)

In [162]:
# Keep only the summary rows that hold a university median
median_courses = ('SMU Median', 'NUS Median', 'NTU Median')
median_clause = ' || '.join(
    f"record['course']=='{name}'" for name in median_courses
)
unifilter = Data.filter(f"record['summary_row']==1 && ({median_clause})")

# RP per year as lines, one color per university, y axis starting at 60
median_rp_config = Config({
    'y': {'set': 'RP', 'range': {'min': 60}},
    'x': 'year',
    'color': 'uni',
    'geometry': 'line',
})

slide0 = Slide()
slide0.add_step(Step(unifilter, median_rp_config))
story.add_slide(slide0)

# TODO: do this for the 25th percentile & 75th percentile as next steps?

In [163]:
# Render the story with the slides added so far
story.play()

## RP requirements

In [133]:
# Only the selected degree and uni, along with that uni's percentile/median rows.
# The course alternatives are grouped in parentheses so the uni condition applies
# to all of them; without the grouping, JavaScript evaluates `a && b || c || d`
# as `(a && b) || c || d`. The summary rows are created with their university in
# the `uni` column, so the explicit grouping selects the same records.
selected_courses = [
    course,
    f'{uni} 25th Percentile',
    f'{uni} Median',
    f'{uni} 75th Percentile',
]
course_clause = ' || '.join(
    f"record['course']=='{name}'" for name in selected_courses
)
places_data_filter = Data.filter(
    f"record['uni']=='{uni}' && ({course_clause})"
)

# Every course row of the selected uni: the selected degree plus the 'others'
uni_courses_filter = Data.filter(
    f"record['uni']=='{uni}'"
    f"&& (record['color_label']=='{course}' || record['color_label']=='others')"
    )

In [134]:
# Slide: RP per year for the records kept by `places_data_filter`,
# drawn as circles and colored by course
rp_history_config = Config({
    'channels': {
        'x': 'year',
        'y': {
            'set': ['RP'],
            # Numeric lower bound for the axis, consistent with the
            # median-RP slide (slide0), which also uses the number 60
            'range': {'min': 60},
        },
    },
    'geometry': 'circle',
    'color': 'course',
})

slide1 = Slide()
slide1.add_step(
    Step(
        places_data_filter,
        rp_history_config,
        # Animation options for the y axis (presumably seconds — confirm in the Vizzu docs)
        y={'duration': 2, 'delay': 2},
    )
)

# Add to story
story.add_slide(slide1)

In [135]:
# Render the story again, now including the RP slide
story.play()

### By degree

This doesn't really work

In [31]:
# NOTE(review): `slide2` is not defined in the visible part of this notebook; unless it is
# defined elsewhere, this line raises NameError on a fresh Restart & Run All — confirm.
story.add_slide(slide2)