In [1]:
%%capture
%run ./Config.ipynb # run this cell only once
%run ./Clean.ipynb # run this cell only once

### Read Data File

In [2]:
# The data folder is resolved relative to the PARENT of the current working directory,
# so the notebook has to be started from a sub-folder that sits next to "data/".
path_fund = os.path.dirname(os.getcwd())
# The CSV is read with ";" as the field separator.
raw_hedge_funds = pd.read_csv(os.path.join(path_fund, "data/edhec-risk-hedgefunds.csv"), delimiter = ";")

### Box & Whiskers Chart for Risk Profile Assessment

In [3]:
def alt_box_whiskers(df):
    """Plot every fund's mean return together with its 95% and 66% confidence intervals.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format returns with at least a 'fund' column and a numeric 'return' column.

    Returns
    -------
    altair layered chart
        One row per fund: a thin red bar for the 95% interval of the mean, a thick black
        bar for the 66% interval and a white dot for the mean itself.

    Notes
    -----
    The intervals are Student-t intervals around the mean, built from the standard error
    of the mean with n - 1 degrees of freedom. A fund with a single observation therefore
    has no defined interval (NaN bounds).
    """

    #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # transforming the data and performing the necessary statistical calculations

    summary_columns = ['fund', 'mean', 'se', 'lower_95', 'upper_95', 'lower_66', 'upper_66']
    summary_rows = []
    for fund_name in df['fund'].unique():

        returns = df.loc[df['fund'] == fund_name, 'return']
        fund_mean = returns.mean() # mean calculation
        fund_se = stats.sem(returns) # standard error calculation
        dof = len(returns) - 1 # n - 1 degrees of freedom

        # The confidence level is passed positionally on purpose: the keyword was called
        # `alpha` in older SciPy and `confidence` in newer releases (where `alpha` was
        # removed), so a positional argument is the only spelling that works everywhere.
        lower_95, upper_95 = stats.t.interval(0.95, df = dof, loc = fund_mean, scale = fund_se)
        lower_66, upper_66 = stats.t.interval(0.66666, df = dof, loc = fund_mean, scale = fund_se)

        summary_rows.append([fund_name, fund_mean, fund_se, lower_95, upper_95, lower_66, upper_66])

    # Naming the columns at construction time keeps an empty input from crashing
    # (assigning 7 column names to a 0-column frame raises a length mismatch).
    summary = pd.DataFrame(summary_rows, columns = summary_columns)

    #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # create altair charts

    # Base layer carries the chart title, the x-axis title and the fixed x domain.
    base = alt.Chart(summary, title = 'Historical returns (%) of funds').mark_point(
        color = 'red',
        size = 30,
        filled = True).encode(
        x = alt.X("mean:Q",
                  title = 'Historical returns, mean, 95% & 66% lower/upper intervals',
                  scale = {"domain":[-0.8,2]}),
        y = alt.Y("fund:N"))

    # Thin red bar: 95% interval.
    line = alt.Chart(summary).mark_bar(color ='red',size = 3).encode(
        x = 'lower_95',
        x2 = 'upper_95',
        y = alt.Y("fund:N"))

    # Thick black bar: 66% interval, drawn over the 95% bar.
    bar = alt.Chart(summary).mark_bar(color = 'black', size = 12).encode(
        x = 'lower_66',
        x2 = 'upper_66',
        y = alt.Y("fund:N"))

    # White dot on top of everything marks the mean.
    mean_marker = alt.Chart(summary).mark_point(color = 'white', size = 80, filled = True).encode(
        x = alt.X('mean:Q'),
        y = alt.Y("fund:N"))

    return (base + line + bar + mean_marker).properties(height = 500, width = 780) # layer charts

In [4]:
#alt_box_whiskers(clean_funds(raw_hedge_funds)) #uncomment to see chart 

### Detailed risk assessment for each fund [e.g. volatility, skewness, kurtosis, Sharpe ratio, Value-at-Risk (VaR)]

In [5]:
def risk_assessment(df, rf_rate = None):
    """Build a table of per-fund risk statistics from long-format returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'fund' column and a numeric 'return' column.
    rf_rate : float, optional
        Risk-free rate used in the Sharpe ratio, expressed in the same units and
        periodicity as 'return'. When omitted, the notebook-level `risk_free_rate`
        is used (it is not defined in this notebook; presumably set by Config.ipynb).

    Returns
    -------
    pandas.DataFrame
        Indexed by 'fund', one row per fund in order of first appearance, with columns
        'historical min', 'historical max', 'mean', 'daily volatility',
        'Coefficient_Variation', 'skewness', 'kurtosis', 'daily sharpe_ratio',
        'VaR_90', 'VaR_95', 'VaR_99'. An empty input gives an empty table with the
        same columns.

    Notes
    -----
    * 'Coefficient_Variation' is std / mean. It is unbounded when the mean return is
      close to zero.
    * The VaR columns are the lower-tail quantiles (10%, 5%, 1%) of a normal distribution
      fitted with the fund's mean and standard deviation, i.e. a parametric estimate that
      ignores the skewness and kurtosis reported alongside it.
    * NOTE(review): the "daily" column labels are kept for compatibility; confirm the
      periodicity of the input returns before reading them as daily figures.
    """
    if rf_rate is None:
        rf_rate = risk_free_rate # fall back to the notebook-wide setting

    stat_columns = ['fund','historical min', 'historical max',
                    'mean', 'daily volatility', 'Coefficient_Variation',
                    'skewness','kurtosis', 'daily sharpe_ratio',
                    'VaR_90', 'VaR_95', 'VaR_99']

    stat_rows = []
    for fund_name in df['fund'].unique():

        returns = df.loc[df['fund'] == fund_name, 'return']
        # mean
        mean = returns.mean()
        # sample standard deviation (pandas default, ddof = 1)
        std_dev = returns.std()
        # coefficient of variation: dispersion relative to the mean (std / mean)
        CV = std_dev/mean
        # sharpe_ratio calc: excess return per unit of volatility
        sharpe_ratio = (mean - rf_rate)/std_dev
        # kurtosis calc
        kurto = kurtosis(returns.values)
        # skewness calc
        skewness = skew(returns.values)
        # Value-at-risk calc - lower-tail quantile of the fitted normal distribution
        VaR_90 = norm.ppf(1 - 0.9, mean, std_dev)
        VaR_95 = norm.ppf(1 - 0.95, mean, std_dev)
        VaR_99 = norm.ppf(1 - 0.99, mean, std_dev)
        # min/max
        historical_min = returns.min()
        historical_max = returns.max()

        stat_rows.append([fund_name, historical_min, historical_max,
                          mean, std_dev, CV,
                          skewness, kurto, sharpe_ratio,
                          VaR_90, VaR_95, VaR_99])

    # Columns are named at construction so that an empty input yields an empty,
    # correctly-labelled table instead of a column-length mismatch error.
    return pd.DataFrame(stat_rows, columns = stat_columns).set_index('fund')

In [6]:
# Per-fund risk statistics computed on the cleaned returns.
# clean_funds is not defined in this notebook; presumably it is provided by Clean.ipynb (run in the first cell).
risk_stats_df = risk_assessment(clean_funds(raw_hedge_funds))
#risk_stats_df #uncomment to see output

### Using Locally Weighted Scatterplot Smoothing (LOWESS) algorithm
- Fit a smoothed best estimate of the returns so that the underlying trend is easier to see
- Add prediction-interval bands around the estimate to assess volatility and detect outliers
- Pair each trend chart with a probability density plot to assess the distribution of returns

In [7]:
def lowness_density_charts(df, smooth_fraction = 0.05, iterations = 5):
    """Build, for every fund, a LOWESS trend chart next to a density chart of its returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format returns with 'fund', 'date' and 'return' columns. The frame is not
        modified; each fund is processed on its own copy.
    smooth_fraction : float, default 0.05
        Passed to LowessSmoother. The closer to 0, the more the fitted line follows
        individual data points; at 1.0 the fit approaches a straight line.
    iterations : int, default 5
        Passed to LowessSmoother.

    Returns
    -------
    altair chart
        One row per fund (vertically concatenated). Each row is the smoother chart
        (interval band + observed returns + smoothed line) placed to the left of the
        probability density chart of that fund's returns.
    """
    # Fixed y-axis range shared by every fund so the rows are visually comparable.
    min_ = -15
    max_ = 25

    charts = []
    for var in df['fund'].unique():
        # Work on an independent copy: new columns are added below, and writing them
        # onto a filtered slice of `df` triggers pandas' SettingWithCopyWarning.
        data = df[df['fund'] == var].copy()

        # Using the Lowess Smoother in tsmoothie to build the LOWESS model.
        # smooth_fraction controls how responsive the model is to changes in the data
        # (see the parameter description above).
        smoother = LowessSmoother(smooth_fraction = smooth_fraction, iterations = iterations)
        smoother.smooth(data['return'])

        # The following defines the uncertainty band associated with the LOWESS model.
        # Other options include confidence_interval, sigma_interval and kalman_interval.
        lower_band, upper_band = smoother.get_intervals('prediction_interval')
        # A single series was smoothed, so row 0 holds the values for this fund
        # (same indexing the smoothed estimate uses).
        data['lower_band'] = lower_band[0]
        data['upper_band'] = upper_band[0]
        data['smooth_estimate'] = smoother.smooth_data[0]

        #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # create altair charts
        # for each fund we hconcat 2 charts - smoother line chart with probability density chart

        # First - smoother line chart

        band = alt.Chart(data, title = 'Volatility surrounding' + ' ' + var + ' ' + 'fund').mark_area(
            color = '#009f29').encode(
            x = alt.X('date:T', title = 'PERIOD'),
            y = alt.Y('lower_band:Q',
                      scale = alt.Scale(domain = [min_,max_])),
            y2 ='upper_band:Q',
            opacity = alt.value(0.2))

        line = alt.Chart(data).mark_line(color = 'red').encode(
            x = alt.X('date:T', title = 'PERIOD'),
            y = alt.Y('smooth_estimate',
                      scale = alt.Scale(domain = [min_,max_]),
                      title = 'RETURNS'))

        point = alt.Chart(data).mark_point(color = 'black', filled = True).encode(
            x = alt.X('date:T', title = 'PERIOD'),
            y = alt.Y('return:Q', title = 'RETURNS',
                      scale = alt.Scale(domain = [min_,max_])))

        smoother_chart = (band + point + line) # combine all 3 charts

        # second - probability density chart

        prob_dens_chart = alt.Chart(data, title = 'Probability Density of' + ' ' + var + ' ' + 'fund').transform_density(
            'return', as_ = ['RETURNS', 'DENSITY']).mark_area(color = 'lightgrey').encode(
            x = "RETURNS:Q",
            y = 'DENSITY:Q')

        charts.append(alt.hconcat(smoother_chart, prob_dens_chart)) # horizontal concat both charts

    # stack the per-fund rows vertically
    return alt.vconcat(*charts)

In [8]:
# One LOWESS trend + density row per fund, built from the cleaned returns.
# clean_funds is not defined in this notebook; presumably it is provided by Clean.ipynb (run in the first cell).
lowness_density_visuals = lowness_density_charts(clean_funds(raw_hedge_funds))
#lowness_density_visuals #uncomment to see output

### T-SNE to cluster funds based on historical returns

In [9]:
def t_sne(df, random_state = None):
    """Project the funds onto two t-SNE dimensions and plot them as a labelled scatter.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format returns with 'fund', 'date' and 'return' columns, one row per
        (fund, date) pair.
    random_state : int or None, default None
        Forwarded to TSNE. t-SNE is stochastic, so pass an integer to get the same
        layout on every run; None keeps the previous (non-deterministic) behaviour.

    Returns
    -------
    altair layered chart
        One coloured point per fund with the fund name as a text label.

    Notes
    -----
    Funds with a missing return on any date are dropped before fitting.
    The learning rate comes from the notebook-level `learning_tsne_rate`
    (not defined in this notebook; presumably set by Config.ipynb).
    """
    # convert to wide format: one row per fund, one column per date
    wide_returns = df.pivot(index = "fund", columns = "date", values = "return").dropna()

    return_movements = wide_returns.values # return values
    fund = wide_returns.index # fund values

    # sklearn's normalize with its default settings, so that the return vector of
    # every fund is brought onto the same scale
    normalized_return_movements = normalize(return_movements)

    # t-SNE requires perplexity < number of samples, and recent scikit-learn raises
    # otherwise. Keep the library default (30) when there are enough funds and
    # clamp it just below the sample count when there are not.
    n_funds = normalized_return_movements.shape[0]
    perplexity = min(30.0, max(n_funds - 1, 1))

    TSNE_model = TSNE(learning_rate = learning_tsne_rate,
                      perplexity = perplexity,
                      random_state = random_state)

    # fit the model on the normalized array; the two resulting features are the
    # X and Y coordinates of the scatter plot
    TSNE_features = TSNE_model.fit_transform(normalized_return_movements)
    X = TSNE_features[:,0]
    Y = TSNE_features[:,1]

    # Create altair scatterplot for the t-SNE visualisation
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    data = pd.DataFrame({'fund': fund, 'x': X, 'y': Y})
    # axis domains are fitted tightly to the embedding
    min_x, max_x = data['x'].min(), data['x'].max()
    min_y, max_y = data['y'].min(), data['y'].max()

    base = alt.Chart(data).mark_point(filled = True, size = 120).encode(
        x = alt.X('x:Q',
                  scale = alt.Scale(domain = [min_x,max_x]),
                  axis = alt.Axis(grid=False)),
        y = alt.Y('y:Q',
                  scale = alt.Scale(domain = [min_y,max_y]),
                  axis = alt.Axis(grid=False)),
        color = alt.Color('fund:N', legend=None))

    # fund names are written next to the points, so no colour legend is needed
    annotation = alt.Chart(data).mark_text(
        align='left', baseline='middle', fontSize = 12, dx = 7).encode(
        x='x:Q', y='y:Q', text='fund:N')

    chart = (base + annotation).properties(height = 300, width = 780)

    return chart

In [10]:
#t_sne(clean_funds(raw_hedge_funds)) #uncomment to see output