[Reference](https://towardsdatascience.com/finding-the-maximum-area-under-points-on-a-curve-in-python-1f38c5a8d2f1)

In [16]:
import pandas as pd
import numpy as np
from numpy import mean, std
def IDoutliers(df, zscorethreshold):
    '''
    Print a per-column report of the rows whose Z score exceeds a threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to scan. np.mean and np.std are applied to every column, so
        the columns are expected to be numerical.
    zscorethreshold : number
        A value counts as an outlier when
        abs(value - column mean) / column std > zscorethreshold.

    Returns
    -------
    None
        Nothing is returned; the report is printed. For each column with
        outliers it shows the full rows holding them, the outlying values
        of that column, and the index labels of those rows.
    '''
    # (column name, rows flagged for that column), in column order
    flagged = []
    for col in df.columns:
        # Mean and sd per col
        u = np.mean(df[col])
        sd = np.std(df[col])
        # With a zero (or undefined) spread no value can exceed the
        # threshold; skipping the column also avoids dividing by zero.
        if not sd > 0:
            continue
        zscores = (df[col] - u).abs() / sd
        outliers = df.loc[zscores > zscorethreshold]
        # Only keep those columns that actually have outliers
        if not outliers.empty:
            flagged.append((col, outliers))

    # Print the report. The filtered rows are printed as they are, so the
    # caller's `df` is never rebound and the original dtypes are kept.
    for col, outliers in flagged:
        print('\nData points with outliers in column {}\n'.format(col))
        print(outliers)
        print('\nValues of outliers: {}'.format(outliers[col].values))
        print('\nIndex values of outliers as a list:{}'.format(outliers.index.values))
        print('---------------------------------------------------------------------------')

    print('Outlier report complete')

In [None]:
'''
This gist provides code for animating a point moving along a curve and finding the best trade-off between x and y - i.e., the value on 
the curve that has the maximum area underneath it. 

For a more detailed explanation of the code, please see the original article
'''


import numpy as np
import pandas as pd

# Generate data
x = np.array([0.001, 5.689989, 10.35714286,  20.71428571,  31.07142857,  41.42857143,
        51.78571429,  62.14285714,  72.5       ,  82.85714286,
        93.21428571, 103.57142857, 113.92857143, 124.28571429,
       134.64285714, 145, 199])

# y is written as a column vector (one value per row); np.nan marks the
# missing last observation (the np.NAN alias no longer exists in NumPy 2.0)
y = np.array([[548], [75], [66], [50], [40], [51], [42], [39], [21], [11], [10],
           [16], [20], [ 7], [ 1], [ 2], [np.nan]])
# Flatten to 1-D so that x and y have matching shapes and y can be used
# as a DataFrame column; -1 lets NumPy infer the length
y = y.reshape(-1)

print(y[0]) # a single scalar now that y is 1-D

# Plain-list copy of the y values, one entry per observation
new_y = list(y)

# Make dataframe
data = pd.DataFrame({'x':x, 'y':new_y})

# Removing outliers

import matplotlib.pyplot as plt
plt.plot(x, y)

# Plot outliers with seaborn

import seaborn as sns

# One row, two panels: a boxplot for each column
fig, axes = plt.subplots(1, 2)
fig.set_figheight(5)
fig.set_figwidth(8)

for axis, column in zip(axes, ('x', 'y')):
    sns.boxplot(ax=axis, data=data[column]).set(title=column)
    # A single box has no meaningful x ticks or labels, so hide them
    axis.tick_params(bottom=False)
    axis.set(xticklabels=[])

fig.suptitle('Boxplots using Seaborn to Detect Outliers', fontsize=16)

# Use outlier function (find code for function here:
# https://dankirk1995.medium.com/outlier-function-python-783b40e0da09)

IDoutliers(data, 3.25)

# Drop outlier (the row with index label 0)

data = data.drop(index=0)

plt.plot(data['x'], data['y'])

# Dealing with NA values

data.isna() # element-wise mask: not helpful on its own
data.isna().sum() # NA count per column
data.isna().sum().sum() # NA count for the whole df

# Discard every row that still holds an NA
data = data.dropna(axis=0, how='any')

# Fit an interpolating spline through the cleaned points and sample it
# at 500 evenly spaced x values across the observed range

from scipy.interpolate import make_interp_spline
X_Y_Spline = make_interp_spline(data.x, data.y)
X_ = np.linspace(data.x.min(), data.x.max(), num=500)
Y_ = X_Y_Spline(X_)
plt.plot(X_, Y_)

# Side-by-side comparison: raw points (left) vs. splined curve (right)

fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_figheight(5)
fig.set_figwidth(8)
for panel, xs, ys, heading in (
    (ax1, data.x, data.y, 'Original Data'),
    (ax2, X_, Y_, 'Splined Data'),
):
    panel.plot(xs, ys)
    panel.set_title(heading)
    panel.set_xlabel("x")
    panel.set_ylabel('y')

# Find best x/y trade-off

# For every sampled point, the area of the rectangle spanned by the origin
# and that point is simply x * y
splined_df = pd.DataFrame({'X_':X_, 'Y_':Y_})
splined_df['area'] = splined_df.X_*splined_df.Y_

# Look the maximum up once; idxmax gives the label of the first row holding
# it, so both coordinates are read from the same row
best_row = splined_df.loc[splined_df['area'].idxmax()]
bestx = best_row['X_']
besty = best_row['Y_']

# Plotting

import matplotlib.patches as mpatches

# Work on the current axes explicitly instead of through pyplot calls
max_area_ax = plt.gca()
max_area_ax.plot(X_, Y_)
max_area_ax.set_title('Max Area')
max_area_ax.set_xlabel('x')
max_area_ax.set_ylabel('y')
# Pin both lower limits to 0 to prevent negative axes numbers
max_area_ax.set_xlim(0)
max_area_ax.set_ylim(0)

# Translucent red rectangle from the origin to the best point
left, bottom = 0, 0
width, height = bestx, besty
rect = mpatches.Rectangle(
    (left, bottom), width, height,
    fill=True,
    facecolor="red",
    linewidth=2,
    alpha=0.1,
)
max_area_ax.add_patch(rect)

# Mark the best point and drop dotted guide lines to both axes
max_area_ax.plot(bestx, besty, marker='o', color='navy')
max_area_ax.vlines(bestx, besty, 0, color='red', linestyles='dotted')
max_area_ax.hlines(besty, bestx, 0, color='red', linestyles='dotted')

# Boxed label showing the maximum area, rounded to 2 decimal places
area_label = f'Area:\n{round(splined_df.area.max(), 2)}'
label_box = dict(facecolor='lightgrey', alpha=0.5, edgecolor='black',
                 boxstyle='round')
max_area_ax.text(17, 17, area_label, bbox=label_box, fontsize=12)
                     

# Animating

# Styling shared by the two annotation boxes drawn on every frame
frame_label_box = dict(facecolor='lightgrey', alpha=0.5, edgecolor='black',
                       boxstyle='round')

current_max = -np.inf # Lowest possible value, so the first real area replaces it
for i, (frame_x, frame_y, frame_area) in enumerate(
        zip(splined_df.X_, splined_df.Y_, splined_df.area)):
    # Give every frame its own figure. Drawing through the implicit current
    # figure would put frame 0 on top of whatever was plotted last.
    frame_fig, frame_ax = plt.subplots()
    frame_ax.plot(splined_df.X_, splined_df.Y_)
    frame_ax.set_title('Max Area')
    frame_ax.set_xlabel('x')
    frame_ax.set_ylabel('y')
    # Prevent negative axes numbers
    frame_ax.set_xlim(0)
    frame_ax.set_ylim(0)

    # Rectangle from the origin to the point reached on this frame
    rect = mpatches.Rectangle((0, 0), frame_x, frame_y,
                              fill=True,
                              facecolor="red",
                              linewidth=2,
                              alpha=0.1)
    frame_ax.vlines(frame_x, frame_y, 0, color='red', linestyles='dotted')
    frame_ax.hlines(frame_y, frame_x, 0, color='red', linestyles='dotted')
    frame_ax.plot(frame_x, frame_y, marker='o', color='navy')
    frame_ax.add_patch(rect)
    frame_ax.text(90, 65, 'Area (1dp):{}'.format(round(frame_area, 1)),
                  bbox=frame_label_box, fontsize=10)

    # Update the running maximum first; the label is the same either way
    if frame_area > current_max:
        current_max = frame_area
    frame_ax.text(90, 57, 'Max Area (1dp):{}'.format(round(current_max, 1)),
                  bbox=frame_label_box, fontsize=10)

    frame_fig.savefig(f'max_area-{i}.png')
    # Close this frame's figure so open figures do not pile up
    plt.close(frame_fig)
import imageio
import os

# File names of the saved frames, in playback order
frame_files = [f'max_area-{n}.png' for n in range(len(splined_df))]

# Append every frame to the GIF in order
with imageio.get_writer('max_area.gif', mode='i') as writer:
    for frame_file in frame_files:
        image = imageio.imread(frame_file)
        writer.append_data(image)

# The individual PNG frames are no longer needed once the GIF is written
for frame_file in frame_files:
    os.remove(frame_file)

548.0

Data points with outliers in column y

       x      y
0  0.001  548.0

Values of outliers: [548.]

Index values of outliers as a list:[0]
---------------------------------------------------------------------------
Outlier report complete
