In [15]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import linregress
import geopandas as gpd
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.animation import FuncAnimation
from IPython import display
from IPython.display import display, HTML
from scipy.interpolate import interp1d
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import animation, rc
rc('animation', html='html5')
from IPython.display import HTML, Image
from itertools import groupby

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import animation, rc
rc('animation', html='html5')
from IPython.display import HTML, Image
from itertools import groupby


class Eased:
    """Ease (interpolate) tabular data between time points and animate the result.

    The constructor accepts either a pandas DataFrame or a numpy array:

    * DataFrame: the index supplies the frame labels, and the first row is
      appended again at the end so the eased sequence finishes back at its
      starting values.
    * ndarray: stored as given, with ``in_t`` (or ``0..n_rows-1`` when omitted)
      as the input time vector.

    ``power_ease`` and ``scatter_animation2d`` treat rows as time points and
    columns as variables.
    NOTE(review): ``No_interp`` indexes 2-D data as ``data[variable, time]``,
    i.e. the opposite orientation -- confirm which layout is intended.
    """

    def __init__(self, data, data_y=None, in_t=None):
        """Store the raw data and its input time vector.

        Parameters
        ----------
        data : pandas.DataFrame or numpy.ndarray
            Raw values to ease.
        data_y : optional
            Accepted for interface compatibility; not used by this class.
        in_t : array-like, optional
            Time vector for ndarray input. Defaults to ``arange(n_rows)``.

        Raises
        ------
        TypeError
            If ``data`` is neither a DataFrame nor an ndarray.
        """
        if isinstance(data, pd.DataFrame):
            first_label = data.index.values[0]
            # Two extra copies of the first label: one for the wrapped-around
            # row, one so the rounded label lookup in the animation never
            # runs past the end.
            self.labels = np.append(data.index.values, np.array([first_label, first_label]))
            self.int_t = np.arange(len(self.labels) - 1)

            # Close the loop by repeating the first row at the end.
            self.data = np.vstack((data.values, data.values[0, :]))
            self.n_dims = data.shape[1]
            self.columns = data.columns
        elif isinstance(data, np.ndarray):
            if in_t is None:
                in_t = np.arange(np.shape(data)[0])
                print("No time vector included - defaulting to number of rows")

            self.int_t = in_t
            self.data = data
            self.n_dims = len(np.shape(data))
        else:
            # Previously this only printed, leaving an object with no
            # attributes that failed later with an unrelated AttributeError.
            raise TypeError("Data is unrecognized type : must be either a numpy array or pandas dataframe")

    def No_interp(self, smoothness=10):
        """Map the input onto the output time vector without interpolation.

        Each input sample is held for ``n_steps`` consecutive output frames.
        Sets ``self.out_t``, ``self.n_steps`` and ``self.eased`` and returns
        ``self.eased``.
        """
        out_t = np.linspace(min(self.int_t), max(self.int_t), len(self.int_t) * smoothness)
        self.n_steps = int(np.ceil(len(out_t) / len(self.int_t)))
        self.out_t = out_t

        if self.n_dims == 1:  # if the input is only one row
            self.eased = np.zeros((len(self.out_t), 1))
            for i, t in enumerate(self.out_t):
                self.eased[i] = self.data[int(np.floor(i / self.n_steps))]
        else:  # if the input is a multidimensional row
            self.eased = np.zeros((np.shape(self.data)[0], len(self.out_t)))
            for z in range(np.shape(self.data)[0]):
                for i, t in enumerate(self.out_t):
                    self.eased[z, i] = self.data[z, int(np.floor(i / self.n_steps))]

        return self.eased

    @staticmethod
    def _ease_weights(n, n_steps):
        """Return the fraction of each segment covered at every step (0 -> 1)."""
        t = np.linspace(0, 2, n_steps)
        sign = n % 2 * 2
        first_half = t < 1
        weights = np.empty_like(t)
        # Ease-in on the first half of the segment ...
        weights[first_half] = t[first_half] ** n / 2
        # ... and the mirrored ease-out on the second half.
        mirrored = t[~first_half] - 2
        weights[~first_half] = -(1 - sign) / 2 * (mirrored ** n - 2 * (1 - sign))
        return weights

    def power_ease(self, n, smoothness=10):
        """Ease between consecutive rows with a power-``n`` in/out curve.

        Parameters
        ----------
        n : int
            Exponent of the easing curve.
        smoothness : int
            Number of output frames generated per input time point.

        Returns
        -------
        numpy.ndarray
            Array of shape ``(len(int_t) * smoothness, n_columns)``; 1-D data
            yields a single column. Frames after the last eased segment hold
            the final row. Also stored as ``self.eased``; ``self.out_t`` and
            ``self.n_steps`` are set as a side effect.
        """
        out_t = np.linspace(min(self.int_t), max(self.int_t), len(self.int_t) * smoothness)
        self.n_steps = int(np.ceil(len(out_t) / len(self.int_t)))
        self.out_t = out_t

        values = np.asarray(self.data, dtype=float)
        if values.ndim == 1:
            values = values.reshape(-1, 1)
        n_cols = values.shape[1]

        weights = self._ease_weights(n, self.n_steps)
        n_segments = len(self.int_t) - 1
        filled = n_segments * self.n_steps

        starts = values[:n_segments]
        deltas = values[1:n_segments + 1] - starts
        # (segment, step, column) -> flattened to (frame, column)
        segments = starts[:, None, :] + deltas[:, None, :] * weights[None, :, None]

        self.eased = np.zeros((len(out_t), n_cols))
        self.eased[:filled] = segments.reshape(filled, n_cols)
        self.eased[filled:] = values[n_segments]
        return self.eased

    def scatter_animation2d(self, n=3, smoothness=30, speed=1.0, gif=False, destination=None, plot_kws=None, label=False):
        """
        Create a 2d scatter plot animation of the eased data.

        Columns are consumed in (x, y) pairs, one pair per dot; rows are the
        individual time points.

        Parameters
        ----------
        n : exponent of the power smoothing
        smoothness : number of animation frames per input time point
        speed : playback speed multiplier (frame interval is 400/smoothness/speed ms)
        gif : if True return an ``IPython.display.Image`` instead of the animation
        destination : optional output path; ``.mp4`` is written with ffmpeg,
            ``.gif`` with imagemagick
        plot_kws : optional dict overriding any of ``s``, ``color``, ``xlim``,
            ``ylim``, ``xlabel``, ``ylabel``, ``alpha``, ``figsize``
        label : if True show the DataFrame index label of the current time point

        Returns
        -------
        The matplotlib animation, or an ``Image`` when ``gif`` is true.

        Raises
        ------
        ValueError
            If the data has an odd number of columns, or ``label`` is requested
            for input that has no labels (ndarray input).
        """
        # Running checks on data for misshapen arrays.
        if np.shape(self.data)[1] % 2 != 0:
            # Raise instead of exit(): exit() would shut down the whole kernel.
            raise ValueError("Failed: Data must have an even number of columns")
        if np.shape(self.data)[0] < np.shape(self.data)[1]:
            print('\033[91m' + "Warning : Data has more columns (xys) than rows (time)")

        labels = getattr(self, 'labels', None)
        if label and labels is None:
            raise ValueError("label=True requires DataFrame input: labels are taken from its index")

        # Work on a copy so the caller's dict is not filled with our defaults.
        plot_kws = dict(plot_kws) if plot_kws is not None else dict()

        it_data = self.power_ease(n, smoothness)

        # filling out missing keys
        lower, upper = np.min(it_data) - 1, np.max(it_data) + 1
        vanilla_params = {'s': 10, 'color': 'blue', 'xlim': [lower, upper], 'ylim': [lower, upper],
                          'xlabel': '', 'ylabel': '', 'alpha': 1.0, 'figsize': (6, 6)}
        for key, value in vanilla_params.items():
            plot_kws.setdefault(key, value)

        fig, ax = plt.subplots(figsize=plot_kws['figsize'])
        ax.set_xlim(plot_kws['xlim'])
        ax.set_ylim(plot_kws['ylim'])
        ax.set_xlabel(plot_kws['xlabel'])
        ax.set_ylabel(plot_kws['ylabel'])

        label_text = None
        if label:
            label_text = ax.text(plot_kws['xlim'][1] * 0.75, plot_kws['ylim'][1] * .9, '', fontsize=18)

        n_dots = int(np.shape(self.data)[1] / 2)
        dots = []
        for _ in range(n_dots):
            dot, = ax.plot([], [], linestyle='none', marker='o', markersize=plot_kws['s'],
                           color=plot_kws['color'], alpha=plot_kws['alpha'])
            dots.append(dot)

        def animate(z):
            for i, dot in enumerate(dots):
                # set_data needs sequences; bare scalars are rejected by
                # recent Matplotlib releases.
                dot.set_data([it_data[z, i * 2]], [it_data[z, i * 2 + 1]])
            if label_text is not None:
                # Round to the nearest input time point.
                label_text.set_text(labels[int(np.floor((z + smoothness / 2) / smoothness))])
                return dots + [label_text]
            return dots

        anim = animation.FuncAnimation(fig, animate, frames=len(self.out_t),
                                       interval=400 / smoothness / speed, blit=False)

        extension = destination.split('.')[-1] if destination is not None else None
        if extension == 'mp4':
            writer = animation.writers['ffmpeg'](fps=60)
            anim.save(destination, writer=writer, dpi=100)
        elif extension == 'gif':
            anim.save(destination, writer='imagemagick', fps=smoothness)

        if gif:
            # Point at the file that was actually written; fall back to the
            # historical default name when no gif destination was given.
            return Image(url=destination if extension == 'gif' else 'animation.gif')
        return anim

# Print a version banner when this cell/module is executed directly
# (not when it is imported).
if __name__ == "__main__":
    print('EASING : A library for smooth animations in python : version 0.1.0')
    # Commented-out usage example: one point moving over time.
    # data = np.random.random((10, 2))
    # Eased(data).scatter_animation2d(n=3, speed=0.5, destination='media/singlepoint.gif')

EASING : A library for smooth animations in python : version 0.1.0


In [2]:
%load_ext autoreload
%autoreload 2

In [4]:
# Working directory that holds the input CSV files. The default is the
# original author's local folder; set the VIZ_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
directory_path = os.environ.get(
    'VIZ_DATA_DIR',
    r'C:\Users\darkc\OneDrive\Documents\MICA\Viz in Practice\Viz_In_Practice',
)
os.chdir(directory_path)

In [5]:
# Load both raw inputs from the current working directory:
# cp <- 'Children in poverty.csv', ei <- 'EITC_data.csv'.
cp, ei = (
    pd.read_csv(csv_name)
    for csv_name in ('Children in poverty.csv', 'EITC_data.csv')
)

In [6]:
# Split the frame by DataFormat: 'Number' rows carry absolute counts,
# 'Percent' rows carry rates. Each half gets a descriptive name for its
# 'Data' column and loses the now-constant 'DataFormat' column.
abs_cp = (
    cp.loc[cp['DataFormat'] == 'Number']
    .copy()
    .rename(columns={'Data': 'Children_in_Poverty'})
    .drop(columns='DataFormat')
)
prct_df = (
    cp.loc[cp['DataFormat'] == 'Percent']
    .copy()
    .rename(columns={'Data': 'Percent_Child_Poverty'})
    .drop(columns='DataFormat')
)

# Re-join the halves on location and time; the duplicate LocationType column
# coming from the right-hand side is dropped.
cp2 = abs_cp.merge(prct_df, on=['Location', 'TimeFrame']).drop(columns='LocationType_y')

In [7]:
# Exclude the Puerto Rico rows from the merged frame.
cp2 = cp2.loc[cp2['Location'].ne('Puerto Rico')]

In [8]:
# Coerce the time frame to a number once (non-numeric entries become NaN) and
# use that numeric 'Year' column for the filter and the sort. The original
# compared the raw 'TimeFrame' column, which only works when it is already
# numeric. `assign` builds a new frame instead of writing a column onto the
# filtered frame from the previous cell.
cp2 = cp2.assign(Year=pd.to_numeric(cp2['TimeFrame'], errors='coerce'))

# Keep 2000 onwards (NaN years compare False and are dropped), ordered by
# location and then year.
cp2_filtered = cp2[cp2['Year'] >= 2000].copy()
cp2_filtered = cp2_filtered.sort_values(by=['Location', 'Year'])

# Make both measures numeric; unparseable values become NaN.
cp2_filtered['Children_in_Poverty'] = pd.to_numeric(cp2_filtered['Children_in_Poverty'], errors='coerce')
cp2_filtered['Percent_Child_Poverty'] = pd.to_numeric(cp2_filtered['Percent_Child_Poverty'], errors='coerce')

# Restrict to rows whose location type is 'State'.
cp2_states = cp2_filtered[cp2_filtered['LocationType_x'] == 'State']

In [9]:
# Independent working copy of the state-level rows, plus the distinct
# locations and time frames in order of first appearance.
df = cp2_states.copy()
states, years = (df[column].unique() for column in ('Location', 'TimeFrame'))
df.head()

Unnamed: 0,LocationType_x,Location,TimeFrame,Children_in_Poverty,Percent_Child_Poverty,Year
1,State,Alabama,2000,233000,0.21,2000
104,State,Alabama,2001,253000,0.23,2001
107,State,Alabama,2002,255000,0.24,2002
210,State,Alabama,2003,255000,0.24,2003
213,State,Alabama,2004,249000,0.23,2004


In [None]:
# Show the distinct 'Location' values collected from df in the previous cell.
print(states)

In [23]:
# Scratch reproduction of the DataFrame branch of Eased.__init__ on `df`:
# the first index label is appended twice, the first row is repeated at the
# end, and the globals read by power_ease (int_t, n_dims) are set.
labels = np.append(df.index.values, df.index.values[[0, 0]])
int_t = np.arange(labels.size - 1)
data = np.concatenate([df.values, df.values[[0]]], axis=0)
n_dims = len(df.columns)
columns = df.columns



In [33]:
def power_ease(data, n, smoothness=10):
    """Ease between consecutive rows of ``data`` with a power-``n`` in/out curve.

    The time axis and dimensionality are derived from ``data`` itself, so the
    function does not depend on notebook-level ``int_t`` / ``n_dims``
    variables being in sync with the array that is passed in.

    Parameters
    ----------
    data : array-like
        1-D sequence of values, or 2-D array with one row per time point and
        one column per variable.
    n : int
        Exponent of the easing curve.
    smoothness : int
        Number of output frames generated per input row.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows * smoothness, n_columns)``; 1-D input
        yields a single column. Each pair of consecutive rows contributes
        ``smoothness`` frames running from the first row to the second, and
        the remaining frames hold the last row.

    Raises
    ------
    ValueError
        If ``data`` has no rows.
    """
    values = np.asarray(data, dtype=float)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    n_points, n_cols = values.shape[0], values.shape[1]
    if n_points == 0:
        raise ValueError("power_ease needs at least one row of data")

    n_steps = int(smoothness)
    sign = n % 2 * 2

    # Fraction of each segment covered at every step: ease-in on the first
    # half, mirrored ease-out on the second half.
    t = np.linspace(0, 2, n_steps)
    first_half = t < 1
    weights = np.empty_like(t)
    weights[first_half] = t[first_half] ** n / 2
    mirrored = t[~first_half] - 2
    weights[~first_half] = -(1 - sign) / 2 * (mirrored ** n - 2 * (1 - sign))

    n_segments = n_points - 1
    filled = n_segments * n_steps
    starts = values[:-1]
    deltas = values[1:] - starts
    # (segment, step, column) -> flattened to (frame, column)
    segments = starts[:, None, :] + deltas[:, None, :] * weights[None, :, None]

    eased = np.zeros((n_points * n_steps, n_cols))
    eased[:filled] = segments.reshape(filled, n_cols)
    # Frames after the final segment hold the last row (this also covers a
    # single-row input, which has no segments at all).
    eased[filled:] = values[-1]
    return eased

In [34]:
size = df['Children_in_Poverty'].shape[0]

# One (1, n_locations) row of 'Children_in_Poverty' per year, in the order of
# `years`. This relies on every year listing the same locations in the same
# order.
result_arrays = []
for year in years:
    data_for_year = df.loc[df['Year'] == year, 'Children_in_Poverty'].values[np.newaxis, :]
    result_arrays.append(data_for_year)

# Stack the per-year rows into a (n_years, n_locations) array.
result_array = np.vstack(result_arrays)

# Wide frame: years down the index, locations across the columns.
result_df = pd.DataFrame(result_array, index=years, columns=df['Location'].unique())

# Same preparation as the DataFrame branch of Eased.__init__: the first label
# is appended twice, and int_t / n_dims are the globals power_ease reads.
labels = np.append(result_df.index.values, result_df.index.values[[0, 0]])
int_t = np.arange(labels.size - 1)
n_dims = len(result_df.columns)
columns = result_df.columns

# From here on `result_df` is a plain ndarray with the first row repeated at
# the end, so the eased sequence finishes where it started.
result_df = np.concatenate([result_df.values, result_df.values[[0]]], axis=0)

eased_data = power_ease(result_df, 3, 10)

In [None]:
def scatter_animation2d(self, n=3, smoothness=30, speed=1.0, gif=False, destination=None, plot_kws=None, label=False):
    """
    Create a 2d scatter plot animation of eased data.

    Module-level copy of the method of the same name; ``self`` must provide
    ``data`` (rows are time points, columns are consumed in (x, y) pairs),
    ``power_ease(n, smoothness)``, ``out_t`` (set by ``power_ease``) and,
    when ``label`` is true, ``labels``.

    Parameters
    ----------
    n : exponent of the power smoothing
    smoothness : number of animation frames per input time point
    speed : playback speed multiplier (frame interval is 400/smoothness/speed ms)
    gif : if True return an ``IPython.display.Image`` instead of the animation
    destination : optional output path; ``.mp4`` is written with ffmpeg,
        ``.gif`` with imagemagick
    plot_kws : optional dict overriding any of ``s``, ``color``, ``xlim``,
        ``ylim``, ``xlabel``, ``ylabel``, ``alpha``, ``figsize``
    label : if True show the label of the current time point

    Returns
    -------
    The matplotlib animation, or an ``Image`` when ``gif`` is true.

    Raises
    ------
    ValueError
        If the data has an odd number of columns, or ``label`` is requested
        but ``self`` has no ``labels``.
    """
    # Running checks on data for misshapen arrays.
    if np.shape(self.data)[1] % 2 != 0:
        # Raise instead of exit(): exit() would shut down the whole kernel.
        raise ValueError("Failed: Data must have an even number of columns")
    if np.shape(self.data)[0] < np.shape(self.data)[1]:
        print('\033[91m' + "Warning : Data has more columns (xys) than rows (time)")

    labels = getattr(self, 'labels', None)
    if label and labels is None:
        raise ValueError("label=True requires an object with a 'labels' attribute")

    # Work on a copy so the caller's dict is not filled with our defaults.
    plot_kws = dict(plot_kws) if plot_kws is not None else dict()

    it_data = self.power_ease(n, smoothness)

    # filling out missing keys
    lower, upper = np.min(it_data) - 1, np.max(it_data) + 1
    vanilla_params = {'s': 10, 'color': 'black', 'xlim': [lower, upper], 'ylim': [lower, upper],
                      'xlabel': '', 'ylabel': '', 'alpha': 1.0, 'figsize': (6, 6)}
    for key, value in vanilla_params.items():
        plot_kws.setdefault(key, value)

    fig, ax = plt.subplots(figsize=plot_kws['figsize'])
    ax.set_xlim(plot_kws['xlim'])
    ax.set_ylim(plot_kws['ylim'])
    ax.set_xlabel(plot_kws['xlabel'])
    ax.set_ylabel(plot_kws['ylabel'])

    label_text = None
    if label:
        label_text = ax.text(plot_kws['xlim'][1] * 0.75, plot_kws['ylim'][1] * .9, '', fontsize=18)

    n_dots = int(np.shape(self.data)[1] / 2)
    dots = []
    for _ in range(n_dots):
        dot, = ax.plot([], [], linestyle='none', marker='o', markersize=plot_kws['s'],
                       color=plot_kws['color'], alpha=plot_kws['alpha'])
        dots.append(dot)

    def animate(z):
        for i, dot in enumerate(dots):
            # set_data needs sequences; bare scalars are rejected by recent
            # Matplotlib releases.
            dot.set_data([it_data[z, i * 2]], [it_data[z, i * 2 + 1]])
        if label_text is not None:
            # Round to the nearest input time point.
            label_text.set_text(labels[int(np.floor((z + smoothness / 2) / smoothness))])
            return dots + [label_text]
        return dots

    anim = animation.FuncAnimation(fig, animate, frames=len(self.out_t),
                                   interval=400 / smoothness / speed, blit=False)

    extension = destination.split('.')[-1] if destination is not None else None
    if extension == 'mp4':
        writer = animation.writers['ffmpeg'](fps=60)
        anim.save(destination, writer=writer, dpi=100)
    elif extension == 'gif':
        anim.save(destination, writer='imagemagick', fps=smoothness)

    if gif:
        # Point at the file that was actually written; fall back to the
        # historical default name when no gif destination was given.
        return Image(url=destination if extension == 'gif' else 'animation.gif')
    return anim


In [32]:
# NOTE(review): eased_data is the value returned by power_ease, a numpy array,
# and arrays have no scatter_animation2d method -- this call raises the
# AttributeError recorded in the output below.
eased_data.scatter_animation2d(speed=0.5,label=True,plot_kws={'alpha':0.5},destination='Test.gif')

AttributeError: 'numpy.ndarray' object has no attribute 'scatter_animation2d'

In [None]:
?easing

In [None]:
?easing.Eased

In [None]:
?easing.Eased.scatter_animation2d

In [None]:
size = df['Children_in_Poverty'].shape[0]

# One (1, n_locations) row of 'Children_in_Poverty' per year, in the order of
# `years`.
result_arrays = []
for year in years:
    data_for_year = df.loc[df['Year'] == year, 'Children_in_Poverty'].values[np.newaxis, :]
    result_arrays.append(data_for_year)

# Stack the per-year rows into a (n_years, n_locations) array.
result_array = np.vstack(result_arrays)

# Wide frame: years down the index, locations across the columns.
columns = df['Location'].unique()
result_df = pd.DataFrame(result_array, index=years, columns=columns)

# Axis bounds: smallest/largest column label for x, smallest/largest cell for y.
x_min = result_df.columns.min()
x_max = result_df.columns.max()
y_min = result_df.values.min()
y_max = result_df.values.max()

# NOTE(review): this figure is separate from the one scatter_animation2d
# creates internally, so the limits and labels set on `ax` below do not
# affect the animation -- confirm the intent.
fig, ax = plt.subplots()
# NOTE(review): no `import easing` is visible in this notebook; the name must
# be defined before this cell can run.
scatter_plot = easing.Eased(result_df).scatter_animation2d(
    speed=0.5,
    label=True,
    plot_kws={'alpha': 0.5},
    destination='Test.gif',
)

# Axis limits and labels on the standalone axes.
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.set_xlabel('X Axis Label')
ax.set_ylabel('Y Axis Label')

plt.show()

In [None]:
# Animate result_df and save the output as 'Test1.gif'.
# NOTE(review): an earlier comment here spoke of a transposed DataFrame, but
# result_df is passed as-is; also, no `import easing` is visible in this
# notebook -- confirm both before running.
easing.Eased(result_df).scatter_animation2d(speed=0.5, label=True, plot_kws={'alpha': 0.5}, destination='Test1.gif')

In [None]:
# Animate cp2_filtered directly and save the output as 'ScatterTest.gif'.
# NOTE(review): cp2_filtered is the long-format frame and still carries text
# columns such as 'Location', while scatter_animation2d reads the columns as
# numeric (x, y) pairs -- presumably a wide numeric frame is needed; verify.
# No `import easing` is visible in this notebook either.
easing.Eased(cp2_filtered).scatter_animation2d(speed=0.5,label=True,plot_kws={'alpha':0.5},destination='ScatterTest.gif')

In [None]:
# Advanced scatterplot animation: multiple points moving, with labels, saved
# as a gif.
#
# Three clouds of `size` 2-D points are drawn around (1, 1) with increasingly
# wide covariance; each cloud is flattened into one row of x, y, x, y, ...
# values so that a row is one labelled time point.
size = 100
u, v, w = (
    np.random.multivariate_normal([1, 1], covariance, size=size).reshape(1, -1)
    for covariance in ([[1, 0.5], [0.5, 1]], [[2, 1], [1, 2]], [[4, 2], [2, 4]])
)

data = pd.DataFrame(np.concatenate([u, v, w], axis=0), index=['small', 'medium', 'large'])

data.head()

In [None]:
# Animate `data` with labels shown and save the output as 'ScatterTest.gif'.
# NOTE(review): no `import easing` is visible in this notebook; the name must
# be defined before this cell can run.
easing.Eased(data).scatter_animation2d(speed=0.5,label=True,plot_kws={'alpha':0.5},destination='ScatterTest.gif')