In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
def extract3dDataArrays(dataType, sexSelected, placeSelected):

    if dataType == "deaths":
        d = pd.read_csv("../assets/data/deaths.csv")
        d2 = d.loc[
            (d['cntry'] == placeSelected) & 
            (d['sex'] == sexSelected) & 
            (d['age'] <= 90),
            :
        ]

        # format a number as a string to 0 decimal places
        def formatNumber(number):
            return f"{number:,.0f}"
        
        def testFormatNumber():
            assert formatNumber(123456789) == '123,456,789'
        
        # convert a real number to log10 scale and format to 3 decimal places
        def formatLog10(number):
            return f"{np.log10(number):.3f}"
        
        def testFormatLog10():
            assert formatLog10(1000) == '3.000'
        
        # Return a formatted string with a line break
        def labelSurfacePoint(row):
            return f"""In {row['year']}, at age {row['age']}:\n
                {formatNumber(row['number_of_deaths'])} deaths\n
                {formatLog10(row['number_of_deaths'])} log10 deaths"""
        
        def testLabelSurfacePoint():
            assert labelSurfacePoint(
                pd.Series({
                    'year': 2019,
                    'age': 90,
                    'number_of_deaths': 1000
                })
            ) == """In 2019, at age 90:\n
                1,000 deaths\n
                3.000 log10 deaths"""
        

        zArray = d2.loc[
            :, ['age', 'year', 'number_of_deaths']
        ].pivot(
            index = 'year', columns = 'age', values = 'number_of_deaths'
        )
        zArray = np.array(zArray)
        xRange = np.arange(d2['age'].min(), d2['age'].max())
        yRange = np.arange(d2['year'].min(), d2['year'].max())

        return xRange, yRange, zArray

In [3]:
sexValue = 'total'
placeValue = 'AUS'

xs, ys, zs = extract3dDataArrays('deaths', sexValue, placeValue)

Now want to figure out how to produce an array of labels of the same dimension as zs, which includes the age from xs and year from ys.


In [6]:
zs[0:5,0:5]
zs.shape
xs.shape



(90,)

In [9]:
d = pd.read_csv("../assets/data/deaths.csv")
d2 = d.loc[
    (d['cntry'] == placeValue) & 
    (d['sex'] == sexValue) & 
    (d['age'] <= 90),
    :
]

d2.head()

Unnamed: 0,country,link,cntry,year,age,sex,number_of_deaths
2,Australia,/Country/Country?cntr=AUS,AUS,1921,0,total,8966.85
5,Australia,/Country/Country?cntr=AUS,AUS,1921,1,total,1609.61
8,Australia,/Country/Country?cntr=AUS,AUS,1921,2,total,689.05
11,Australia,/Country/Country?cntr=AUS,AUS,1921,3,total,415.72
14,Australia,/Country/Country?cntr=AUS,AUS,1921,4,total,386.59


In [10]:

def make_label(row):
    return f"{row['number_of_deaths']} at age {row['age']} and year {row['year']}"

d2['label'] = d2.apply(make_label, axis = 1)

d2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d2['label'] = d2.apply(make_label, axis = 1)


Unnamed: 0,country,link,cntry,year,age,sex,number_of_deaths,label
2,Australia,/Country/Country?cntr=AUS,AUS,1921,0,total,8966.85,8966.85 at age 0 and year 1921
5,Australia,/Country/Country?cntr=AUS,AUS,1921,1,total,1609.61,1609.61 at age 1 and year 1921
8,Australia,/Country/Country?cntr=AUS,AUS,1921,2,total,689.05,689.05 at age 2 and year 1921
11,Australia,/Country/Country?cntr=AUS,AUS,1921,3,total,415.72,415.72 at age 3 and year 1921
14,Australia,/Country/Country?cntr=AUS,AUS,1921,4,total,386.59,386.59 at age 4 and year 1921
...,...,...,...,...,...,...,...,...
33227,Australia,/Country/Country?cntr=AUS,AUS,2020,86,total,5355.20,5355.2 at age 86 and year 2020
33230,Australia,/Country/Country?cntr=AUS,AUS,2020,87,total,5382.20,5382.2 at age 87 and year 2020
33233,Australia,/Country/Country?cntr=AUS,AUS,2020,88,total,5461.20,5461.2 at age 88 and year 2020
33236,Australia,/Country/Country?cntr=AUS,AUS,2020,89,total,5722.22,5722.22 at age 89 and year 2020


In [None]:
fig = go.Figure(
            data = 
                [go.Surface(
                    x = xs, 
                    y = ys, 
                    z = zs,
                    surfacecolor = np.log(zs)
                )]
        )

fig.show()