In [1]:
# import dependencies
import pandas as pd
import math


In [2]:
# read the per-country carbon emission csv and reduce it, in one chain, to the
# USA emissions from 1969 onward:
#   1. keep only rows with year >= 1969 and only the 'year' and 'USA' columns
#   2. rename 'USA' to 'emissions(GtC)'
#   3. reset the index so rows are numbered from 0
# NOTE(review): yearly values such as 1098.0 look like megatons rather than
# gigatons of carbon -- confirm the unit against the source dataset
carbon = (
    pd.read_csv('../resources/data/carbon_by_country.csv')
    .loc[lambda df: df['year'] >= 1969, ['year', 'USA']]
    .rename(columns={'USA': 'emissions(GtC)'})
    .reset_index(drop=True)
)


In [3]:
# create function to calculate total accumulated carbon emissions(GtC)
def total(x):
    """Return the summed 'emissions(GtC)' of every row in `carbon` whose 'year' is before x.

    x: the year to accumulate up to (exclusive).
    Reads the notebook-level `carbon` dataframe.
    """
    earlier_years = carbon['year'] < x
    return carbon.loc[earlier_years, 'emissions(GtC)'].sum()

# for each year, store the emissions accumulated over all earlier years
carbon['accumulated(GtC)'] = carbon['year'].map(total)


In [4]:
# create function to calculate scaled dft size
def scale(x):
    """Linearly rescale x from the range [0, 70730] to the range [32, 1048576].

    x: value to rescale (the notebook passes accumulated emissions).
    Returns 32 for x == 0 and 1048576 for x == 70730.
    """
    # input range -- presumably 70730 is the largest accumulated value; confirm
    in_low, in_high = 0, 70730

    # output range (32 == 2**5, 1048576 == 2**20)
    out_low, out_high = 32, 1048576

    fraction = (x - in_low) / (in_high - in_low)
    return fraction * (out_high - out_low) + out_low

# for each year, store the accumulated emissions rescaled to a dft size
carbon['dft_size'] = carbon['accumulated(GtC)'].map(scale)


In [5]:
# create function to calculate viable fft size
def fft_size(x):
    """Round x down to a power of two and return it as an int.

    x: the scaled dft size.
    Returns 1 when x is 0. For x >= 1 the result is the largest power of two
    that does not exceed x; for 0 < x < 1 the power is fractional and the
    int conversion yields 0.
    """
    # log2 is undefined at 0, so that case is answered directly
    if x == 0:
        return 1

    exponent = math.floor(math.log2(x))
    return int(2**exponent)

# for each year, store the viable fft size derived from its dft size
carbon['fft_size'] = carbon['dft_size'].map(fft_size)


In [6]:
# all unique fft sizes used
ffts = carbon['fft_size'].unique()

# frequency resolutions in hertz(Hz), listed in the same order as the fft sizes
Hz = [20000, 86, 21, 10, 5, 2, 1, 0.5]

# dictionary pairing each fft size with the frequency resolution(Hz) found at
# the same position in Hz (indexing Hz fails if there are more sizes than entries)
freq_res = {size: Hz[position] for position, size in enumerate(ffts)}

# look up each year's fft size to create the frequency resolution column
carbon['freq_res(Hz)'] = carbon['fft_size'].map(freq_res)


In [7]:
# create function to calculate file names
def file(x):
    """Return the file name for fft size x.

    x: an fft size.
    The name is 'jimi<N>.mp3', where N is the 1-based position of x within the
    notebook-level `ffts` array. Returns None when x is not in `ffts`.
    """
    # number the unique fft sizes from 1 and stop at the first match
    for number, size in enumerate(ffts, start=1):
        if x == size:
            return f'jimi{number}.mp3'

# for each year, store the name of the file that matches its fft size
carbon['file'] = carbon['fft_size'].map(file)


In [8]:
# create function to calculate starting times in milliseconds(ms) for each file
def file_begin(x):
    """Return the start time (ms) of file x within the playback.

    x: a file name of the form 'jimi<N>.mp3'.
    The start time is the number of years (rows of the notebook-level `carbon`
    dataframe) represented by all lower-numbered files, multiplied by 4735,
    the scaled length of a year in audio playback(ms).
    """
    # the file number is the whole digit run between the 4-character 'jimi'
    # prefix and the extension, so multi-digit names such as 'jimi10.mp3'
    # are read as 10 rather than as their first digit only
    number = int(x[4:].split('.', 1)[0])

    # names of every file numbered below this one
    earlier_files = [f'jimi{i}.mp3' for i in range(0, number)]

    # number of years represented by previous files
    previous = int(carbon['file'].isin(earlier_files).sum())

    # return file start time as previous years multiplied by scaled length of a year in audio playback(ms)
    return previous*4735

# for each year, store the starting time(ms) of the file it belongs to
carbon['file_begin(ms)'] = carbon['file'].map(file_begin)


In [9]:
# create function to calculate playback starting times(ms) for each year
def play_begin(x):
    """Return the playback start time (ms) for year x.

    x: a year; 1969 maps to 0.
    Each year after 1969 adds 4735, the scaled length of a year in audio
    playback(ms).
    """
    years_elapsed = x - 1969
    return years_elapsed * 4735

# for each year, store the time(ms) at which its playback starts
carbon['play_begin(ms)'] = carbon['year'].map(play_begin)


In [10]:
# create function to calculate ending times(ms) for each file
def file_end(x):
    """Return the end time (ms) of file x within the playback.

    x: a file name as stored in the 'file' column of the notebook-level
    `carbon` dataframe.
    'jimi8.mp3' always ends at 241471. Any other file ends at its
    file_begin(x) plus 4735 for every year it represents except the last,
    plus 10000 for that last year.
    NOTE(review): the computed end time is not limited to 241471, so a long
    file can end after the value used for 'jimi8.mp3' -- confirm this is
    intended.
    """
    # last file ends at ending of manipulated audio
    if x == 'jimi8.mp3':
        return 241471

    # number of years represented by current file
    years = int((carbon['file'] == x).sum())

    # all years but the last take the scaled length of a year in audio playback(ms)
    other_years = (years - 1) * 4735

    # final year represented by a file is 10 seconds to allow crossfading into next year
    final_year = 10000

    # end time is the beginning time plus the scaled length of the years represented by the file
    return file_begin(x) + final_year + other_years

# for each year, store the ending time(ms) of the file it belongs to
carbon['file_end(ms)'] = carbon['file'].map(file_end)


In [11]:
# display the full dataframe: as the last expression of the cell, `carbon`
# is rendered as the cell's output
carbon


Unnamed: 0,year,emissions(GtC),accumulated(GtC),dft_size,fft_size,freq_res(Hz),file,file_begin(ms),play_begin(ms),file_end(ms)
0,1969,1098.0,0.0,32.0,32,20000.0,jimi1.mp3,0,0,10000
1,1970,1181.0,1098.0,16309.41,8192,86.0,jimi2.mp3,4735,4735,14735
2,1971,1188.0,2279.0,33817.26,32768,21.0,jimi3.mp3,9470,9470,24205
3,1972,1244.0,3467.0,51428.89,32768,21.0,jimi3.mp3,9470,14205,24205
4,1973,1300.0,4711.0,69870.69,65536,10.0,jimi4.mp3,18940,18940,43145
5,1974,1254.0,6011.0,89142.67,65536,10.0,jimi4.mp3,18940,23675,43145
6,1975,1201.0,7265.0,107732.7,65536,10.0,jimi4.mp3,18940,28410,43145
7,1976,1257.0,8466.0,125537.1,65536,10.0,jimi4.mp3,18940,33145,43145
8,1977,1292.0,9723.0,144171.6,131072,5.0,jimi5.mp3,37880,37880,76290
9,1978,1333.0,11015.0,163325.0,131072,5.0,jimi5.mp3,37880,42615,76290


In [12]:
# write the dataframe, including its index column, to a csv file
carbon.to_csv(path_or_buf='../resources/data/carbon.csv', index=True)


In [13]:
# group years by file name
files_group = carbon.groupby('file')

# columns to extract data from
columns = ['fft_size', 'freq_res(Hz)', 'file_begin(ms)', 'file_end(ms)']

# build one row per file from the first year of each file, indexed by that
# row's own file name. Taking the label and the data from the same row keeps
# them aligned: group keys come back sorted (which would put 'jimi10.mp3'
# ahead of 'jimi2.mp3'), whereas unique() file names come back in order of
# appearance, so pairing those two by position can mislabel rows.
# rename_axis(None) drops the 'file' index name so the table keeps an
# unnamed index.
files = files_group.head(1).set_index('file')[columns].rename_axis(None)

# display dataframe
files


Unnamed: 0,fft_size,freq_res(Hz),file_begin(ms),file_end(ms)
jimi1.mp3,32,20000.0,0,10000
jimi2.mp3,8192,86.0,4735,14735
jimi3.mp3,32768,21.0,9470,24205
jimi4.mp3,65536,10.0,18940,43145
jimi5.mp3,131072,5.0,37880,76290
jimi6.mp3,262144,2.0,71025,137845
jimi7.mp3,524288,1.0,132580,242015
jimi8.mp3,1048576,0.5,236750,241471
