# Analysis and Final Remarks

## Abstract

### Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### CSV Read-in

In [4]:
# Load the labeled track data and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="../data/labeled.csv")
df.head(n=5)

Unnamed: 0,artist_id,artist,album_id,album_names,track_id,track_number,track_title,track_duration,track_disc_number,danceability,...,key_tone_E,key_tone_F,key_tone_F#,key_tone_G,key_tone_G#,time_signature_1.0,time_signature_3.0,time_signature_4.0,time_signature_5.0,label
0,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,5n9QFM4EiMeLGO0Mbwaqov,1.0,Everything's Different Now,236733.0,1.0,0.672,...,0,0,0,0,0,0,0,1,0,4
1,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,0BKslA1XqG8HBUKBl4d7EN,2.0,Rip In Heaven,211200.0,1.0,0.612,...,1,0,0,0,0,0,0,1,0,12
2,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,5G7NBdQLHc7GkehNmLG3yJ,3.0,Why Must I,221000.0,1.0,0.586,...,0,0,0,0,0,0,0,1,0,13
3,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,0J6nZDgEuwXtECq8Pukn1n,4.0,J For Jules,265493.0,1.0,0.475,...,0,0,0,0,0,0,0,1,0,4
4,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,3Lm7hSTabr01oav0WdSsuD,5.0,(Believed You Were) Lucky,216973.0,1.0,0.612,...,0,1,0,0,0,0,0,1,0,10


#### Undummy

##### Helper Function

In [56]:
def undummy(df, dummies, first_dropped=True, dropped=None):
    '''
    Given a DataFrame and list of dummy columns for one category, returns a collapsed categorical series.

    Neither `df` nor `dummies` is modified by this function.

    inputs:
    df: DataFrame that contains dummies
    dummies: list of dummy columns for one category; everything up to and including the last "_"
             of the first column name is treated as the shared prefix
    first_dropped: Expects boolean, True indicates that the dummies have an inferred value, False indicates no categories are inferred.  Defaults to True.
    dropped: Expects string, if there is an inferred category, this should be the category's value without the dummy prefix

    returns:
    None if first_dropped is True but the dropped category isn't given
    Otherwise returns a pandas series, indexed like df, that has the categories (prefix removed) as values.
    A row in which no dummy column equals 1 gets NaN; if several columns equal 1, the first one in
    column order wins.
    '''
    # Private copy so the caller's list is never extended with the inferred column.
    dummies = list(dummies)
    delimiter_index = dummies[0].rindex("_")

    # Check dependent argument before touching the data
    if first_dropped and not dropped:
        print("Give the dropped dummy if first was dropped!")
        return None

    # Copy of just the dummy columns: the inferred column is added here, not to the caller's df.
    dummy_frame = df[dummies].copy()

    if first_dropped:
        # Row-wise sum of the dummy columns is 0 exactly when the row belongs to the inferred
        # category, so 1 - sum flips that into a regular 0/1 dummy column.
        dummy_name = dummies[0][:delimiter_index + 1] + dropped
        dummy_frame[dummy_name] = 1 - dummy_frame.sum(axis=1)

    # Boolean matrix of "this cell is hot"; argmax along each row gives the position of the
    # first hot column, which keeps exactly one value per row regardless of how many 1s it has.
    hot = (dummy_frame == 1).to_numpy()
    # Category labels are the column names with the shared prefix stripped.
    categories = np.array([column[delimiter_index + 1:] for column in dummy_frame.columns], dtype=object)
    dummy_series = pd.Series(categories[hot.argmax(axis=1)], index=df.index)

    # argmax returns 0 for an all-False row; blank those rows out instead of reporting the first category.
    return dummy_series.where(hot.any(axis=1))

##### Undummying

In [48]:
# One-hot columns that encode the track's key; listed one per line in their original order.
dummies = [
    'key_tone_A#',
    'key_tone_B',
    'key_tone_C',
    'key_tone_C#',
    'key_tone_D',
    'key_tone_D#',
    'key_tone_E',
    'key_tone_F',
    'key_tone_F#',
    'key_tone_G',
    'key_tone_G#',
]

In [55]:
# Collapse the key_tone_* one-hot columns into a single "key" column. "A" is passed as the
# inferred category: it is the one key with no column of its own in `dummies`.
df["key"] = undummy(df, dummies, dropped="A")
# The one-hot columns are redundant once "key" exists, so remove them from df.
# NOTE(review): this cell is not re-runnable on the same df, since the columns are gone after the first run.
df.drop(columns=dummies,inplace=True)

In [57]:
# One-hot columns that encode the track's time signature; reuses the `dummies` name.
dummies = [
    'time_signature_1.0',
    'time_signature_3.0',
    'time_signature_4.0',
    'time_signature_5.0',
]

In [61]:
# Collapse the time_signature_* one-hot columns into one column; "0.0" is passed as the inferred category.
# undummy returns the column suffixes as strings such as "4.0", so the chain goes
# str -> float -> int -> str to end up with labels like "4".
df["time_signature"] = undummy(df, dummies, dropped="0.0").astype(float).astype(int).astype(str)
# The one-hot columns are redundant once "time_signature" exists, so remove them from df.
df.drop(columns=dummies, inplace=True)

In [62]:
# Preview the first five rows to confirm the new "key" and "time_signature" columns.
df.head(n=5)

Unnamed: 0,artist_id,artist,album_id,album_names,track_id,track_number,track_title,track_duration,track_disc_number,danceability,...,is_major,speechiness,acousticness,valence,tempo,is_live,is_instrumental,label,key,time_signature
0,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,5n9QFM4EiMeLGO0Mbwaqov,1.0,Everything's Different Now,236733.0,1.0,0.672,...,1.0,0.0393,0.0693,0.73,123.777,0,0,4,A#,4
1,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,0BKslA1XqG8HBUKBl4d7EN,2.0,Rip In Heaven,211200.0,1.0,0.612,...,1.0,0.0351,0.428,0.815,131.809,0,0,12,E,4
2,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,5G7NBdQLHc7GkehNmLG3yJ,3.0,Why Must I,221000.0,1.0,0.586,...,1.0,0.0268,0.229,0.838,96.486,0,0,13,C#,4
3,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,0J6nZDgEuwXtECq8Pukn1n,4.0,J For Jules,265493.0,1.0,0.475,...,1.0,0.0308,0.462,0.262,151.651,0,0,4,A#,4
4,1L0y9srZMyh9XUnYGv37IP,'Til Tuesday,67OxSfZotEq8cCa5SCeX5r,Everything's Different Now,3Lm7hSTabr01oav0WdSsuD,5.0,(Believed You Were) Lucky,216973.0,1.0,0.612,...,1.0,0.0321,0.448,0.495,124.315,0,0,10,F,4


## Analysis