In [5]:
import sys
import os
import json
import spotipy
import spotipy.util as sp_util
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOauthError
from spotipy.client import SpotifyException


import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

import os
os.getcwd()

os.chdir('/Users/sean/PycharmProjects/Muser/4.453-Creative-ML-for-Design/spotify-api-starter-master/src')
os.getcwd()

'/Users/sean/PycharmProjects/Muser/4.453-Creative-ML-for-Design/spotify-api-starter-master/src'

In [6]:
from display_utils import (
    print_header,
    track_string,
    print_audio_features_for_track,
    print_audio_analysis_for_track,
    choose_tracks
    )

from common import (
    authenticate_client,
    authenticate_user,
    fetch_artists,
    fetch_artist_top_tracks
    )

# Define the scopes that we need access to
# https://developer.spotify.com/web-api/using-scopes/
scope = 'user-library-read playlist-read-private'

# import pandas as pd
# import numpy as np

dict_songs = {}

################################################################################
# API Fetch Functions
################################################################################

def get_audio_features(spotify, tracks, pretty_print=False):
    """
    Given a list of tracks, get and print the audio features for those tracks!
    :param spotify: An authenticated Spotipy instance
    :param tracks: A list of track dictionaries
    """
    if not tracks:
        print('No tracks provided.')
        return

    # Build a map of id->track so we can get the full track info later
    track_map = {track.get('id'): track for track in tracks}

    # Request the audio features for the chosen tracks (limited to 50)
    # print_header('Getting Audio Features...')
    tracks_features_response = spotify.audio_features(tracks=track_map.keys())
    track_features_map = {f.get('id'): f for f in tracks_features_response}

    # Iterate through the features and print the track and info
    if pretty_print:
        for track_id, track_features in track_features_map.items():
            # Print out the track info and audio features
            track = track_map.get(track_id)
            print_audio_features_for_track(track, track_features)

    return track_features_map

def get_audio_analysis(spotify, tracks, pretty_print=False):
    """
    Given a list of tracks, get and print the audio analysis for those tracks!
    :param spotify: An authenticated Spotipy instance
    :param tracks: A list of track dictionaries
    """
    if not tracks:
        print('No tracks provided.')
        return

    # Build a map of id->track so we can get the full track info later
    track_map = {track.get('id'): track for track in tracks}

    # Request the audio analysis for each track -- one at a time since these
    # can be really big
    tracks_analysis = {}

    # print_header('Getting Audio Audio Analysis...')
    for track_id in track_map.keys():
        analysis = spotify.audio_analysis(track_id)
        tracks_analysis[track_id] = analysis

        # Print out the track info and audio features
        if pretty_print:
            track = track_map.get(track_id)
            print_audio_analysis_for_track(track, analysis)

    dict_songs = tracks_analysis

    return tracks_analysis


################################################################################
# Demo Functions
################################################################################

def search_track(spotify):
    """
    This demo function will allow the user to search a song title and pick the song from a list in order to fetch
    the audio features/analysis of it
    :param spotify: An basic-authenticated spotipy client
    """
    keep_searching = True
    selected_track = None

    # Initialize Spotipy
    spotify = authenticate_client()

    # We want to make sure the search is correct
    while keep_searching:
        search_term = input('\nWhat song would you like to search: ')

        # Search spotify
        results = spotify.search(search_term)
        tracks = results.get('tracks', {}).get('items', [])

        if len(tracks) == 0:
            print_header('No results found for "{}"'.format(search_term))
        else:
            # Print the tracks
            print_header('Search results for "{}"'.format(search_term))
            for i, track in enumerate(tracks):
                print('  {}) {}'.format(i + 1, track_string(track)))

        # Prompt the user for a track number, "s", or "c"
        track_choice = input('\nChoose a track #, "s" to search again, or "c" to cancel: ')
        try:
            # Convert the input into an int and set the selected track
            track_index = int(track_choice) - 1
            selected_track = tracks[track_index]
            keep_searching = False
        except (ValueError, IndexError):
            # We didn't get a number.  If the user didn't say 'retry', then exit.
            if track_choice != 's':
                # Either invalid input or cancel
                if track_choice != 'c':
                    print('Error: Invalid input.')
                keep_searching = False

    # Quit if we don't have a selected track
    if selected_track is None:
        return

    # Request the features for this track from the spotify API
    # get_audio_features(spotify, [selected_track])

    print("selected_track", selected_track)
    return [selected_track]
    # return selected_track


def list_playlists(spotify, username):
    """
    Get all of a user's playlists and have them select tracks from a playlist
    """
    # Get all the playlists for this user
    playlists = []
    total = 1
    # The API paginates the results, so we need to iterate
    while len(playlists) < total:
        playlists_response = spotify.user_playlists(username, offset=len(playlists))
        playlists.extend(playlists_response.get('items', []))
        total = playlists_response.get('total')

    # Remove any playlists that we don't own
    playlists = [playlist for playlist in playlists if playlist.get('owner', {}).get('id') == username]

    # List out all of the playlists
    print_header('Your Playlists')
    for i, playlist in enumerate(playlists):
        print('  {}) {} - {}'.format(i + 1, playlist.get('name'), playlist.get('uri')))

    # Choose a playlist
    playlist_choice = int(input('\nChoose a playlist: '))
    playlist = playlists[playlist_choice - 1]
    playlist_owner = playlist.get('owner', {}).get('id')

    # Get the playlist tracks
    tracks = []
    total = 1
    # The API paginates the results, so we need to keep fetching until we have all of the items
    while len(tracks) < total:
        tracks_response = spotify.user_playlist_tracks(playlist_owner, playlist.get('id'), offset=len(tracks))
        tracks.extend(tracks_response.get('items', []))
        total = tracks_response.get('total')

    # Pull out the actual track objects since they're nested weird
    tracks = [track.get('track') for track in tracks]

    # Print out our tracks along with the list of artists for each
    print_header('Tracks in "{}"'.format(playlist.get('name')))

    # Let em choose the tracks
    selected_tracks = choose_tracks(tracks)

    return selected_tracks


def list_library(spotify, username):
    """
    Get all songs from tthe user's library and select from there
    """

    # Get all the playlists for this user
    tracks = []
    total = 1
    first_fetch = True
    # The API paginates the results, so we need to iterate
    while len(tracks) < total:
        tracks_response = spotify.current_user_saved_tracks(offset=len(tracks))
        tracks.extend(tracks_response.get('items', []))
        total = tracks_response.get('total')

        # Some users have a LOT of tracks.  Warn them that this might take a second
        if first_fetch and total > 150:
            print('\nYou have a lot of tracks saved - {} to be exact!\nGive us a second while we fetch them...'.format(
                total))
            first_fetch = False

    # Pull out the actual track objects since they're nested weird
    tracks = [track.get('track') for track in tracks]

    # Let em choose the tracks
    selected_tracks = choose_tracks(tracks)

    # # Print the audio features :)
    # get_audio_features(spotify, selected_tracks)

    return selected_tracks

In [7]:
spotify = authenticate_client()

# selected_tracks = search_track(spotify)

# suppose we assume that we choose the first song on the list

def print_audio_analysis(my_track):
    # print("my_track: ", my_track)

    audio_analysis_key = list(my_track.get('audio_analysis').keys())[0]
    # print(audio_analysis_key)


    my_track_audio_analysis = my_track.get('audio_analysis').get(audio_analysis_key)


    for key in list(my_track_audio_analysis.keys()):
        print("Feature: ", key)
        key_feature = my_track_audio_analysis.get(key)
        # print(type(key_feature))

        if type(key_feature) == list:
            print("Length:  ", len(key_feature))
        else:
            print("Length: ", 1)

        print("Audio Analysis: ", key_feature,'\n')


def get_music_features(my_song):

    search_item = my_song
    spotify = authenticate_client()
    item_list = spotify.search(search_item)
    selected_tracks = [item_list.get('tracks').get('items', [])[0]]

# print(type(selected_tracks))
# print(selected_tracks)

    my_track = selected_tracks[0]


    my_track['audio_features'] = get_audio_features(spotify, selected_tracks, pretty_print=False)

    my_track['audio_analysis'] = get_audio_analysis(spotify, selected_tracks, pretty_print=False)

    print('Song: ', my_song)


#     print('Audio Features: ', my_track.get('audio_features'))

#     print_audio_analysis(my_track)

    return my_track

In [101]:
music_data_csv = pd.read_csv('/Users/sean/PycharmProjects/Muser/4.453-Creative-ML-for-Design/music-list.csv')

In [108]:
music_data = music_data_csv.iloc[0:14, :]
music_data

Unnamed: 0,Calming/Comforting,artist,Arousing/Sport,artist.1,Love,artist.2
0,Cloudless,Sigrid Vass,My Way,Calvin Harris,First Date,
1,Colors of the Sun,Andri Hart,Kill Room,Delta Heavy,Nightlife,
2,In Trutina,Marcel Depius,Hype,Dizzee Rascal,The Smell of Your Skin,
3,Ali,Caleufu,Just Say,"KDA, Tinashe",Love by the Sea,
4,Port Alex,Altan,Good Grief,Bastille,Morena,
5,Poets,Ciaran Delany,Sweet Lies,"Wilkinson, Karen Harding",Louisiana,
6,East Wind,Hannu Esaer,Ordinary,Two Door Cinema Club,Forever Young in New Orleans,
7,Ambar,Patagonic,Will You Be There,The Sherlocks,A Cup of Jazz,
8,Sin Ropa,Calden,Want to Want Me,Miami Beatz,Romance,
9,Weightless,Taz Steen,Barbra Streisand,CDM Project,Sexual Healing,Marvin Gaye


In [109]:
music_data.columns

Index(['Calming/Comforting', 'artist', 'Arousing/Sport', 'artist.1', 'Love ',
       'artist.2'],
      dtype='object')

In [114]:
# song_list = ['lover', 'shape of you', '龙卷风']
song_list = music_data.loc[:,'Calming/Comforting'].tolist()
song_list = song_list + music_data.loc[:,'Arousing/Sport'].tolist()
song_list = song_list + music_data.loc[:,'Love '].tolist()
song_list

['Cloudless',
 'Colors of the Sun',
 'In Trutina',
 'Ali',
 'Port Alex',
 'Poets',
 'East Wind',
 'Ambar',
 'Sin Ropa',
 'Weightless',
 'Nights',
 'Parents',
 'Dancing Feet',
 'Castle of Ice',
 'My Way',
 'Kill Room',
 'Hype',
 'Just Say',
 'Good Grief',
 'Sweet Lies',
 'Ordinary',
 'Will You Be There',
 'Want to Want Me',
 'Barbra Streisand',
 'All About That Bass',
 'Hot n Cold',
 'Infinity 2008',
 'Geronimo',
 'First Date',
 'Nightlife',
 'The Smell of Your Skin',
 'Love by the Sea',
 'Morena',
 'Louisiana',
 'Forever Young in New Orleans',
 'A Cup of Jazz ',
 'Romance',
 'Sexual Healing',
 "Let's Get It On",
 'Go Down On You',
 'Hey Baby',
 'Kisses']

In [115]:
song_features = {}

for my_song in song_list:
    my_track = get_music_features(my_song)
    song_features[my_song] = my_track

# print(song_features)
# get_music_features('shape of you')

Song:  Cloudless
Song:  Colors of the Sun
Song:  In Trutina
Song:  Ali
Song:  Port Alex
Song:  Poets
Song:  East Wind
Song:  Ambar
Song:  Sin Ropa
Song:  Weightless
Song:  Nights
Song:  Parents
Song:  Dancing Feet
Song:  Castle of Ice
Song:  My Way
Song:  Kill Room
Song:  Hype
Song:  Just Say
Song:  Good Grief
Song:  Sweet Lies
Song:  Ordinary
Song:  Will You Be There
Song:  Want to Want Me
Song:  Barbra Streisand
Song:  All About That Bass
Song:  Hot n Cold
Song:  Infinity 2008
Song:  Geronimo
Song:  First Date
Song:  Nightlife
Song:  The Smell of Your Skin
Song:  Love by the Sea
Song:  Morena
Song:  Louisiana
Song:  Forever Young in New Orleans
Song:  A Cup of Jazz 
Song:  Romance
Song:  Sexual Healing
Song:  Let's Get It On
Song:  Go Down On You
Song:  Hey Baby
Song:  Kisses


In [116]:
song_name_list = list(song_features.keys())
song_name_list

['Cloudless',
 'Colors of the Sun',
 'In Trutina',
 'Ali',
 'Port Alex',
 'Poets',
 'East Wind',
 'Ambar',
 'Sin Ropa',
 'Weightless',
 'Nights',
 'Parents',
 'Dancing Feet',
 'Castle of Ice',
 'My Way',
 'Kill Room',
 'Hype',
 'Just Say',
 'Good Grief',
 'Sweet Lies',
 'Ordinary',
 'Will You Be There',
 'Want to Want Me',
 'Barbra Streisand',
 'All About That Bass',
 'Hot n Cold',
 'Infinity 2008',
 'Geronimo',
 'First Date',
 'Nightlife',
 'The Smell of Your Skin',
 'Love by the Sea',
 'Morena',
 'Louisiana',
 'Forever Young in New Orleans',
 'A Cup of Jazz ',
 'Romance',
 'Sexual Healing',
 "Let's Get It On",
 'Go Down On You',
 'Hey Baby',
 'Kisses']

In [117]:
# lover = song_features.get('lover')
# soy = song_features.get('shape of you')
# ljf = song_features.get('龙卷风')

song_obj_list = []
for item in song_name_list:
    song_obj_list.append(song_features.get(item))

# lover.keys()
song_obj_list[1].keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri', 'audio_features', 'audio_analysis'])

In [118]:
# lover_id = song_features.get('lover').get('id')
# soy_id = song_features.get('shape of you').get('id')
# ljf_id = song_features.get('龙卷风').get('id')

song_id_list = []
for item in song_obj_list:
    song_id_list.append(item.get('id'))

song_id_list

['76PhNwBIgoytLw14EXOLXs',
 '3H1TT6j9G6hQDOHWRXb3xm',
 '4UMHyDQfz4ynsjVHTLcqRo',
 '1gGhqfs2pDTOI30AEXMXrn',
 '5OPoGFel9ubOb09aQT55yw',
 '7MIxMXrks1jV81Vid5068Q',
 '1GVjLE6wFF2JRb1SIGPcFA',
 '63xWKZ3nnuCpqgIFeInmQ2',
 '2tVNCE8ugILalQhS838rfU',
 '6kkwzB6hXLIONkEk9JciA6',
 '7eqoqGkKwgOaWNNHx90uEZ',
 '68rVGSTnCiFOET9k5Vd8Se',
 '64u6iR0gpP2a4T1P5HRbu1',
 '48eB4f7ZEs26kBOQhHOYMK',
 '3spdoTYpuCpmq19tuD0bOe',
 '1GT5zj3zDz5x3TK2hBNWYe',
 '5NFYuqu8V6QXc6mzcLImd6',
 '6gNTUALYq5cQXVCwXR7snp',
 '1oxOiOjsi7plNOZEhoPLPj',
 '4KMkvQ4pukipifwyjc8fAH',
 '15k2nBQJ0teDmPZHrOXL2N',
 '0MrDqmqbP0xS9sGedtiuAC',
 '7oGZAicScQt96OAW4AruYy',
 '5WZXfM7kHpZi7Zi6XlFgGy',
 '5jE48hhRu8E6zBDPRSkEq7',
 '1y4eb6hmAvsqlDOl3fx9kk',
 '3aQz0z86zrKjd1mcZlonxE',
 '19cL3SOKpwnwoKkII7U3Wh',
 '1fJFuvU2ldmeAm5nFIHcPP',
 '2gTVUNQeGtHXKWlrgcXcvG',
 '5dC64QrQ1q27D7f7JC88AA',
 '2aaL3K3Nm9kWEj5D6bOkYr',
 '3osYhl8lGmLJYASrrh9Y5f',
 '50PU05RTGva8laKDwxED9Y',
 '2of1WhuaEfI7A3WBAdmvkt',
 '1MBLk5kgS8kI26hoHZxynn',
 '33BCQDhwZyiudb8Mud33tb',
 

In [119]:
print("Popularity")
for i, item in enumerate(song_name_list):
    print(item, ': ', song_obj_list[i].get('popularity'))
    
# print('lover: ', lover.get('popularity'))
# print('shape of you: ', soy.get('popularity'))
# print('龙卷风: ', ljf.get('popularity'))

Popularity
Cloudless :  58
Colors of the Sun :  55
In Trutina :  55
Ali :  76
Port Alex :  52
Poets :  53
East Wind :  51
Ambar :  51
Sin Ropa :  51
Weightless :  67
Nights :  77
Parents :  73
Dancing Feet :  52
Castle of Ice :  58
My Way :  72
Kill Room :  34
Hype :  60
Just Say :  52
Good Grief :  62
Sweet Lies :  50
Ordinary :  68
Will You Be There :  70
Want to Want Me :  75
Barbra Streisand :  49
All About That Bass :  70
Hot n Cold :  66
Infinity 2008 :  67
Geronimo :  63
First Date :  73
Nightlife :  49
The Smell of Your Skin :  1
Love by the Sea :  32
Morena :  56
Louisiana :  63
Forever Young in New Orleans :  0
A Cup of Jazz  :  42
Romance :  70
Sexual Healing :  70
Let's Get It On :  62
Go Down On You :  41
Hey Baby :  64
Kisses :  54


In [120]:
print("audio_features")
for i, item in enumerate(song_obj_list):
    print(song_name_list[i], ": \n",  item.get('audio_features').get(song_id_list[i]), '\n')
    
print("Keys of audio features for each song: \n", 
      list(song_obj_list[0].get('audio_features').get(song_id_list[0]).keys()))    

# print('shape of you: \n', soy.get('audio_features').get(soy_id), '\n')
# print('龙卷风: \n', ljf.get('audio_features').get(ljf_id), '\n')

audio_features
Cloudless : 
 {'danceability': 0.42, 'energy': 0.156, 'key': 9, 'loudness': -23.505, 'mode': 1, 'speechiness': 0.0359, 'acousticness': 0.941, 'instrumentalness': 0.95, 'liveness': 0.11, 'valence': 0.133, 'tempo': 134.623, 'type': 'audio_features', 'id': '76PhNwBIgoytLw14EXOLXs', 'uri': 'spotify:track:76PhNwBIgoytLw14EXOLXs', 'track_href': 'https://api.spotify.com/v1/tracks/76PhNwBIgoytLw14EXOLXs', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/76PhNwBIgoytLw14EXOLXs', 'duration_ms': 143032, 'time_signature': 4} 

Colors of the Sun : 
 {'danceability': 0.501, 'energy': 0.0449, 'key': 0, 'loudness': -16.176, 'mode': 1, 'speechiness': 0.0707, 'acousticness': 0.834, 'instrumentalness': 0.554, 'liveness': 0.111, 'valence': 0.242, 'tempo': 129.278, 'type': 'audio_features', 'id': '3H1TT6j9G6hQDOHWRXb3xm', 'uri': 'spotify:track:3H1TT6j9G6hQDOHWRXb3xm', 'track_href': 'https://api.spotify.com/v1/tracks/3H1TT6j9G6hQDOHWRXb3xm', 'analysis_url': 'https://api.spotify.com/

In [121]:
# lover.get('audio_analysis').get(lover_id)
# print('shape of you: \n', soy.get('audio_analysis').get(soy_id), '\n')
# print('龙卷风: \n', ljf.get('audio_analysis').get(ljf_id), '\n')

song_id = 0

print("audio_analysis \n")

# print("Keys of audio features for each song: \n", 
#       list(song_obj_list[0].get('audio_features').get(song_id_list[0]).keys())) 
print('Song: ', song_name_list[song_id], '\n')
print('Keys of audio analysis: \n', 
      list(song_obj_list[song_id].get('audio_analysis').get(song_id_list[song_id]).keys()), '\n')


song_audio_analysis = song_obj_list[song_id].get('audio_analysis').get(song_id_list[song_id])

for key in song_audio_analysis.keys():
    print(key)
    analysis = song_audio_analysis.get(key)
    print("Length: ", len(analysis))
    
    try:
        print("Features: ", list(analysis.keys()))
    except:
        print("Features: ", list(analysis[0].keys()))
    print("\n")


audio_analysis 

Song:  Cloudless 

Keys of audio analysis: 
 ['meta', 'track', 'bars', 'beats', 'tatums', 'sections', 'segments'] 

meta
Length:  7
Features:  ['analyzer_version', 'platform', 'detailed_status', 'status_code', 'timestamp', 'analysis_time', 'input_process']


track
Length:  26
Features:  ['num_samples', 'duration', 'sample_md5', 'offset_seconds', 'window_seconds', 'analysis_sample_rate', 'analysis_channels', 'end_of_fade_in', 'start_of_fade_out', 'loudness', 'tempo', 'tempo_confidence', 'time_signature', 'time_signature_confidence', 'key', 'key_confidence', 'mode', 'mode_confidence', 'codestring', 'code_version', 'echoprintstring', 'echoprint_version', 'synchstring', 'synch_version', 'rhythmstring', 'rhythm_version']


bars
Length:  80
Features:  ['start', 'duration', 'confidence']


beats
Length:  320
Features:  ['start', 'duration', 'confidence']


tatums
Length:  640
Features:  ['start', 'duration', 'confidence']


sections
Length:  7
Features:  ['start', 'duration',

### Use Tatums as an example

In [122]:
# If we just use one feature for now, say, tatums
import numpy as np
tatum_list = []

for song_id in range(len(song_id_list)):
    tatum_start_list = pd.DataFrame(
    song_obj_list[song_id].get('audio_analysis').get(song_id_list[song_id]).get('tatums')).start.tolist()

    tatum_list.append(tatum_start_list)
    
X_tatum = np.array(tatum_list)
X_tatum.shape[0]

42

In [123]:
max_len = 0
temp_len = 0
for i in range(X_tatum.shape[0]):
    max_len = len(X_tatum[i])
    if max_len < temp_len:
        max_len = temp_len
    else:
        temp_len = max_len
#     print(max_len)
max_len

1622

In [126]:
y = ['calm']*14 + ['sport']*14 + ['Love']*14
y

['calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'calm',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'sport',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love',
 'Love']

In [171]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)
encoded_Y
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)
dummy_y

# from keras.utils import to_categorical
# Y = [0,1,3]
# y_binary = to_categorical(Y)
# y_binary

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)

### Machine Learning

In [79]:
import tensorflow as tf
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
# fix random seed for reproducibility
numpy.random.seed(7)

Using TensorFlow backend.


In [172]:
# load the dataset but only keep the top n words, zero the rest
# top_words = 5000
# (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_tatum, dummy_y, test_size = 0.2, random_state = 0)

y_test


array([[1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.]], dtype=float32)

In [173]:
X_train.shape

(33,)

In [174]:
for i in range(len(X_train)):
    print(len(X_train[i]))

424
518
1100
276
584
1164
820
1550
778
902
1622
498
578
648
574
1176
688
198
344
770
594
808
516
766
830
364
1126
352
720
1146
996
702
640


In [175]:
# truncate and pad input sequences
# max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

# for i in range(len(X_train)):
#     print(len(X_train[i]))

X_train

array([[  0,   0,   0, ..., 170, 170, 171],
       [  0,   0,   0, ..., 148, 148, 149],
       [  0,   0,   0, ..., 220, 220, 221],
       ...,
       [  0,   0,   0, ..., 232, 233, 233],
       [  0,   0,   0, ..., 161, 161, 162],
       [  0,   0,   0, ..., 142, 142, 142]], dtype=int32)

In [183]:


# create the model
input_size = 500
embedding_vecor_length = 32
model = Sequential()
model.add(Embedding(input_size, embedding_vecor_length, input_length=max_len))
model.add(LSTM(500))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(3, activation='sigmoid'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_13 (Embedding)     (None, 1622, 32)          16000     
_________________________________________________________________
lstm_13 (LSTM)               (None, 500)               1066000   
_________________________________________________________________
dense_14 (Dense)             (None, 100)               50100     
_________________________________________________________________
dense_15 (Dense)             (None, 50)                5050      
_________________________________________________________________
dense_16 (Dense)             (None, 3)                 153       
Total params: 1,137,303
Trainable params: 1,137,303
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/3
Epoch 2/3
Epoch 3/3
Accuracy: 11.11%
