In [29]:
import csv
from collections import OrderedDict
import json
import matplotlib.pyplot as plt

#Opens databases

with open('data.csv') as f:
    # we are using DictReader because we want our information to be in dictionary format.
    reader = csv.DictReader(f)
    # some more code
    albums = []
    for album in reader:
        albums.append(OrderedDict(
            {'number': album['number'], 'year': album['year'], 'album': album['album'], 'artist': album['artist'],
             'genre': album['genre'], 'subgenre': album['subgenre']}))

def import_song_list():
    # open the text file in read
    text_file = open('top-500-songs.txt', 'r')
    # read each line of the text file
    # here is where you can print out the lines to your terminal and get an idea 
    # for how you might think about re-formatting the data
    lines = text_file.readlines()
    song_list = []
    for line in lines:
        clean_line = line.replace("\n", "")
        song_chart_keys = ["rank", "name", "artist", "year"]
        song_chart_values = clean_line.split("\t")
        song_list.append(dict(zip(song_chart_keys, song_chart_values))) 
    return song_list



file = open('track_data.json', 'r')
discography = json.load(file)
songs = import_song_list()


#Begin Functions V1

def find_by_name(name):
    for album in albums:
        if album['album'] == name:
            return album


def find_by_rank(rank):
    for album in albums:
        if int(album['number']) == rank:
            return album

def find_by_year(year):
    all_albums_in_year = []
    for album in albums:
        if int(album['year']) == year:
            all_albums_in_year.append(album)
    return all_albums_in_year

def find_by_years(start_year, end_year):
    all_albums_within_years = []
    for album in albums:
        if int(album['year']) >= start_year and int(album['year']) <= end_year:
            all_albums_within_years.append(album)
    return all_albums_within_years

def find_by_ranks(start_rank, end_rank):
    all_albums_within_ranks = []
    for album in albums:
        if int(album['number']) >= start_rank and int(album['number']) <= end_rank:
            all_albums_within_ranks.append(album)
    return all_albums_within_ranks

def all_titles():
    every_album_title = []
    for album in albums:
        every_album_title.append(album['album'])
    return every_album_title

def all_artists():
    every_album_artist = []
    for album in albums:
        every_album_artist.append(album['artist'])
    return every_album_artist

def most_albums():
    every_artist = list(all_artists())
    unique_artists = set(all_artists())
    artists_counts = []
    raw_counts = []
    most_artists = []
    for artist in unique_artists:
        count = all_artists().count(artist)
        artists_counts.append([artist, count])
        raw_counts.append(count)
    most = max(raw_counts)
    for artist_count in artists_counts:
        if most == artist_count[1]:
            most_artists.append(str(artist_count[0]+ " - " + str(artist_count[1])))
    return most_artists

def genre_list():
    genre_list = []
    for album in albums:
        raw_genre = album["genre"]
        split = raw_genre.replace("& ", "").split(", ")
        genre_list.extend(split)
#         genre_list.append(album["genre"])
        
    return genre_list

def all_years():
    every_album_year = []
    for album in albums:
        every_album_year.append(int(album['year']))
    return every_album_year
def all_decades():
    all_years_list = list(all_years())
    every_album_decade = []
    for year in all_years_list:
        every_album_decade.append(year - year % 10)
    return every_album_decade
def hist_of_albums_by_decade():
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    list_bins = sorted(list(set(all_decades())))
    years_list = all_years()
    plt.hist(years_list, bins = list_bins)
    
    plt.xlabel('Decades')
    plt.ylabel('Frequency of Decades')
    plt.title('Number of Albums released by Decade')
    plt.show()

# Set plot space as inline for inline plots and qt for external plots
%matplotlib inline

def genre_counts():
    genre_list = []
    genre_counts = []
    for album in albums:
        raw_genre = album["genre"]
        split = raw_genre.replace("& ", "").replace(", ", ",").split(",")
        genre_list.extend(split)
    unique_genres = list(set(genre_list))
    sorted_genres = sorted(unique_genres)
    for genre in sorted_genres:
        genre_counts.append(genre_list.count(genre))
    return [sorted_genres, genre_counts]
genre_counts()

# Use the bar() function to create a plot using the above values of x and y. Add a label.
def genre_bar_graph():
    plt.figure(figsize=(14,6))
    plt.bar(genre_counts()[0], genre_counts()[1], label='Genre Counts')
    plt.xlabel('Genres')
    plt.ylabel('Counts')
    plt.title('Genre Counts')
    plt.legend()
    # Output the final plot
    plt.show()
    

#Revised Functions
def find_by_individual_value(file, key, value):
    all_values = []
    for f in file:
        if f[key] == value:
            return f
        
def find_by_year(file, year):
    all_in_year = []
    for f in file:
        if int(f['year']) == year:
            all_in_year.append(f)
    return all_in_year

def find_by_years(file, start_year, end_year):
    all_within_years = []
    for f in file:
        if int(f['year']) >= start_year and int(f['year']) <= end_year:
            all_within_years.append(f)
    return all_within_years

def find_by_ranks(file, start_rank, end_rank):
    all_within_ranks = []
    for f in file:
        if int(f['rank']) >= start_rank and int(f['rank']) <= end_rank:
            all_within_ranks.append(f)
    return all_within_ranks

find_by_ranks(songs, 1,20)

#This be experimental

def album_top_songs():
    top_songs_list = []
    top_song_count = []
    for song in songs:
        top_songs_list.append(song['name'])

    for disc in discography:
        song_list = disc["tracks"]
        count = 0
        for song in disc:
            if song.replace(' - Remastered', '') in top_songs_list:
                count += 1
        top_song_count.append([disc["album"], count])
    return top_song_count
    
def songs_on_top_albums():
    songs_on_top_albums = []
    album_titles = all_titles()
    for album_title in album_titles:
        for disc in discography:
            if album_title == disc['album']:
                songs_on_top_albums.append(disc['tracks'])
            else:
                songs_on_top_albums.append('Nada')
    return album_titles

songs_on_top_albums()

FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'

In [30]:
def songs_on_top_albums():
    songs_on_top_albums = []
    album_titles = all_titles()
    for album_title in album_titles:
        for disc in discography:
            if album_title == disc['album']:
                songs_on_top_albums.append(disc['tracks'])
            else:
                songs_on_top_albums.append('Nada')
    return album_titles

songs_on_top_albums()

SyntaxError: invalid character in identifier (<ipython-input-30-e31117133c85>, line 11)