In [76]:
import csv
from collections import OrderedDict
import json
import matplotlib.pyplot as plt

#Opens databases

with open('data.csv') as f:
    # we are using DictReader because we want our information to be in dictionary format.
    reader = csv.DictReader(f)
    # some more code
    albums = []
    for album in reader:
        albums.append(OrderedDict(
            {'number': album['number'], 'year': album['year'], 'album': album['album'], 'artist': album['artist'],
             'genre': album['genre'], 'subgenre': album['subgenre']}))

def import_song_list():
    # open the text file in read
    text_file = open('top-500-songs.txt', 'r')
    # read each line of the text file
    # here is where you can print out the lines to your terminal and get an idea 
    # for how you might think about re-formatting the data
    lines = text_file.readlines()
    song_list = []
    for line in lines:
        clean_line = line.replace("\n", "")
        song_chart_keys = ["rank", "name", "artist", "year"]
        song_chart_values = clean_line.split("\t")
        song_list.append(dict(zip(song_chart_keys, song_chart_values))) 
    return song_list



file = open('track_data.json', 'r')
discography = json.load(file)
songs = import_song_list()

#Begin Functions V1

def find_by_name(name):
    for album in albums:
        if album['album'] == name:
            return album


def find_by_rank(rank):
    for album in albums:
        if int(album['number']) == rank:
            return album

def find_by_year(year):
    all_albums_in_year = []
    for album in albums:
        if int(album['year']) == year:
            all_albums_in_year.append(album)
    return all_albums_in_year

def find_by_years(start_year, end_year):
    all_albums_within_years = []
    for album in albums:
        if int(album['year']) >= start_year and int(album['year']) <= end_year:
            all_albums_within_years.append(album)
    return all_albums_within_years

def find_by_ranks(start_rank, end_rank):
    all_albums_within_ranks = []
    for album in albums:
        if int(album['number']) >= start_rank and int(album['number']) <= end_rank:
            all_albums_within_ranks.append(album)
    return all_albums_within_ranks

def all_titles():
    every_album_title = []
    for album in albums:
        every_album_title.append(album['album'])
    return every_album_title

def all_artists():
    every_album_artist = []
    for album in albums:
        every_album_artist.append(album['artist'])
    return every_album_artist

def most_albums():
    every_artist = list(all_artists())
    unique_artists = set(all_artists())
    artists_counts = []
    raw_counts = []
    most_artists = []
    for artist in unique_artists:
        count = all_artists().count(artist)
        artists_counts.append([artist, count])
        raw_counts.append(count)
    most = max(raw_counts)
    for artist_count in artists_counts:
        if most == artist_count[1]:
            most_artists.append(str(artist_count[0]+ " - " + str(artist_count[1])))
    return most_artists

def genre_list():
    genre_list = []
    for album in albums:
        raw_genre = album["genre"]
        split = raw_genre.replace("& ", "").split(", ")
        genre_list.extend(split)
#         genre_list.append(album["genre"])
        
    return genre_list

def all_years():
    every_album_year = []
    for album in albums:
        every_album_year.append(int(album['year']))
    return every_album_year
def all_decades():
    all_years_list = list(all_years())
    every_album_decade = []
    for year in all_years_list:
        every_album_decade.append(year - year % 10)
    return every_album_decade
def hist_of_albums_by_decade():
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    list_bins = sorted(list(set(all_decades())))
    years_list = all_years()
    plt.hist(years_list, bins = list_bins)
    
    plt.xlabel('Decades')
    plt.ylabel('Frequency of Decades')
    plt.title('Number of Albums released by Decade')
    plt.show()

# Set plot space as inline for inline plots and qt for external plots
%matplotlib inline

def genre_counts():
    genre_list = []
    genre_counts = []
    for album in albums:
        raw_genre = album["genre"]
        split = raw_genre.replace("& ", "").replace(", ", ",").split(",")
        genre_list.extend(split)
    unique_genres = list(set(genre_list))
    sorted_genres = sorted(unique_genres)
    for genre in sorted_genres:
        genre_counts.append(genre_list.count(genre))
    return [sorted_genres, genre_counts]
genre_counts()

# Use the bar() function to create a plot using the above values of x and y. Add a label.
def genre_bar_graph():
    plt.figure(figsize=(14,6))
    plt.bar(genre_counts()[0], genre_counts()[1], label='Genre Counts')
    plt.xlabel('Genres')
    plt.ylabel('Counts')
    plt.title('Genre Counts')
    plt.legend()
    # Output the final plot
    plt.show()
    

#Revised Functions
def find_by_individual_value(file, key, value):
    all_values = []
    for f in file:
        if f[key] == value:
            return f
        
def find_by_year(file, year):
    all_in_year = []
    for f in file:
        if int(f['year']) == year:
            all_in_year.append(f)
    return all_in_year

def find_by_years(file, start_year, end_year):
    all_within_years = []
    for f in file:
        if int(f['year']) >= start_year and int(f['year']) <= end_year:
            all_within_years.append(f)
    return all_within_years

def find_by_ranks(file, start_rank, end_rank):
    all_within_ranks = []
    for f in file:
        if int(f['rank']) >= start_rank and int(f['rank']) <= end_rank:
            all_within_ranks.append(f)
    return all_within_ranks

find_by_ranks(songs, 1,20)

#This be experimental

def album_top_songs():
    top_songs_list = []
    top_song_count = []
    for song in songs:
        top_songs_list.append(song['name'])

    for disc in discography:
        song_list = disc["tracks"]
        count = 0
        for song in disc:
            if song.replace(' - Remastered', '') in top_songs_list:
                count += 1
        top_song_count.append([disc["album"], count])
    return top_song_count
    
def songs_on_top_albums():
    songs_on_top_albums = []
    album_titles = all_titles()
    for album_title in album_titles:
        for disc in discography:
            if album_title == disc['album']:
                songs_on_top_albums.append(disc['tracks'])
            else:
                songs_on_top_albums.append('Nada')
    return songs_on_top_albums

[["Sgt. Pepper's Lonely Hearts Club Band - Remix",
  'With A Little Help From My Friends - Remix',
  'Lucy In The Sky With Diamonds - Remix',
  'Getting Better - Remix',
  'Fixing A Hole - Remix',
  "She's Leaving Home - Remix",
  'Being For The Benefit Of Mr. Kite! - Remix',
  'Within You Without You - Remix',
  "When I'm Sixty-Four - Remix",
  'Lovely Rita - Remix',
  'Good Morning Good Morning - Remix',
  "Sgt. Pepper's Lonely Hearts Club Band (Reprise) - Remix",
  'A Day In The Life - Remix',
  "Sgt. Pepper's Lonely Hearts Club Band - Take 9 And Speech",
  'With A Little Help From My Friends - Take 1 / False Start And Take 2 / Instrumental',
  'Lucy In The Sky With Diamonds - Take 1',
  'Getting Better - Take 1 / Instrumental And Speech At The End',
  'Fixing A Hole - Speech And Take 3',
  "She's Leaving Home - Take 1 / Instrumental",
  'Being For The Benefit Of Mr. Kite! - Take 4',
  'Within You Without You - Take 1 / Indian Instruments',
  "When I'm Sixty-Four - Take 2",
  'Lovel

In [None]:
import csv
from collections import OrderedDict
import json
import matplotlib.pyplot as plt

#Opens databases

with open('data.csv') as f:
    # we are using DictReader because we want our information to be in dictionary format.
    reader = csv.DictReader(f)
    # some more code
    albums = []
    for album in reader:
        albums.append(OrderedDict(
            {'number': album['number'], 'year': album['year'], 'album': album['album'], 'artist': album['artist'],
             'genre': album['genre'], 'subgenre': album['subgenre']}))

def import_song_list():
    # open the text file in read
    text_file = open('top-500-songs.txt', 'r')
    # read each line of the text file
    # here is where you can print out the lines to your terminal and get an idea 
    # for how you might think about re-formatting the data
    lines = text_file.readlines()
    song_list = []
    for line in lines:
        clean_line = line.replace("\n", "")
        song_chart_keys = ["rank", "name", "artist", "year"]
        song_chart_values = clean_line.split("\t")
        song_list.append(dict(zip(song_chart_keys, song_chart_values))) 
    return song_list



file = open('track_data.json', 'r')
discography = json.load(file)
songs = import_song_list()
discography

In [None]:
#Begin Functions V1

def find_by_name(name):
    for album in albums:
        if album['album'] == name:
            return album


def find_by_rank(rank):
    for album in albums:
        if int(album['number']) == rank:
            return album

def find_by_year(year):
    all_albums_in_year = []
    for album in albums:
        if int(album['year']) == year:
            all_albums_in_year.append(album)
    return all_albums_in_year

def find_by_years(start_year, end_year):
    all_albums_within_years = []
    for album in albums:
        if int(album['year']) >= start_year and int(album['year']) <= end_year:
            all_albums_within_years.append(album)
    return all_albums_within_years

def find_by_ranks(start_rank, end_rank):
    all_albums_within_ranks = []
    for album in albums:
        if int(album['number']) >= start_rank and int(album['number']) <= end_rank:
            all_albums_within_ranks.append(album)
    return all_albums_within_ranks

In [None]:
# All Functions

def all_titles():
    every_album_title = []
    for album in albums:
        every_album_title.append(album['album'])
    return every_album_title

def all_artists():
    every_album_artist = []
    for album in albums:
        every_album_artist.append(album['artist'])
    return every_album_artist

In [None]:
# Questions to Answer / Functions

def most_albums():
    every_artist = list(all_artists())
    unique_artists = set(all_artists())
    artists_counts = []
    raw_counts = []
    most_artists = []
    for artist in unique_artists:
        count = all_artists().count(artist)
        artists_counts.append([artist, count])
        raw_counts.append(count)
    most = max(raw_counts)
    for artist_count in artists_counts:
        if most == artist_count[1]:
            most_artists.append(str(artist_count[0]+ " - " + str(artist_count[1])))
    return most_artists

def genre_list():
    genre_list = []
    for album in albums:
        raw_genre = album["genre"]
        split = raw_genre.replace("& ", "").split(", ")
        genre_list.extend(split)
#         genre_list.append(album["genre"])
        
    return genre_list

def all_years():
    every_album_year = []
    for album in albums:
        every_album_year.append(int(album['year']))
    return every_album_year
def all_decades():
    all_years_list = list(all_years())
    every_album_decade = []
    for year in all_years_list:
        every_album_decade.append(year - year % 10)
    return every_album_decade
def hist_of_albums_by_decade():
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    list_bins = sorted(list(set(all_decades())))
    years_list = all_years()
    plt.hist(years_list, bins = list_bins)
    
    plt.xlabel('Decades')
    plt.ylabel('Frequency of Decades')
    plt.title('Number of Albums released by Decade')
    plt.show()

# Set plot space as inline for inline plots and qt for external plots
%matplotlib inline

def genre_counts():
    genre_list = []
    genre_counts = []
    for album in albums:
        raw_genre = album["genre"]
        split = raw_genre.replace("& ", "").replace(", ", ",").split(",")
        genre_list.extend(split)
    unique_genres = list(set(genre_list))
    sorted_genres = sorted(unique_genres)
    for genre in sorted_genres:
        genre_counts.append(genre_list.count(genre))
    return [sorted_genres, genre_counts]
genre_counts()

# Use the bar() function to create a plot using the above values of x and y. Add a label.
def genre_bar_graph():
    plt.figure(figsize=(14,6))
    plt.bar(genre_counts()[0], genre_counts()[1], label='Genre Counts')
    plt.xlabel('Genres')
    plt.ylabel('Counts')
    plt.title('Genre Counts')
    plt.legend()
    # Output the final plot
    plt.show()

In [5]:
#Revised Functions
def find_by_individual_value(file, key, value):
    all_values = []
    for f in file:
        if f[key] == value:
            return f
        
def find_by_year(file, year):
    all_in_year = []
    for f in file:
        if int(f['year']) == year:
            all_in_year.append(f)
    return all_in_year

def find_by_years(file, start_year, end_year):
    all_within_years = []
    for f in file:
        if int(f['year']) >= start_year and int(f['year']) <= end_year:
            all_within_years.append(f)
    return all_within_years

def find_by_ranks(file, start_rank, end_rank):
    all_within_ranks = []
    for f in file:
        if int(f['rank']) >= start_rank and int(f['rank']) <= end_rank:
            all_within_ranks.append(f)
    return all_within_ranks

In [25]:
def all_value(data, value):
    every_value = []
    for d in data:
        every_value.append(d[value])
    return every_value

**Final Album Functions**

In [32]:
# Cleaning Functions
discography
# songs
# albums

[{'artist': 'The Beatles',
  'album': "Sgt. Pepper's Lonely Hearts Club Band",
  'tracks': ["Sgt. Pepper's Lonely Hearts Club Band - Remix",
   'With A Little Help From My Friends - Remix',
   'Lucy In The Sky With Diamonds - Remix',
   'Getting Better - Remix',
   'Fixing A Hole - Remix',
   "She's Leaving Home - Remix",
   'Being For The Benefit Of Mr. Kite! - Remix',
   'Within You Without You - Remix',
   "When I'm Sixty-Four - Remix",
   'Lovely Rita - Remix',
   'Good Morning Good Morning - Remix',
   "Sgt. Pepper's Lonely Hearts Club Band (Reprise) - Remix",
   'A Day In The Life - Remix',
   "Sgt. Pepper's Lonely Hearts Club Band - Take 9 And Speech",
   'With A Little Help From My Friends - Take 1 / False Start And Take 2 / Instrumental',
   'Lucy In The Sky With Diamonds - Take 1',
   'Getting Better - Take 1 / Instrumental And Speech At The End',
   'Fixing A Hole - Speech And Take 3',
   "She's Leaving Home - Take 1 / Instrumental",
   'Being For The Benefit Of Mr. Kite! - 

In [120]:
def album_top_songs():
    top_song_count = []
    top_song_list = all_value(songs, 'name')
    top_artist_list = all_value(songs, 'artist')
    top_song_list_with_artists = list(zip(top_artist_list, top_song_list))
    all_song_list = all_value(discography, 'tracks')

    count_list = []
    for number, alb in enumerate(all_song_list):
        count = 0
        for individual_song in alb:
            for song in top_song_list:
                if song == individual_song:
                    count += 1
        if count != 0:
            count_dict = {'Artist': discography[number]['artist'], 'Album': discography[number]['album'], 'Count': count}
            count_list.append(count_dict)
            
    alb_list = []
    for alb in count_list:
        alb_list.append(alb['Count'])
        
    maximum = max(alb_list)
    max_list = []
    for alb in count_list:
        if alb['Count'] == maximum:
            max_list.append(alb)
            
    return max_list
    
album_top_songs()

[{'Artist': 'Elvis Presley', 'Album': 'Elvis Presley', 'Count': 8}]

In [128]:
def album_with_top_songs():
    top_song_count = []
    top_song_list = all_value(songs, 'name')
    top_artist_list = all_value(songs, 'artist')
    top_song_list_with_artists = list(zip(top_artist_list, top_song_list))
    all_song_list = all_value(discography, 'tracks')

    count_list = []
    for number, alb in enumerate(all_song_list):
        count = 0
        for individual_song in alb:
            for song in top_song_list:
                if song == individual_song:
                    count += 1
        if count != 0:
            count_dict = {'Artist': discography[number]['artist'], 'Album': discography[number]['album'], 'Count': count}
            count_list.append(count_dict)
    return count_list
            
album_with_top_songs()

[{'Artist': 'Bob Dylan', 'Album': 'Highway 61 Revisited', 'Count': 3},
 {'Artist': 'Marvin Gaye', 'Album': "What's Going On", 'Count': 1},
 {'Artist': 'The Rolling Stones', 'Album': 'Exile on Main St.', 'Count': 1},
 {'Artist': 'Bob Dylan', 'Album': 'Blonde on Blonde', 'Count': 2},
 {'Artist': 'Elvis Presley', 'Album': 'The Sun Sessions', 'Count': 2},
 {'Artist': 'The Velvet Underground',
  'Album': 'The Velvet Underground & Nico',
  'Count': 1},
 {'Artist': 'The Jimi Hendrix Experience',
  'Album': 'Are You Experienced',
  'Count': 4},
 {'Artist': 'Bruce Springsteen', 'Album': 'Born to Run', 'Count': 2},
 {'Artist': 'Michael Jackson', 'Album': 'Thriller', 'Count': 1},
 {'Artist': 'Robert Johnson', 'Album': 'The Complete Recordings', 'Count': 1},
 {'Artist': 'Stevie Wonder', 'Album': 'Innervisions', 'Count': 1},
 {'Artist': 'U2', 'Album': 'The Joshua Tree', 'Count': 1},
 {'Artist': 'Led Zeppelin', 'Album': 'Led Zeppelin', 'Count': 2},
 {'Artist': 'Bob Dylan', 'Album': 'Bringing It All 

In [None]:
def song_on_top_albums():
    top_song_count = []
    top_song_list = all_value(songs, 'name')
    top_artist_list = all_value(songs, 'artist')
    top_song_list_with_artists = list(zip(top_artist_list, top_song_list))
    all_song_list = all_value(discography, 'tracks')

    count_list = []
    for number, alb in enumerate(all_song_list):
        count = 0
        for individual_song in alb:
            for song in top_song_list:
                if song == individual_song:
                    count += 1
        if count != 0:
            count_dict = {'Artist': discography[number]['artist'], 'Album': discography[number]['album'], 'Count': count}
            count_list.append(count_dict)
    return count_list
            
album_with_top_songs()

In [None]:
# def album_top_songs():
#     top_song_count = []
#     top_song_list = all_value(songs, 'name')
#     top_artist_list = all_value(songs, 'artist')
#     top_song_list_with_artists = list(zip(top_artist_list, top_song_list))

#     count_list = []
#     for alb in discography:
#         count = 0
#         for individual_song in alb['tracks']:
#             for song in top_song_list_with_artists:
#                 if song[1] in individual_song and alb['artist'] == song[0]:
#                     count += 1
#             continue
#         count_list.append(count)

In [45]:
top_song_count = []
top_song_list = all_value(songs, 'name')
top_artist_list = all_value(songs, 'artist')
top_song_list_with_artists = list(zip(top_artist_list, top_song_list))

len(top_song_list)

500

In [105]:
import re
def clean_songs():
    album_songs = []
    for disc in discography:
        for track in disc['tracks']:
            album_songs.append(re.sub)
            
    return album_songs
clean_songs()

["Sgt. Pepper's Lonely Hearts Club Band - Remix",
 'With A Little Help From My Friends - Remix',
 'Lucy In The Sky With Diamonds - Remix',
 'Getting Better - Remix',
 'Fixing A Hole - Remix',
 "She's Leaving Home - Remix",
 'Being For The Benefit Of Mr. Kite! - Remix',
 'Within You Without You - Remix',
 "When I'm Sixty-Four - Remix",
 'Lovely Rita - Remix',
 'Good Morning Good Morning - Remix',
 "Sgt. Pepper's Lonely Hearts Club Band (Reprise) - Remix",
 'A Day In The Life - Remix',
 "Sgt. Pepper's Lonely Hearts Club Band - Take 9 And Speech",
 'With A Little Help From My Friends - Take 1 / False Start And Take 2 / Instrumental',
 'Lucy In The Sky With Diamonds - Take 1',
 'Getting Better - Take 1 / Instrumental And Speech At The End',
 'Fixing A Hole - Speech And Take 3',
 "She's Leaving Home - Take 1 / Instrumental",
 'Being For The Benefit Of Mr. Kite! - Take 4',
 'Within You Without You - Take 1 / Indian Instruments',
 "When I'm Sixty-Four - Take 2",
 'Lovely Rita - Speech And Tak