In [1]:
import pandas as pd
import numpy as np

In [2]:
import warnings
# Notebook-wide settings: mute every warning category and raise pandas'
# display limits so wide/long frames are shown without truncation.
warnings.filterwarnings(action="ignore")
pd.set_option("display.max_rows", 800)
pd.set_option("display.max_columns", 200)

In [3]:
# Load one CSV per band; the paths are relative to this notebook's folder.
nirvana, alice, pearl, sound = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Nirvana/nirvana_v5.csv',
        '../Alice in Chains/alice_v3.csv',
        '../Pearl Jam/pearl_v3.csv',
        '../Soundgarden/soundgarden_v3.csv',
    )
)

In [4]:
# Swap every NaN for an empty string in each source frame (modified in place).
for band_frame in (nirvana, alice, pearl, sound):
    band_frame.replace(np.nan, '', inplace=True)

In [5]:
# Expose Pearl Jam's 'authors' column under the name 'writers' as well
# (later cells address the combined frame's 'writers' column).
pearl = pearl.assign(writers=pearl['authors'])

In [6]:
# Duplicate Alice in Chains' singular 'author' / 'producer' columns under the
# plural names 'writers' / 'producers'.
alice = alice.assign(
    writers=alice['author'],
    producers=alice['producer'],
)

In [7]:
# Stack the four band frames into one and renumber the rows 0..n-1,
# discarding each frame's original index.
df = pd.concat([nirvana, alice, pearl, sound], ignore_index=True)

In [8]:
# Remove columns that are not kept in the combined dataset.
unused_columns = [
    'album_release_date',
    'track_album',
    'track_title',
    'album_title',
    'authors',
]
df = df.drop(columns=unused_columns)

In [9]:
# Relabel album type 'album' as 'studio album'.
# A single .loc assignment is used instead of `df.album_type[mask] = ...`:
# the chained form writes to an intermediate Series and does not update `df`
# when pandas Copy-on-Write is active.
df.loc[df['album_type'] == 'album', 'album_type'] = 'studio album'

In [10]:
# Collapse the multi-artist credit to just 'Alice In Chains'.
# .loc is used instead of chained `df.artists[mask] = ...`, which writes to an
# intermediate Series and does not update `df` under pandas Copy-on-Write.
df.loc[
    df['artists'] == 'Alice In Chains, Praga Kahn, Oliver Adams',
    'artists',
] = 'Alice In Chains'

In [11]:
# Clean the 'producers' column:
#   1. missing values become empty strings;
#   2. credits whose names were run together get a ', ' separator.
# Whole-column assignment and .loc replace the chained `df.producers[mask] = ...`
# form, which does not update `df` under pandas Copy-on-Write.
df['producers'] = df['producers'].fillna('')

producer_fixes = {
    'Brendan O\'BrienPearl Jam': 'Brendan O\'Brien, Pearl Jam',
    'Josh EvansPearl Jam': 'Josh Evans, Pearl Jam',
    'Rick ParasharPearl Jam': 'Rick Parashar, Pearl Jam',
}
for fused_credit, fixed_credit in producer_fixes.items():
    df.loc[df['producers'] == fused_credit, 'producers'] = fixed_credit

In [12]:
# Replace the run-together five-name writer credit with a comma-separated one.
# .loc is used instead of chained `df.writers[mask] = ...`, which does not
# update `df` under pandas Copy-on-Write.
df.loc[
    df['writers'] == 'Eddie VedderJeff AmentStone GossardMike McCreadyDave Abbruzzese',
    'writers',
] = 'Eddie Vedder, Stone Gossard, Mike McCready, Jeff Ament, Dave Abbruzzese'

In [13]:
# Trim leading/trailing whitespace from every track genre string.
df['track_genres'] = df['track_genres'].str.strip()

In [14]:
# Remove the stray space before commas (' ,' -> ',') in track styles.
# The vectorised .str accessor with regex=False does the same literal
# replacement as the per-row lambda, but passes missing values through
# instead of raising AttributeError on a non-string entry.
df['track_styles'] = df['track_styles'].str.replace(' ,', ',', regex=False)

In [15]:
# Missing album genres become empty strings.
# Whole-column assignment replaces chained `df.album_genres[mask] = ''`,
# which does not update `df` under pandas Copy-on-Write.
df['album_genres'] = df['album_genres'].fillna('')

In [16]:
# Album style lists whose items were run together without a separator,
# mapped to the same items joined with ', ' (item order is kept as-is).
album_style_fixes = {
    'Alternative MetalAlternative Pop/RockAlternative/Indie RockGrungeHard RockHeavy Metal':
        'Alternative Metal, Alternative Pop/Rock, Alternative/Indie Rock, Grunge, Hard Rock, Heavy Metal',
    'Alternative MetalGrungeHard Rock':
        'Alternative Metal, Grunge, Hard Rock',
    'Alternative MetalHard RockHeavy MetalAlternative/Indie Rock':
        'Alternative Metal, Hard Rock, Heavy Metal, Alternative/Indie Rock',
    'Alternative MetalHeavy MetalAlternative Pop/RockAlternative/Indie RockGrunge':
        'Alternative Metal, Heavy Metal, Alternative Pop/Rock, Alternative/Indie Rock, Grunge',
    'Alternative MetalHeavy MetalHard Rock':
        'Alternative Metal, Heavy Metal, Hard Rock',
    'Alternative Pop/RockAlternative/Indie RockGrunge':
        'Alternative Pop/Rock, Alternative/Indie Rock, Grunge',
    'Alternative Pop/RockAlternative/Indie RockGrungeHard Rock':
        'Alternative Pop/Rock, Alternative/Indie Rock, Grunge, Hard Rock',
    'Alternative Pop/RockAlternative/Indie RockGrungeHard RockPost-Grunge':
        'Alternative Pop/Rock, Alternative/Indie Rock, Grunge, Hard Rock, Post-Grunge',
    'Alternative Pop/RockAlternative/Indie RockHard Rock':
        'Alternative Pop/Rock, Alternative/Indie Rock, Hard Rock',
    'Alternative Pop/RockAlternative/Indie RockHard RockGrunge':
        'Alternative Pop/Rock, Alternative/Indie Rock, Hard Rock, Grunge',
    'Alternative/Indie RockGrunge':
        'Alternative/Indie Rock, Grunge',
    'Alternative/Indie RockGrungeAlternative Pop/RockAlternative MetalHeavy Metal':
        'Alternative/Indie Rock, Grunge, Alternative Pop/Rock, Alternative Metal, Heavy Metal',
    'Alternative/Indie RockGrungeIndie Rock':
        'Alternative/Indie Rock, Grunge, Indie Rock',
    'Alternative/Indie RockHard Rock':
        'Alternative/Indie Rock, Hard Rock',
}

# Exact-match replacement through .loc; the chained form
# `df.album_styles[mask] = ...` does not update `df` under pandas Copy-on-Write.
for fused_styles, separated_styles in album_style_fixes.items():
    df.loc[df['album_styles'] == fused_styles, 'album_styles'] = separated_styles

In [17]:
# Render recording dates as text and drop the '.0' left behind when a
# float is stringified (e.g. '1991.0' -> '1991').
# The pattern removes '.0' only when it follows a digit and is not followed by
# another digit. A plain str.replace('.0', '') would also mangle values such as
# '10.05.1991' (-> '105.1991').
df['album_recording_date'] = (
    df['album_recording_date']
    .astype(str)
    .str.replace(r'(?<=\d)\.0(?!\d)', '', regex=True)
)

In [18]:
# From column position 33 onward, turn both NaN and '' into None so that a
# single "missing" marker is used in those columns.
# NOTE(review): 33 is a positional offset taken from the original cell;
# presumably it marks the start of the chart-statistics columns - confirm
# against the actual column order.
MISSING_MARKER_START = 33

tail_columns = df.columns[MISSING_MARKER_START:]
# Work on object dtype so None survives instead of being coerced back to NaN.
tail_values = df[tail_columns].astype(object)
is_missing = tail_values.isna() | tail_values.eq('')
# An explicit mask avoids `replace(x, None)`, which older pandas releases
# interpret as "no value given" and handle by pad-filling from the previous row.
df[tail_columns] = tail_values.mask(is_missing, None)

In [19]:
def merge_coulumns(df, col1, col2):
    """Fold the values of a misnamed column into its correctly named twin.

    For every row where ``col2`` holds a real value (anything other than
    ``None``/``NaN``), that value overwrites ``col1``; rows where ``col2`` is
    missing keep their existing ``col1`` value. ``col2`` is then removed.

    The frame is modified in place and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    col1 : str
        Column with the right name (kept, receives the merged values).
    col2 : str
        Column with the wrong name (dropped after merging).

    Raises
    ------
    KeyError
        If ``col1`` or ``col2`` is not a column of ``df``.
    """
    wrong_named = df[col2]
    # One label-aligned, vectorised assignment. It avoids the chained
    # `df[col1].iloc[key] = value` write (lost under Copy-on-Write), does not
    # treat index labels as positions, and ignores NaN as well as None.
    df[col1] = df[col1].where(wrong_named.isna(), wrong_named)
    df.drop(col2, axis=1, inplace=True)

In [20]:
# Chart columns that exist twice: once with a non-breaking space ('\xa0')
# inside the chart name and once with an underscore in its place. Each
# '\xa0' variant is merged into its underscore twin, chart by chart.
nbsp_chart_names = (
    'alternative_digital_song\xa0sales',
    'canadian_digital_song\xa0sales',
    'hot_rock_&_alternative\xa0songs',
    'rock_digital_song\xa0sales',
)
chart_metrics = ('debut_date', 'peak_date', 'peak_pos', 'weaks_on_chart')

for nbsp_chart in nbsp_chart_names:
    clean_chart = nbsp_chart.replace('\xa0', '_')
    for metric in chart_metrics:
        merge_coulumns(df, clean_chart + '_' + metric, nbsp_chart + '_' + metric)

In [21]:
# Drop the singular 'author' / 'producer' columns from the combined frame.
df = df.drop(columns=['author', 'producer'])

In [24]:
# Display the (rows, columns) dimensions of the combined frame.
df.shape

(707, 97)

In [23]:
# Write the combined frame to CSV in the working directory, without the index.
df.to_csv(path_or_buf='grunge.csv', index=False)