In [57]:
import requests
import pandas as pd
from bs4 import BeautifulSoup, NavigableString, Tag
from googlesearch import search
import os
import numpy as np
import time

In [158]:
pd.set_option('display.max_rows', 500)

In [59]:
def voice_usa_crawl(season):
    """Scrape the leading wikitables of one season's Wikipedia article.

    The page ``The_Voice_(American_season_<season>)`` is downloaded and the
    first ``E + 2`` tables with class ``wikitable`` are read, where ``E`` is the
    number of ``<h3>`` headings whose text contains "Episode". If the page has
    fewer tables than that, all available tables are used.

    Parameters
    ----------
    season : int or str
        Season number, interpolated into the article URL.

    Returns
    -------
    list of list of str
        One entry per ``<tr>`` of the selected tables. Each entry is the row
        text split on commas after newlines were turned into commas and
        non-breaking spaces removed, so a cell that itself contains a comma
        is spread over several fields.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    url = "https://en.wikipedia.org/wiki/The_Voice_(American_season_{})".format(season)
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")

    episode_count = sum("Episode" in heading.text for heading in soup.find_all('h3'))
    # Query the tables once; slicing tolerates pages with fewer tables than expected.
    tables = soup.find_all('table', {'class': 'wikitable'})[:episode_count + 2]

    extraction = []
    for table in tables:
        for row in table.find_all('tr'):
            fields = (row.text.replace("\n\n", ",").replace("\n", ",")
                      .replace("\xa0", "").lstrip(",").split(","))
            extraction.append(fields)
    return extraction
    

In [60]:
d = {}
for season in range(1,18):
    d[season] = pd.DataFrame(voice_usa_crawl(season))

In [61]:
# Season numbers in ascending order, except that 17 is listed before 16.
desired_order = [*range(1, 16), 17, 16]

In [62]:
d_d = {k: d[k] for k in desired_order}

In [63]:
# Stack the per-season frames into one table, tagging every row with its season.
# ``assign`` returns a copy, so the raw frames held in ``d_d`` are left untouched,
# and the frames are concatenated once rather than growing a frame inside a loop.
season_frames = [frame.assign(Season=season_key) for season_key, frame in d_d.items()]
# Reversed so that the last key of ``d_d`` ends up at the top of the table.
all_seasons = pd.concat(season_frames[::-1], axis=0, sort=False)
# Keep the first 13 scraped fields plus ``Season``, selected by label so the result
# does not depend on the column order produced by the concatenation.
all_seasons = all_seasons[[*range(13), 'Season']]

In [64]:
all_seasons.head(25)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,Season
0,Coaches,Top 54 artists,,,,,,,,,,,,16
1,Adam Levine,,,,,,,,,,,,,16
2,LB Crew,Mari,Betsy Ade,Domenic Haynes,Kalvin Jarvis,,,,,,,,,16
3,LB Crew,Kendra Checketts,Celia Babini,Rod Stokes,Andrew Jannakos,,,,,,,,,16
4,Ciera Dumas,Jimmy Mowery,Karly Moreno,Patrick McAloon,Anthony Ortiz,,,,,,,,,16
5,Trey Rose,,,,,,,,,,,,,16
6,John Legend,,,,,,,,,,,,,16
7,Maelyn Jarmon,Shawn Sounds,Celia Babini,Jacob Maxwell,Jimmy Mowery,,,,,,,,,16
8,Lisa Ramey,Julian King,Beth Griffith-Manley,Kayslin Victoria,Savannah Brister,,,,,,,,,16
9,Oliv Blu,Betsy Ade,Matthew Johnson,Talon Cardon,Denton Arnell,,,,,,,,,16


In [65]:
all_seasons.Season.nunique()

17

In [66]:
colnames = ["Wiki_id", "Singer", "Age", "City", 'State', 'Song', 'Coach_1', 'Coach_2', 'Coach_3', 'Coach_4',
            'extra_1', 'extra_2', 'extra_3', 'Season']

In [67]:
all_seasons.columns = colnames

In [68]:
all_seasons.head(2)

Unnamed: 0,Wiki_id,Singer,Age,City,State,Song,Coach_1,Coach_2,Coach_3,Coach_4,extra_1,extra_2,extra_3,Season
0,Coaches,Top 54 artists,,,,,,,,,,,,16
1,Adam Levine,,,,,,,,,,,,,16


In [69]:
quasi_table_1 = all_seasons.loc[all_seasons.Wiki_id.str.isdigit()]

In [70]:
len(quasi_table_1)

1167

In [71]:
# Keep rows whose Coach_1 holds one of the known response markers, or where a double
# quote appears in Coach_1 / Coach_2 (presumably a song title shifted right by an
# extra comma -- verify against the scraped rows), and whose Wiki_id is all digits.
coach_markers = ['✔', '—', '✘', 'Team full', '–']
has_coach_marker = quasi_table_1.Coach_1.isin(coach_markers)
has_quote_in_coach = (quasi_table_1.Coach_1.str.contains('"', na=False)
                      | quasi_table_1.Coach_2.str.contains('"', na=False))
has_numeric_id = quasi_table_1.Wiki_id.str.isdigit()
quasi_table_2 = quasi_table_1.loc[(has_coach_marker | has_quote_in_coach) & has_numeric_id]

In [72]:
len(quasi_table_2)

1136

In [73]:
quasi_table_3 = quasi_table_2.loc[~quasi_table_2.Age.str.contains('"', na=False)]

In [74]:
len(quasi_table_3)

1127

In [75]:
quasi_table_3.head()

Unnamed: 0,Wiki_id,Singer,Age,City,State,Song,Coach_1,Coach_2,Coach_3,Coach_4,extra_1,extra_2,extra_3,Season
24,1,Gyth Rigdon,24,Singer,Louisiana,"""Drift Away""",—,✘,✔,✔,,,,16
25,2,Maelyn Jarmon,26,Frisco,Texas,"""Fields of Gold""",✔,✔,✔,✔,,,,16
26,3,Karen Galera,19,Dallas,Texas,"""Mi Corazoncito""",—,✔,✔,—,,,,16
27,4,Trey Rose,27,Hugo,Oklahoma,"""Wake Me Up""",✔,—,—,✔,,,,16
28,5,Kim Cherry,30,Niceville,Florida,"""No Scrubs""",—,—,✔,✔,,,,16


In [76]:
quasi_table_3 = quasi_table_3.reset_index(drop=True)

In [77]:
quasi_table_3.head()

Unnamed: 0,Wiki_id,Singer,Age,City,State,Song,Coach_1,Coach_2,Coach_3,Coach_4,extra_1,extra_2,extra_3,Season
0,1,Gyth Rigdon,24,Singer,Louisiana,"""Drift Away""",—,✘,✔,✔,,,,16
1,2,Maelyn Jarmon,26,Frisco,Texas,"""Fields of Gold""",✔,✔,✔,✔,,,,16
2,3,Karen Galera,19,Dallas,Texas,"""Mi Corazoncito""",—,✔,✔,—,,,,16
3,4,Trey Rose,27,Hugo,Oklahoma,"""Wake Me Up""",✔,—,—,✔,,,,16
4,5,Kim Cherry,30,Niceville,Florida,"""No Scrubs""",—,—,✔,✔,,,,16


In [78]:
# st1: rows of quasi_table_3 with no double quote in Coach_1, none in Coach_2, and no
# em dash in Song. The three filters are applied one after another.
st1 = quasi_table_3
for column_name, marker in (("Coach_1", '"'), ("Coach_2", '"'), ("Song", '—')):
    st1 = st1.loc[~st1[column_name].str.contains(marker)]

In [79]:
st2 = quasi_table_3.loc[(quasi_table_3.Coach_1.str.contains('"'))|(quasi_table_3.Coach_2.str.contains('"'))]

In [80]:
st3 = quasi_table_3.loc[(quasi_table_3.Song == '—')]

In [81]:
len(st1)

1066

In [82]:
len(st2)

52

In [83]:
len(st3)

9

In [84]:
st3

Unnamed: 0,Wiki_id,Singer,Age,City,State,Song,Coach_1,Coach_2,Coach_3,Coach_4,extra_1,extra_2,extra_3,Season
815,6,Yasmin,,,"""Love Song""",—,—,—,—,,,,,5
940,4,Jasmin Rose,,,"""Mercy""",—,—,—,—,,,,,3
991,17,LaRae Rhodes,,,"""Black Horse and the Cherry Tree""",—,—,—,—,,,,,3
1009,10,Gedina,,,"""Ain't Nobody""",—,—,,,,,,,3
1027,5,Aly Jados,,,"""Because the Night""",—,—,—,—,,,,,2
1051,14,Moses Stone,26.0,Maryland,"""Let's Get It Started""",—,—,✔,—,,,,,2
1064,12,Luna Searles,,,"""Come to My Window""",—,—,—,—,,,,,2
1085,12,Beta,,,"""You Make Me Feel...""",—,Team full,,,,,,,2
1116,13,Serabee,34.0,Mississippi,"""Son of a Preacher Man""",—,—,—,✔,,,,,1


In [85]:
st3 = st3.copy()

In [86]:
st3 = st3.loc[:,['Singer','Age','City','State','Season']]

In [87]:
st3.head()

Unnamed: 0,Singer,Age,City,State,Season
815,Yasmin,,,"""Love Song""",5
940,Jasmin Rose,,,"""Mercy""",3
991,LaRae Rhodes,,,"""Black Horse and the Cherry Tree""",3
1009,Gedina,,,"""Ain't Nobody""",3
1027,Aly Jados,,,"""Because the Night""",2


In [88]:
the_columns = ['Singer', 'Age', 'State', 'Song', 'Season']

In [89]:
st3.columns = the_columns

In [90]:
st3.head()

Unnamed: 0,Singer,Age,State,Song,Season
815,Yasmin,,,"""Love Song""",5
940,Jasmin Rose,,,"""Mercy""",3
991,LaRae Rhodes,,,"""Black Horse and the Cherry Tree""",3
1009,Gedina,,,"""Ain't Nobody""",3
1027,Aly Jados,,,"""Because the Night""",2


In [91]:
the_columns_ordered = ['Season', 'Singer', 'Age', 'Song', 'State']

In [92]:
st3 = st3[the_columns_ordered]

In [93]:
st3

Unnamed: 0,Season,Singer,Age,Song,State
815,5,Yasmin,,"""Love Song""",
940,3,Jasmin Rose,,"""Mercy""",
991,3,LaRae Rhodes,,"""Black Horse and the Cherry Tree""",
1009,3,Gedina,,"""Ain't Nobody""",
1027,2,Aly Jados,,"""Because the Night""",
1051,2,Moses Stone,26.0,"""Let's Get It Started""",Maryland
1064,2,Luna Searles,,"""Come to My Window""",
1085,2,Beta,,"""You Make Me Feel...""",
1116,1,Serabee,34.0,"""Son of a Preacher Man""",Mississippi


In [94]:
st2

Unnamed: 0,Wiki_id,Singer,Age,City,State,Song,Coach_1,Coach_2,Coach_3,Coach_4,extra_1,extra_2,extra_3,Season
14,3,The Bundys(Megan,Katey,and Ryan),25-31,Cincinnati,Ohio,"""Closer to Fine""",—,—,✔,✔,,16
38,4,Denton Arnell,32,Chicago,Illinois,"""Hold On","We're Going Home""",—,✔,—,—,,,16
71,11,Marina Chello,37,Samarkand,Uzbekistan / Plainview,New York,"""Walk Me Home""",✔,—,—,✔,,,17
159,3,Hannah Blaylock,31,Nimmons,Arkansas,"""Baby","Now That I've Found You""",—,—,—,—,,,15
175,7,Funsho,29,Lagos,Nigeria / Los Angeles,California,"""Finesse""",✔,✔,✔,—,,,15
204,4,Mitch Cardoza,22,Dartmouth,Massachusetts,"""No Woman","No Cry""",—,—,—,—,,,14
209,9,Jorge Eduardo,19,Guadalajara,Mexico / Dallas,Texas,"""Despacito""",—,—,✔,—,,,14
211,11,Angel Bonilla,31,Quezon City,Philippines / New York City,New York,"""Lay Me Down""",✔,—,—,—,,,14
246,4,Xaris,17,Gulf Breeze,Florida,"""Don't Think Twice","It's All Right""",—,—,—,—,,,13
266,7,Kathrina Feigh,24,Quezon City,Philippines / New York City,New York,"""Big White Room""",—,—,✔,✔,,,13


In [95]:
usa = pd.read_excel("usa_states.ods", engine="odf")

In [96]:
usa.head()

Unnamed: 0,State,Code
0,D.C.,DC
1,Alabama,AL
2,Alaska,AK
3,Arizona,AZ
4,Arkansas,AR


In [97]:
states_list = usa.State.to_list()

In [98]:
# Every st2 row except the first. Copied so that columns added to st2_2 are written to
# an independent frame rather than to a slice of st2 (which triggers pandas'
# SettingWithCopyWarning).
st2_2 = st2.iloc[1:, :].copy()

In [99]:
st2_2.head()

Unnamed: 0,Wiki_id,Singer,Age,City,State,Song,Coach_1,Coach_2,Coach_3,Coach_4,extra_1,extra_2,extra_3,Season
38,4,Denton Arnell,32,Chicago,Illinois,"""Hold On","We're Going Home""",—,✔,—,—,,,16
71,11,Marina Chello,37,Samarkand,Uzbekistan / Plainview,New York,"""Walk Me Home""",✔,—,—,✔,,,17
159,3,Hannah Blaylock,31,Nimmons,Arkansas,"""Baby","Now That I've Found You""",—,—,—,—,,,15
175,7,Funsho,29,Lagos,Nigeria / Los Angeles,California,"""Finesse""",✔,✔,✔,—,,,15
204,4,Mitch Cardoza,22,Dartmouth,Massachusetts,"""No Woman","No Cry""",—,—,—,—,,,14


In [100]:
# Rebuild the song title for rows whose fields are shifted. The first matching rule wins:
#   1. Song and Coach_1 both contain a double quote -> "Song, Coach_1"
#   2. Coach_1 and Coach_2 both contain a double quote -> "Coach_1, Coach_2"
#   3. only Coach_1 contains a double quote -> Coach_1
#   4. otherwise -> Coach_2
# NOTE(review): str.contains is called without na=False, so a missing value yields NaN,
# which np.where treats as truthy -- confirm these columns have no missing values here.
st2_2["Song_ok"] = np.where((st2_2["Song"].str.contains('"')) & (st2_2["Coach_1"].str.contains('"')),
                               st2_2['Song'].str.cat(st2_2['Coach_1'],sep=", "), 
                               np.where((st2_2["Coach_1"].str.contains('"')) & (st2_2["Coach_2"].str.contains('"')),
                                        st2_2['Coach_1'].str.cat(st2_2['Coach_2'],sep=", "),
                                        np.where(st2_2["Coach_1"].str.contains('"'), st2_2["Coach_1"],
                                                 st2_2["Coach_2"])))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [101]:
# Pick the column that holds the state for rows whose fields are shifted:
#   * Song contains "/"  -> take Coach_1
#   * State contains "/" -> take Song if its stripped value is in states_list,
#                           otherwise the part of State before the first "/"
#   * otherwise          -> take State if its stripped value is in states_list,
#                           otherwise Song
# NOTE(review): the values copied from Song/State are not stripped themselves; only the
# membership test uses the stripped text.
st2_2["State_ok"] = np.where(st2_2["Song"].str.contains('/'), st2_2["Coach_1"], 
                               np.where(st2_2["State"].str.contains('/'), 
                                        np.where(st2_2["Song"].str.strip().isin(states_list), st2_2["Song"], 
                                        st2_2['State'].str.split('/').str[0]), 
                                            np.where(st2_2["State"].str.strip().isin(states_list), st2_2["State"], 
                                            st2_2["Song"])))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [102]:
st2_2 = st2_2.iloc[:, np.r_[1:3, -3:0]]

In [103]:
st2_2.head(2)

Unnamed: 0,Singer,Age,Season,Song_ok,State_ok
38,Denton Arnell,32,16,"""Hold On, We're Going Home""",Illinois
71,Marina Chello,37,17,"""Walk Me Home""",New York


In [104]:
st2_2_colnames = ["Singer", "Age", "Season", "Song", "State"]

In [105]:
st2_2.columns = st2_2_colnames

In [106]:
st2_2 = st2_2[the_columns_ordered]

In [107]:
st2_2.head()

Unnamed: 0,Season,Singer,Age,Song,State
38,16,Denton Arnell,32,"""Hold On, We're Going Home""",Illinois
71,17,Marina Chello,37,"""Walk Me Home""",New York
159,15,Hannah Blaylock,31,"""Baby, Now That I've Found You""",Arkansas
175,15,Funsho,29,"""Finesse""",California
204,14,Mitch Cardoza,22,"""No Woman, No Cry""",Massachusetts


In [108]:
st2_1 = st2.copy()

In [109]:
# Keep only the text before the first "(" in Singer, e.g. "The Bundys(Megan" becomes
# "The Bundys". Splitting leaves names without a parenthesis unchanged, so no
# conditional or regular expression is needed.
st2_1["Singer"] = st2_1["Singer"].str.split("(").str[0]

In [110]:
st2_1 = st2_1.iloc[:, np.r_[1, 4, 6:8, -1]]

In [111]:
st2_1.iloc[0,1] = 28

In [112]:
st2_1.columns = ["Singer", "Age", "State", "Song", "Season"]

In [113]:
st2_1 = st2_1[the_columns_ordered]

In [114]:
st2_1 = st2_1.iloc[0:1,:]

In [115]:
st2_1

Unnamed: 0,Season,Singer,Age,Song,State
14,16,The Bundys,28,"""Closer to Fine""",Ohio


In [116]:
st2_3 = pd.concat([st2_2, st2_1], axis=0, sort=False)

In [117]:
st23 = pd.concat([st3, st2_3], axis=0, sort=False)

In [118]:
st23

Unnamed: 0,Season,Singer,Age,Song,State
815,5,Yasmin,,"""Love Song""",
940,3,Jasmin Rose,,"""Mercy""",
991,3,LaRae Rhodes,,"""Black Horse and the Cherry Tree""",
1009,3,Gedina,,"""Ain't Nobody""",
1027,2,Aly Jados,,"""Because the Night""",
1051,2,Moses Stone,26,"""Let's Get It Started""",Maryland
1064,2,Luna Searles,,"""Come to My Window""",
1085,2,Beta,,"""You Make Me Feel...""",
1116,1,Serabee,34,"""Son of a Preacher Man""",Mississippi
38,16,Denton Arnell,32,"""Hold On, We're Going Home""",Illinois


In [119]:
st1.head()

Unnamed: 0,Wiki_id,Singer,Age,City,State,Song,Coach_1,Coach_2,Coach_3,Coach_4,extra_1,extra_2,extra_3,Season
0,1,Gyth Rigdon,24,Singer,Louisiana,"""Drift Away""",—,✘,✔,✔,,,,16
1,2,Maelyn Jarmon,26,Frisco,Texas,"""Fields of Gold""",✔,✔,✔,✔,,,,16
2,3,Karen Galera,19,Dallas,Texas,"""Mi Corazoncito""",—,✔,✔,—,,,,16
3,4,Trey Rose,27,Hugo,Oklahoma,"""Wake Me Up""",✔,—,—,✔,,,,16
4,5,Kim Cherry,30,Niceville,Florida,"""No Scrubs""",—,—,✔,✔,,,,16


In [120]:
st1 = st1.iloc[:,np.r_[1:3,4:6,13]]

In [121]:
st1 = st1[the_columns_ordered]

In [122]:
st123 = pd.concat([st23, st1], axis=0, sort=False)

In [123]:
st123.sort_index(inplace=True)

In [124]:
st123.Age.value_counts()

25         67
23         65
17         65
16         64
26         64
20         64
24         63
N/A        60
21         55
19         53
22         49
18         46
28         46
29         43
27         40
15         35
31         34
30         33
33         27
35         22
34         20
32         18
37          9
38          8
14          7
36          6
40          6
39          4
45          4
13          4
52          4
42          4
41          3
44          3
51          2
56          2
54          2
46          2
13[a]       1
28          1
30 / 32     1
66          1
32 & 26     1
25/23       1
13-14       1
55          1
43          1
48          1
27-28       1
24 & 27     1
61          1
50          1
19 / 18     1
17 & 19     1
49          1
23 & 44     1
35 & 28     1
44 / 36     1
62          1
19 & 20     1
64          1
35–36       1
Name: Age, dtype: int64

In [125]:
st123["Age"] = st123.Age.apply(str)

In [126]:
st123["Age"] = st123["Age"].apply(lambda x: x.replace("N/A", "na")) 

In [127]:
st123["Age"] = st123["Age"].apply(lambda x: x.replace("&", "-").replace("/", "-").replace("–", "-")
                                                 .replace("[", "-"))

In [128]:
# Keep only the text before the first "-"; values without a dash come back whole.
st123["Age"] = st123["Age"].str.split("-").str[0]

In [129]:
st123["Age"] = st123["Age"].apply(lambda x: x.replace("na", "N/A")) 

In [130]:
st123.Age.value_counts()

25     68
23     65
17     65
16     64
26     64
20     64
24     63
N/A    60
21     55
19     53
22     49
28     47
18     46
29     43
27     41
15     35
31     34
30     33
33     27
35     23
34     20
32     18
37      9
38      8
14      7
36      6
40      6
13      6
52      4
45      4
42      4
39      4
44      3
41      3
19      2
46      2
51      2
56      2
54      2
49      1
66      1
55      1
44      1
35      1
17      1
50      1
61      1
43      1
23      1
64      1
48      1
62      1
30      1
24      1
32      1
Name: Age, dtype: int64

In [131]:
st123.State.value_counts()

 California              165
 Texas                   109
 New York                 88
 Florida                  81
 Tennessee                70
 Georgia                  57
 Illinois                 44
 New Jersey               39
 Pennsylvania             31
 Louisiana                27
 Massachusetts            26
 North Carolina           25
 Alabama                  22
 Washington               21
 Virginia                 19
 Ohio                     18
 Missouri                 18
 Kentucky                 18
 Michigan                 18
 Oklahoma                 17
 Utah                     15
 Arkansas                 14
 South Carolina           13
 Indiana                  12
 Minnesota                12
 Maryland                 11
 Connecticut              11
 Colorado                  8
N/A                        7
 Iowa                      7
 Arizona                   7
 Nevada                    7
 Rhode Island              7
 D.C.                      6
 Mississippi  

In [132]:
st123.Season = st123.Season.apply(str)

In [133]:
# Trim surrounding whitespace from every string cell; non-string cells (numbers,
# missing values) are passed through unchanged instead of raising AttributeError.
st123 = st123.applymap(lambda cell: cell.strip() if isinstance(cell, str) else cell)

In [134]:
# Replace the abbreviation on every matching row; a no-op (rather than an IndexError)
# when no row carries this value.
st123.loc[st123.State == 'IL', "State"] = 'Illinois'

In [135]:
# Map the combined (and misspelled) value to Illinois on every matching row; a no-op
# (rather than an IndexError) when no row carries this value.
st123.loc[st123.State == 'Illiinois/Indiana', "State"] = 'Illinois'

In [136]:
# Map the combined value to Georgia on every matching row; a no-op (rather than an
# IndexError) when no row carries this value.
st123.loc[st123.State == 'Georgia /El Salvador', "State"] = 'Georgia'

In [137]:
st123.State.value_counts().tail(15)

Alaska                 3
Delaware               3
Scotland               2
Brazil                 2
U.S. Virgin Islands    1
Samoa                  1
UK                     1
Haiti                  1
Vermont                1
Jamaica                1
Philippines            1
Montana                1
Canada                 1
South Africa           1
Maine                  1
Name: State, dtype: int64

In [138]:
st123.Season = st123.Season.apply(int)

In [139]:
# Label each row: 'N/A' when State is 'N/A', "USA" when the stripped State appears in
# states_list, otherwise "RoW". The first matching condition wins.
state_is_missing = st123["State"] == 'N/A'
state_is_listed = st123["State"].str.strip().isin(states_list)
st123["Country"] = np.select([state_is_missing, state_is_listed], ['N/A', "USA"], default="RoW")

In [140]:
st123.head(30)

Unnamed: 0,Season,Singer,Age,Song,State,Country
0,16,Gyth Rigdon,24,"""Drift Away""",Louisiana,USA
1,16,Maelyn Jarmon,26,"""Fields of Gold""",Texas,USA
2,16,Karen Galera,19,"""Mi Corazoncito""",Texas,USA
3,16,Trey Rose,27,"""Wake Me Up""",Oklahoma,USA
4,16,Kim Cherry,30,"""No Scrubs""",Florida,USA
5,16,AJ Ryan,30,"""Love Runs Out""",New York,USA
6,16,Rizzi Myers,29,"""Breathin""",Tennessee,USA
7,16,Lisa Ramey,33,"""Sex on Fire""",New York,USA
8,16,Jimmy Mowery,31,"""Attention""",Pennsylvania,USA
9,16,LiLi Joy,15,"""Cool""",California,USA


In [141]:
st123.reset_index(inplace=True)

In [142]:
st123["Season_id"] = st123.groupby('Season')["index"].rank(method='first').apply(int)

In [143]:
column_order_2 = ["Season", "Season_id", "Singer", "Age", "Song", "State", "Country"]

In [144]:
st123 = st123[column_order_2]

In [145]:
st123.head()

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country
0,16,1,Gyth Rigdon,24,"""Drift Away""",Louisiana,USA
1,16,2,Maelyn Jarmon,26,"""Fields of Gold""",Texas,USA
2,16,3,Karen Galera,19,"""Mi Corazoncito""",Texas,USA
3,16,4,Trey Rose,27,"""Wake Me Up""",Oklahoma,USA
4,16,5,Kim Cherry,30,"""No Scrubs""",Florida,USA


In [146]:
st123["Song_trimmed"] = st123.Song.apply(lambda x: x.strip('"'))

In [147]:
st123["for_the_search"] = 'song wikipedia'

In [148]:
# Build the search query "<Song_trimmed> <for_the_search>" for every row.
st123["Song_for_the_search"] = (
    st123["Song_trimmed"].astype(str) + ' ' + st123["for_the_search"].astype(str)
)

In [149]:
st123.head()

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country,Song_trimmed,for_the_search,Song_for_the_search
0,16,1,Gyth Rigdon,24,"""Drift Away""",Louisiana,USA,Drift Away,song wikipedia,Drift Away song wikipedia
1,16,2,Maelyn Jarmon,26,"""Fields of Gold""",Texas,USA,Fields of Gold,song wikipedia,Fields of Gold song wikipedia
2,16,3,Karen Galera,19,"""Mi Corazoncito""",Texas,USA,Mi Corazoncito,song wikipedia,Mi Corazoncito song wikipedia
3,16,4,Trey Rose,27,"""Wake Me Up""",Oklahoma,USA,Wake Me Up,song wikipedia,Wake Me Up song wikipedia
4,16,5,Kim Cherry,30,"""No Scrubs""",Florida,USA,No Scrubs,song wikipedia,No Scrubs song wikipedia


In [320]:
def song_crawl(song):
    """Look up a song's description line and genres from its first search hit.

    Runs ``search(song, ...)`` limited to one result, downloads that page and
    reads two elements: the first ``<tr class="description">`` (e.g.
    "Single by Dobie Gray") and the first ``<td class="category hlist">``.

    Parameters
    ----------
    song : str
        Search query, e.g. "Drift Away song wikipedia".

    Returns
    -------
    list
        ``[artist, genres]`` where both items are lists of strings. ``artist``
        is the text of the description row or ``["No artist found"]``.
        ``genres`` holds one entry per ``<li>`` of the genre cell, the whole
        cell text when it has no list items, or ``["No genre found"]``. The
        same placeholder pair is returned when the search yields no URL, so
        callers never receive ``None``.
    """
    for url in search(song, lang="en", num=1, stop=1, pause=1):
        html = requests.get(url, timeout=30).content
        soup = BeautifulSoup(html, "lxml")
        # Each element is looked up once and reused below.
        genre_cell = soup.find('td', {'class': 'category hlist'})
        description_row = soup.find('tr', {'class': 'description'})

        if description_row is None:
            artist = ["No artist found"]
        else:
            artist = [description_row.text]

        if genre_cell is None:
            genres = ["No genre found"]
        else:
            genre_items = genre_cell.find_all('li')
            # A cell without <li> children carries a single genre as plain text.
            genres = [item.text for item in genre_items] if genre_items else [genre_cell.text]

        # Only the first search hit is ever inspected.
        return [artist, genres]

    # The search produced no URL at all.
    return [["No artist found"], ["No genre found"]]

In [345]:
def genre_crawl(artist):
    """Look up genres from the first search hit for ``artist``.

    Runs ``search(artist, ...)`` limited to one result, downloads that page and
    reads its first ``<td class="category hlist">``.

    Parameters
    ----------
    artist : str
        Search query.

    Returns
    -------
    list of str
        One entry per ``<li>`` of the genre cell, the whole cell text when it
        has no list items, or ``["No genre"]`` when the page has no such cell
        or the search yields no URL (so callers never receive ``None``).
    """
    for url in search(artist, lang="en", num=1, stop=1, pause=1):
        html = requests.get(url, timeout=30).content
        soup = BeautifulSoup(html, "lxml")
        genre_cell = soup.find('td', {'class': 'category hlist'})
        if genre_cell is None:
            return ["No genre"]
        genre_items = genre_cell.find_all('li')
        if genre_items:
            return [item.text for item in genre_items]
        # A cell without <li> children carries a single genre as plain text.
        return [genre_cell.text]

    # The search produced no URL at all.
    return ["No genre"]


In [150]:
def artist_crawl(song):
    """Return the description line of the first search hit for ``song``.

    Runs ``search(song, ...)`` limited to one result, downloads that page and
    reads its first ``<tr class="description">``.

    Parameters
    ----------
    song : str
        Search query.

    Returns
    -------
    str
        Text of the description row, or ``"No artist found"`` when the page has
        no such row or the search yields no URL (so callers never receive
        ``None``).
    """
    for url in search(song, lang="en", num=1, stop=1, pause=1):
        html = requests.get(url, timeout=30).content
        soup = BeautifulSoup(html, "lxml")
        description_row = soup.find('tr', {'class': 'description'})
        if description_row is None:
            return "No artist found"
        return description_row.text

    # The search produced no URL at all.
    return "No artist found"

In [321]:
st123["Return"] = st123["Song_for_the_search"].apply(lambda x: song_crawl(x))

In [326]:
st123.tail(50)

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country,Song_trimmed,for_the_search,Song_for_the_search,Return
1077,2,64,Cheesa,21.0,"""If I Were a Boy""",Hawaii,USA,If I Were a Boy,song wikipedia,If I Were a Boy song wikipedia,"[[Single by Beyoncé], [Pop, R&B]]"
1078,2,65,Preston Shannon,64.0,"""In the Midnight Hour""",Tennessee,USA,In the Midnight Hour,song wikipedia,In the Midnight Hour song wikipedia,"[[Single by Wilson Pickett], [R&B, soul]]"
1079,2,66,Lex Land,24.0,"""I Can't Make You Love Me""",Texas,USA,I Can't Make You Love Me,song wikipedia,I Can't Make You Love Me song wikipedia,"[[Single by Bonnie Raitt], [Pop, adult contemp..."
1080,2,67,Cameron Novack,28.0,"""You Oughta Know""",Missouri,USA,You Oughta Know,song wikipedia,You Oughta Know song wikipedia,"[[Single by Alanis Morissette], [Alternative r..."
1081,2,68,Orlando Napier,25.0,"""Waiting on the World to Change""",California,USA,Waiting on the World to Change,song wikipedia,Waiting on the World to Change song wikipedia,"[[Single by John Mayer], [Jazz rock, pop rock,..."
1082,2,69,Lee Koch,27.0,"""Like a Rolling Stone""",California,USA,Like a Rolling Stone,song wikipedia,Like a Rolling Stone song wikipedia,"[[Single by Bob Dylan], [Folk rock[2]]]"
1083,2,70,WADE,19.0,"""Rehab""",Alabama,USA,Rehab,song wikipedia,Rehab song wikipedia,"[[Single by Amy Winehouse], [Soul, rhythm and ..."
1084,2,71,Adley Stump,21.0,"""Last Name""",Oklahoma,USA,Last Name,song wikipedia,Last Name song wikipedia,"[[Single by Carrie Underwood], [Country rock]]"
1085,2,72,Beta,,"""You Make Me Feel...""",,,You Make Me Feel...,song wikipedia,You Make Me Feel... song wikipedia,"[[Single by Cobra Starship featuring Sabi], [D..."
1086,2,73,Aaron Gordon,,"""So Sick""",Canada,RoW,So Sick,song wikipedia,So Sick song wikipedia,"[[Single by Ne-Yo], [R&B]]"


In [340]:
ft = st123.copy()

In [341]:
ft.head()

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country,Song_trimmed,for_the_search,Song_for_the_search,Return
0,16,1,Gyth Rigdon,24,"""Drift Away""",Louisiana,USA,Drift Away,song wikipedia,Drift Away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]"
1,16,2,Maelyn Jarmon,26,"""Fields of Gold""",Texas,USA,Fields of Gold,song wikipedia,Fields of Gold song wikipedia,"[[Single by Sting], [No genre found]]"
2,16,3,Karen Galera,19,"""Mi Corazoncito""",Texas,USA,Mi Corazoncito,song wikipedia,Mi Corazoncito song wikipedia,"[[Single by Aventura], [Bachata]]"
3,16,4,Trey Rose,27,"""Wake Me Up""",Oklahoma,USA,Wake Me Up,song wikipedia,Wake Me Up song wikipedia,"[[Single by Avicii], [Folktronica, house, danc..."
4,16,5,Kim Cherry,30,"""No Scrubs""",Florida,USA,No Scrubs,song wikipedia,No Scrubs song wikipedia,"[[Single by TLC], [R&B]]"


In [342]:
ft[["Artist_1", "Genre_1"]] = ft['Return'].apply(pd.Series)

In [343]:
ft["Artist_2"] = ft.Artist_1.apply(lambda x: ' '.join(map(str,x)))
ft["Genre_2"] = ft.Genre_1.apply(lambda x: ' '.join(map(str,x)))

In [344]:
ft.head()

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country,Song_trimmed,for_the_search,Song_for_the_search,Return,Artist_1,Genre_1,Artist_2,Genre_2
0,16,1,Gyth Rigdon,24,"""Drift Away""",Louisiana,USA,Drift Away,song wikipedia,Drift Away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]",[Single by Dobie Gray],"[Soft rock, R&B [1]]",Single by Dobie Gray,"Soft rock, R&B [1]"
1,16,2,Maelyn Jarmon,26,"""Fields of Gold""",Texas,USA,Fields of Gold,song wikipedia,Fields of Gold song wikipedia,"[[Single by Sting], [No genre found]]",[Single by Sting],[No genre found],Single by Sting,No genre found
2,16,3,Karen Galera,19,"""Mi Corazoncito""",Texas,USA,Mi Corazoncito,song wikipedia,Mi Corazoncito song wikipedia,"[[Single by Aventura], [Bachata]]",[Single by Aventura],[Bachata],Single by Aventura,Bachata
3,16,4,Trey Rose,27,"""Wake Me Up""",Oklahoma,USA,Wake Me Up,song wikipedia,Wake Me Up song wikipedia,"[[Single by Avicii], [Folktronica, house, danc...",[Single by Avicii],"[Folktronica, house, dance-pop]",Single by Avicii,"Folktronica, house, dance-pop"
4,16,5,Kim Cherry,30,"""No Scrubs""",Florida,USA,No Scrubs,song wikipedia,No Scrubs song wikipedia,"[[Single by TLC], [R&B]]",[Single by TLC],[R&B],Single by TLC,R&B


In [346]:
# Second-chance lookup by artist, limited to rows whose Genre_2 is 'No genre found'.
# np.where evaluates both of its branches for every row, which would run genre_crawl
# (a web search plus a page download) on the whole table; applying it to the masked
# rows performs only the lookups whose result is kept. Looked-up entries hold whatever
# genre_crawl returns; all other rows keep their Genre_2 value.
needs_lookup = ft["Genre_2"] == 'No genre found'
ft["Genre_3"] = ft["Genre_2"]
ft.loc[needs_lookup, "Genre_3"] = ft.loc[needs_lookup, "Artist_2"].apply(genre_crawl)

In [349]:
ft.tail(50)

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country,Song_trimmed,for_the_search,Song_for_the_search,Return,Artist_1,Genre_1,Artist_2,Genre_2,Genre_3
1077,2,64,Cheesa,21.0,"""If I Were a Boy""",Hawaii,USA,If I Were a Boy,song wikipedia,If I Were a Boy song wikipedia,"[[Single by Beyoncé], [Pop, R&B]]",[Single by Beyoncé],"[Pop, R&B]",Single by Beyoncé,Pop R&B,Pop R&B
1078,2,65,Preston Shannon,64.0,"""In the Midnight Hour""",Tennessee,USA,In the Midnight Hour,song wikipedia,In the Midnight Hour song wikipedia,"[[Single by Wilson Pickett], [R&B, soul]]",[Single by Wilson Pickett],"[R&B, soul]",Single by Wilson Pickett,R&B soul,R&B soul
1079,2,66,Lex Land,24.0,"""I Can't Make You Love Me""",Texas,USA,I Can't Make You Love Me,song wikipedia,I Can't Make You Love Me song wikipedia,"[[Single by Bonnie Raitt], [Pop, adult contemp...",[Single by Bonnie Raitt],"[Pop, adult contemporary]",Single by Bonnie Raitt,Pop adult contemporary,Pop adult contemporary
1080,2,67,Cameron Novack,28.0,"""You Oughta Know""",Missouri,USA,You Oughta Know,song wikipedia,You Oughta Know song wikipedia,"[[Single by Alanis Morissette], [Alternative r...",[Single by Alanis Morissette],"[Alternative rock, post-grunge]",Single by Alanis Morissette,Alternative rock post-grunge,Alternative rock post-grunge
1081,2,68,Orlando Napier,25.0,"""Waiting on the World to Change""",California,USA,Waiting on the World to Change,song wikipedia,Waiting on the World to Change song wikipedia,"[[Single by John Mayer], [Jazz rock, pop rock,...",[Single by John Mayer],"[Jazz rock, pop rock, blues rock]",Single by John Mayer,Jazz rock pop rock blues rock,Jazz rock pop rock blues rock
1082,2,69,Lee Koch,27.0,"""Like a Rolling Stone""",California,USA,Like a Rolling Stone,song wikipedia,Like a Rolling Stone song wikipedia,"[[Single by Bob Dylan], [Folk rock[2]]]",[Single by Bob Dylan],[Folk rock[2]],Single by Bob Dylan,Folk rock[2],Folk rock[2]
1083,2,70,WADE,19.0,"""Rehab""",Alabama,USA,Rehab,song wikipedia,Rehab song wikipedia,"[[Single by Amy Winehouse], [Soul, rhythm and ...",[Single by Amy Winehouse],"[Soul, rhythm and blues]",Single by Amy Winehouse,Soul rhythm and blues,Soul rhythm and blues
1084,2,71,Adley Stump,21.0,"""Last Name""",Oklahoma,USA,Last Name,song wikipedia,Last Name song wikipedia,"[[Single by Carrie Underwood], [Country rock]]",[Single by Carrie Underwood],[Country rock],Single by Carrie Underwood,Country rock,Country rock
1085,2,72,Beta,,"""You Make Me Feel...""",,,You Make Me Feel...,song wikipedia,You Make Me Feel... song wikipedia,"[[Single by Cobra Starship featuring Sabi], [D...",[Single by Cobra Starship featuring Sabi],"[Dance-pop, electropop]",Single by Cobra Starship featuring Sabi,Dance-pop electropop,Dance-pop electropop
1086,2,73,Aaron Gordon,,"""So Sick""",Canada,RoW,So Sick,song wikipedia,So Sick song wikipedia,"[[Single by Ne-Yo], [R&B]]",[Single by Ne-Yo],[R&B],Single by Ne-Yo,R&B,R&B


In [350]:
ft.loc[ft.Genre_2 == "No genre found"]

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country,Song_trimmed,for_the_search,Song_for_the_search,Return,Artist_1,Genre_1,Artist_2,Genre_2,Genre_3
1,16,2,Maelyn Jarmon,26.0,"""Fields of Gold""",Texas,USA,Fields of Gold,song wikipedia,Fields of Gold song wikipedia,"[[Single by Sting], [No genre found]]",[Single by Sting],[No genre found],Single by Sting,No genre found,[No genre]
5,16,6,AJ Ryan,30.0,"""Love Runs Out""",New York,USA,Love Runs Out,song wikipedia,Love Runs Out song wikipedia,"[[Single by OneRepublic], [No genre found]]",[Single by OneRepublic],[No genre found],Single by OneRepublic,No genre found,[No genre]
9,16,10,LiLi Joy,15.0,"""Cool""",California,USA,Cool,song wikipedia,Cool song wikipedia,"[[Single by Jonas Brothers], [No genre found]]",[Single by Jonas Brothers],[No genre found],Single by Jonas Brothers,No genre found,[No genre]
12,16,13,Domenic Haynes,18.0,"""River""",Florida,USA,River,song wikipedia,River song wikipedia,"[[No artist found], [No genre found]]",[No artist found],[No genre found],No artist found,No genre found,
17,16,18,Julian King,25.0,"""All Time Low""",Pennsylvania,USA,All Time Low,song wikipedia,All Time Low song wikipedia,"[[Single by Jon Bellion], [No genre found]]",[Single by Jon Bellion],[No genre found],Single by Jon Bellion,No genre found,[No genre]
36,16,37,Selkii,31.0,"""I Try""",South Africa,RoW,I Try,song wikipedia,I Try song wikipedia,"[[Single by Macy Gray], [No genre found]]",[Single by Macy Gray],[No genre found],Single by Macy Gray,No genre found,[No genre]
46,16,47,Oliv Blu,20.0,"""On & On""",Illinois,USA,On & On,song wikipedia,On & On song wikipedia,"[[No artist found], [No genre found]]",[No artist found],[No genre found],No artist found,No genre found,
49,16,50,Celia Babini,17.0,"""Idontwannabeyouanymore""",New York,USA,Idontwannabeyouanymore,song wikipedia,Idontwannabeyouanymore song wikipedia,"[[Single by Billie Eilish], [No genre found]]",[Single by Billie Eilish],[No genre found],Single by Billie Eilish,No genre found,[No genre]
51,16,52,Kalvin Jarvis,29.0,"""A Good Night""",Arizona,USA,A Good Night,song wikipedia,A Good Night song wikipedia,"[[Single by John Legend featuring BloodPop], [...",[Single by John Legend featuring BloodPop],[No genre found],Single by John Legend featuring BloodPop,No genre found,[No genre]
59,16,60,Maddi Fraser,24.0,"""Get It While You Can""",California,USA,Get It While You Can,song wikipedia,Get It While You Can song wikipedia,"[[No artist found], [No genre found]]",[No artist found],[No genre found],No artist found,No genre found,


In [351]:
ft.to_csv("../ft.csv", index=False)

In [None]:
# Strip the reference markers "[1]" .. "[7]" from Genre_3. The replacements are chained
# on the running result so that every marker is removed, not only the last one.
# Non-string entries are passed through unchanged.
genre_without_refs = ft.Genre_3
for ref_num in range(1, 8):
    marker = "[{}]".format(ref_num)
    genre_without_refs = genre_without_refs.apply(
        lambda x, marker=marker: x.replace(marker, "") if isinstance(x, str) else x
    )
ft["Genre_4"] = genre_without_refs

In [354]:
def replacing_references(string):
    """Remove single-digit footnote markers ("[0]" .. "[9]") from a string.

    Parameters
    ----------
    string : str
        Text that may contain bracketed reference numbers such as "[1]".

    Returns
    -------
    str
        The text with every "[0]" .. "[9]" occurrence removed. Other
        bracketed content (e.g. "[10]") and surrounding whitespace are left
        untouched.
    """
    for ref_num in range(10):
        # Build the marker first, then replace it. Formatting the result of
        # replace() would search for the literal "[{}]" and never match.
        string = string.replace("[{}]".format(ref_num), "")
    return string

In [159]:
# Frequency of each distinct Genre value in st123.
st123["Genre"].value_counts()

[No genre]                                                            84
[Country]                                                             83
[Soul]                                                                57
[Pop]                                                                 41
[R&B]                                                                 36
[Pop rock]                                                            27
[Pop, R&B]                                                            19
[Folk rock]                                                           16
[Country rock]                                                        14
[Country pop]                                                         10
[Alternative rock]                                                    10
[Folk]                                                                 9
[Pop[1]]                                                               9
[Soft rock[1]]                                     

In [314]:
# Frequency of each distinct Artist value in st123.
st123["Artist"].value_counts()

No artist found                             58
Single by Bruno Mars                        15
Song                                        13
Single by Katy Perry                        13
Single by Adele                             12
                                            ..
Single by Don Williams                       1
Single by Ray Charles with Willie Nelson     1
Demo by Jason Mraz                           1
Single by Janis Joplin                       1
Single by Eminem                             1
Name: Artist, Length: 571, dtype: int64

In [315]:
# Rows whose Artist value is exactly the bare string 'Song'.
st123.loc[st123["Artist"].eq('Song')]

Unnamed: 0,Season,Season_id,Singer,Age,Song,State,Country,Song_trimmed,for_the_search,Song_for_the_search,Artist,Genre
96,17,36,Preston C. Howell,14,"""Dream a Little Dream of Me""",Florida,USA,Dream a Little Dream of Me,song wikipedia,Dream a Little Dream of Me song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
231,14,51,Livia Faith,17,"""Dream a Little Dream of Me""",Kentucky,USA,Dream a Little Dream of Me,song wikipedia,Dream a Little Dream of Me song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
243,13,1,Chris Weaver,29,"""Try a Little Tenderness""",New York,USA,Try a Little Tenderness,song wikipedia,Try a Little Tenderness song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
279,13,37,Emily Luther,24,"""Summertime""",Rhode Island,USA,Summertime,song wikipedia,Summertime song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
305,12,3,Johnny Hayes,29,"""Try a Little Tenderness""",Alabama,USA,Try a Little Tenderness,song wikipedia,Try a Little Tenderness song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
328,12,26,Ericka Corban,31,"""Wade in the Water""",Washington,USA,Wade in the Water,song wikipedia,Wade in the Water song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
371,11,7,Riley Elmore,16,"""The Way You Look Tonight""",Illinois,USA,The Way You Look Tonight,song wikipedia,The Way You Look Tonight song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
379,11,15,Wé McDonald,17,"""Feeling Good""",New Jersey,USA,Feeling Good,song wikipedia,Feeling Good song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
394,11,30,Simone Gundy,26,"""I (Who Have Nothing)""",Texas,USA,I (Who Have Nothing),song wikipedia,I (Who Have Nothing) song wikipedia,Song,<filter object at 0x7fb5a8d136a0>
546,9,60,Amy Vachal,26,"""Dream a Little Dream of Me""",New York,USA,Dream a Little Dream of Me,song wikipedia,Dream a Little Dream of Me song wikipedia,Song,<filter object at 0x7fb5a8d136a0>


In [241]:

# Small hand-written fixture: the first inner list holds labels, the second
# holds the matching search strings.
songs_list = [
    ["Song A", "Song B", "Song_C"],
    [
        "baby song wikipedia",
        "Drift away song wikipedia",
        "If I had you song simple wikipedia",
    ],
]

In [242]:
# Echo the nested list to check its contents.
songs_list

[['Song A', 'Song B', 'Song_C'],
 ['baby song wikipedia',
  'Drift away song wikipedia',
  'If I had you song simple wikipedia']]

In [243]:
# The first inner list is used as column labels; the slice keeps up to the
# first three inner lists.
songs_cols, songs_rows = songs_list[0], songs_list[:3]

In [244]:
# Build the frame with the labels as columns, then flip it so each label
# becomes a row.
songs = pd.DataFrame(data=songs_list, columns=songs_cols).T

In [245]:
# Display the transposed frame.
songs

Unnamed: 0,0,1
Song A,Song A,baby song wikipedia
Song B,Song B,Drift away song wikipedia
Song_C,Song_C,If I had you song simple wikipedia


In [246]:
# Column labels to apply to the songs frame.
songs_colnames = [
    "alpha",
    "beta",
]

In [247]:
# Relabel the columns in place; the number of labels must match the number
# of columns in songs.
songs.columns = songs_colnames

In [248]:
# Preview the first five rows of the relabelled frame.
songs.head(n=5)

Unnamed: 0,alpha,beta
Song A,Song A,baby song wikipedia
Song B,Song B,Drift away song wikipedia
Song_C,Song_C,If I had you song simple wikipedia


In [194]:
# Run genre_crawl_2 on each search string in "beta" and keep the result per row.
songs["genre"] = songs["beta"].apply(genre_crawl_2)

In [306]:
# Run genre_crawl_2 on each search string in "beta" and store the result in "test".
songs["test"] = songs["beta"].apply(genre_crawl_2)

In [307]:
# Display the songs frame to inspect the crawl results.
songs

Unnamed: 0,alpha,beta,test,test_4,test_5,test_6,test_7
Song A,Song A,baby song wikipedia,"[[Single by Justin Bieber featuring Ludacris],...",[Single by Justin Bieber featuring Ludacris],No genre,N o g e n r e,No genre
Song B,Song B,Drift away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]",[Single by Dobie Gray],"[Soft rock, R&B [1]]","Soft rock, R&B [1]","Soft rock, R&B"
Song_C,Song_C,If I had you song simple wikipedia,"[[Single by Adam Lambert], [Electronic rock, d...",[Single by Adam Lambert],"[Electronic rock, dance-pop]",Electronic rock dance-pop,Electronic rock dance-pop


In [308]:
# Expand each "test" entry into separate columns, then store them as
# test_4 and test_5.
test_parts = songs["test"].apply(pd.Series)
songs[["test_4", "test_5"]] = test_parts

In [309]:
# Preview the first five rows after splitting "test".
songs.head(n=5)

Unnamed: 0,alpha,beta,test,test_4,test_5,test_6,test_7
Song A,Song A,baby song wikipedia,"[[Single by Justin Bieber featuring Ludacris],...",[Single by Justin Bieber featuring Ludacris],[No genre],N o g e n r e,No genre
Song B,Song B,Drift away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]",[Single by Dobie Gray],"[Soft rock, R&B [1]]","Soft rock, R&B [1]","Soft rock, R&B"
Song_C,Song_C,If I had you song simple wikipedia,"[[Single by Adam Lambert], [Electronic rock, d...",[Single by Adam Lambert],"[Electronic rock, dance-pop]",Electronic rock dance-pop,Electronic rock dance-pop


In [271]:
# Select the row(s) whose alpha label is "Song A".
songs.loc[songs["alpha"].eq("Song A")]

Unnamed: 0,alpha,beta,test,test_4,test_5
Song A,Song A,baby song wikipedia,"[[Single by Justin Bieber featuring Ludacris],...",[Single by Justin Bieber featuring Ludacris],[No genre]


In [310]:
def _join_genres(genres):
    """Return the entries of ``genres`` as one space-separated string.

    A plain string is returned unchanged: joining it would iterate over its
    characters and turn "No genre" into "N o g e n r e".
    """
    if isinstance(genres, str):
        return genres
    return ' '.join(map(str, genres))


# Flatten each test_5 entry into a single string.
songs["test_6"] = songs["test_5"].apply(_join_genres)

In [311]:
# Display songs to check the joined text in test_6.
songs

Unnamed: 0,alpha,beta,test,test_4,test_5,test_6,test_7
Song A,Song A,baby song wikipedia,"[[Single by Justin Bieber featuring Ludacris],...",[Single by Justin Bieber featuring Ludacris],[No genre],No genre,No genre
Song B,Song B,Drift away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]",[Single by Dobie Gray],"[Soft rock, R&B [1]]","Soft rock, R&B [1]","Soft rock, R&B"
Song_C,Song_C,If I had you song simple wikipedia,"[[Single by Adam Lambert], [Electronic rock, d...",[Single by Adam Lambert],"[Electronic rock, dance-pop]",Electronic rock dance-pop,Electronic rock dance-pop


In [285]:
# Rows whose test_6 text still contains an opening square bracket.
# The pattern is a raw string: "\[" in a normal literal is an invalid escape
# sequence and triggers a warning on current Python versions.
songs.loc[songs.test_6.str.contains(r"\[")]

Unnamed: 0,alpha,beta,test,test_4,test_5,test_6
Song B,Song B,Drift away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]",[Single by Dobie Gray],"[Soft rock, R&B [1]]","Soft rock, R&B [1]"


In [312]:
# Drop the literal "[1]" marker from each test_6 string.
songs["test_7"] = songs["test_6"].apply(lambda genre_text: genre_text.replace("[1]", ""))

In [313]:
# Display songs to compare test_6 with test_7.
songs

Unnamed: 0,alpha,beta,test,test_4,test_5,test_6,test_7
Song A,Song A,baby song wikipedia,"[[Single by Justin Bieber featuring Ludacris],...",[Single by Justin Bieber featuring Ludacris],[No genre],No genre,No genre
Song B,Song B,Drift away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]",[Single by Dobie Gray],"[Soft rock, R&B [1]]","Soft rock, R&B [1]","Soft rock, R&B"
Song_C,Song_C,If I had you song simple wikipedia,"[[Single by Adam Lambert], [Electronic rock, d...",[Single by Adam Lambert],"[Electronic rock, dance-pop]",Electronic rock dance-pop,Electronic rock dance-pop


In [292]:
# Rows whose test_6 text mentions "R&B".
songs.loc[songs["test_6"].str.contains("R&B")]

Unnamed: 0,alpha,beta,test,test_4,test_5,test_6,test_7
Song B,Song B,Drift away song wikipedia,"[[Single by Dobie Gray], [Soft rock, R&B [1]]]",[Single by Dobie Gray],"[Soft rock, R&B [1]]","Soft rock, R&B [1]","Soft rock, R&B"
