# Scraping the list of all countries of the world from Wikipedia

In [3]:
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [4]:

# Download the Wikipedia list of sovereign states and turn its main table
# into a DataFrame / CSV with one row per country.
url = 'https://en.wikipedia.org/wiki/List_of_sovereign_states'

# Send a browser-like User-Agent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
print(response)
soup = BeautifulSoup(response.content, "html.parser")

table = soup.find(class_="sortable wikitable sticky-header")
country_names = []
un_member_status = []
sovereignity_disputes = []

if table is not None:
    for row in table.find_all("tr"):
        # Header rows contain only <th> cells, so they yield an empty list.
        cells = [
            cell.text.strip().replace("\xa0", "")
            for cell in row.find_all("td")
        ]

        # Skip rows that do not carry all three data columns; appending
        # a partial row would leave the column lists with different lengths
        # and make the DataFrame constructor fail.
        if len(cells) < 3:
            continue

        # Keep only the short name: drop everything after the first
        # en dash and after the first comma.
        country_names.append(cells[0].split("–")[0].split(",")[0])
        un_member_status.append(cells[1])
        sovereignity_disputes.append(cells[2])

    # Create DataFrame
    df_countries = pd.DataFrame({
        'country_names': country_names,
        'un_member_status': un_member_status,
        'sovereignity_disputes': sovereignity_disputes
    })
    # drop_duplicates() returns a new frame, so the result must be assigned.
    df_countries = df_countries.drop_duplicates().reset_index(drop=True)

    # Make sure the target directory exists before writing.
    os.makedirs('outputs', exist_ok=True)
    df_countries.to_csv('outputs/countries_of_the_world.csv', index=False)
    print(df_countries)

else:
    print("Table not found")


<Response [200]>
    country_names   un_member_status sovereignity_disputes
0     Afghanistan  A UN member state                A None
1         Albania  A UN member state                A None
2         Algeria  A UN member state                A None
3         Andorra  A UN member state                A None
4          Angola  A UN member state                A None
..            ...                ...                   ...
190     Venezuela  A UN member state                A None
191       Vietnam  A UN member state                A None
192         Yemen  A UN member state                A None
193        Zambia  A UN member state                A None
194      Zimbabwe  A UN member state                A None

[195 rows x 3 columns]


# API Call
## Get top 10 hits from Spotify for each country

In [None]:
import config
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Initialize the Spotipy client with the client id/secret stored in config
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.SPOTIPY_CLIENT_ID,
        client_secret=config.SPOTIPY_CLIENT_SECRET,
    )
)

def get_music_hits(country_name):
    """Return songs from the first Spotify playlist found for a country.

    Searches Spotify playlists for ``"Top-10 Hits in {country_name}"`` and
    reads at most 25 entries from the first non-empty search hit.

    Args:
        country_name: Country name inserted into the search query and written
            to the ``country_name`` column of the result.

    Returns:
        A DataFrame with the columns ``Song``, ``Artist`` and
        ``country_name``, or ``None`` when the search returned no playlist
        or the playlist's tracks could not be loaded.
    """
    print("country_name: " + country_name)
    playlist_results = sp.search(q=f"Top-10 Hits in {country_name}", type="playlist", limit=15)

    # The search result may contain empty entries; drop them.
    playlists = [p for p in playlist_results['playlists']['items'] if p]
    if not playlists:
        return None  # no Playlist found

    song_limit = 25
    try:
        # Only the first `song_limit` entries are used, so request exactly
        # that many instead of paging through the whole playlist.
        results = sp.playlist_tracks(playlists[0]['id'], limit=song_limit)
        items = results['items'][:song_limit]
    except Exception as exc:
        # Best effort: one failing playlist must not abort the whole run.
        # `Exception` (not a bare except) lets Ctrl-C still interrupt.
        print(f"Could not load playlist tracks for {country_name}: {exc}")
        return None

    # Extract track names and artists
    track_list = []
    for item in items:
        try:
            track = item['track']
            track_list.append({
                "Song": track['name'],
                "Artist": track['artists'][0]['name'],
            })
        except (KeyError, IndexError, TypeError):
            # Entry without a usable track (e.g. track is None or has no
            # artists) - skip it.
            continue

    # Fix the columns so an empty result still has the same layout.
    df_tracks = pd.DataFrame(track_list, columns=["Song", "Artist"])
    df_tracks['country_name'] = country_name
    return df_tracks



# Collect one DataFrame of hits per country; countries for which
# get_music_hits() returned None are skipped.
df_all_hits = []
for country_name in df_countries["country_names"]:
    df = get_music_hits(country_name)
    if df is not None:
        df_all_hits.append(df)

# Combine all DataFrames in the list into a single DataFrame.
# pd.concat() raises ValueError on an empty list, so fall back to an
# empty frame with the expected columns when nothing was collected.
if df_all_hits:
    combined_df = pd.concat(df_all_hits, ignore_index=True)
else:
    combined_df = pd.DataFrame(columns=["Song", "Artist", "country_name"])

print(combined_df)
# Make sure the target directory exists before writing.
os.makedirs('outputs', exist_ok=True)
combined_df.to_csv('outputs/music_hits_all_countries.csv', index=False)

country_name: Afghanistan
Playlist: Afghan Hits 🇦🇫 , ID: 4k3PsJDXjxqqObZZI6Zu8r
country_name: Albania
Playlist: Shqip HITS - 2025, ID: 6nqlcLv75XvVRhY4afNEDy
country_name: Algeria
Playlist: Top 50: Algeria, ID: 4eSH1o9RQqnbN8ZX7CIVUI
country_name: Andorra
Playlist: Andorra, ID: 5kJUnP0FH11NHhoU92z9kN
country_name: Angola
Playlist: Hits - Top 100 Angola 2025 🇦🇴, ID: 4zqrD7Pi0vIzrN6LVmq6gR
country_name: Antigua and Barbuda
Playlist: Antigua and Barbuda, ID: 38bzETkEYJNkcR8vhqMlMz
country_name: Argentina
Playlist: TOP ARGENTINA 2025, ID: 6qv68QBGb4BKfLUb92MSik
country_name: Armenia
Playlist: Armenian HITS🇦🇲, ID: 5gHR8aj3itCt3LK9NkmaYN
country_name: Australia
Playlist: Top 100 most streamed songs on Spotify *Updated*, ID: 5ABHKGoOzxkaa28ttQV9sE
country_name: Austria
Playlist: Hits 2025 🔥 🔥  Aktuelle Charts und die besten Songs 2025 🔝 🙌, ID: 1CnDCN10TJZjw6K2H3gNRv
country_name: Azerbaijan
Playlist: Azerbaijan Top 100 (Azərbaycan Top 100), ID: 3c01GeeAVg0kPqjyX7dfhj
country_name: Bahamas
Pla

In [7]:
# Combine all DataFrames in the list into a single DataFrame.
# pd.concat() raises ValueError on an empty list, so fall back to an
# empty frame with the expected columns when nothing was collected.
if df_all_hits:
    combined_df = pd.concat(df_all_hits, ignore_index=True)
else:
    combined_df = pd.DataFrame(columns=["Song", "Artist", "country_name"])

print(combined_df)
# Make sure the target directory exists before writing.
os.makedirs('outputs', exist_ok=True)
combined_df.to_csv('outputs/music_hits_all_countries.csv', index=False)

                     Song             Artist country_name
0          Gulha Mekhanda      Samir Roashan  Afghanistan
1             Dil Meravad  Madina Aknazarova  Afghanistan
2     Sitara-E- Bal Balli       Habib Qaderi  Afghanistan
3           Khaake Khasta       Farhad Darya  Afghanistan
4           Angoore Labet       Farhad Darya  Afghanistan
...                   ...                ...          ...
1942      CATCH N RELEASE                ExQ     Zimbabwe
1943               Benita         Leo Magozz     Zimbabwe
1944              Di Best           Biggy MC     Zimbabwe
1945               Shungu       Nicky Genius     Zimbabwe
1946      Kusvika Rinyure            Oriyano     Zimbabwe

[1947 rows x 3 columns]
