# Maps of music tours
Example script for making maps of singers' music tours based on Wikipedia information, using pandas, geopandas and geopy.

In [None]:
import pandas
import geopandas
from pyproj import CRS
import geoplot
import requests
from bs4 import BeautifulSoup
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import matplotlib.pyplot as plt

## Scraping data from Wikipedia page
Defining the target page, which contains a table with the tour dates.

In [None]:
# Defining the url of the Wikipedia page to be scraped
page_url = 'https://en.wikipedia.org/wiki/The_Mrs._Carter_Show_World_Tour#Shows'
# Downloading the page; requests has no default timeout, so one is set
# explicitly to keep the cell from hanging if the server never answers
response = requests.get(page_url, timeout=30)
# Printing the HTTP status code to verify that the page could be retrieved
print(response.status_code)

In [None]:
# Building a BeautifulSoup tree from the downloaded html
soup = BeautifulSoup(markup=response.text, features='html.parser')
# Keeping the first <table> element on the page that carries the "wikitable" class
tour_places = soup.find(name='table', attrs={'class': "wikitable"})

In [None]:
# Previewing the first 500 characters of the table's text content
tour_places.get_text()[:500]

In [None]:
from io import StringIO

# Transforming the scraped table into pandas objects. read_html returns a list
# of dataframes (one per <table> found); the markup is wrapped in StringIO
# because passing a literal html string directly is deprecated in recent pandas.
tables = pandas.read_html(StringIO(str(tour_places)))
# Only one table was selected, so the first list entry is the tour dataframe
df = tables[0]
# Showing the end of the table, where the summary ("Total") row sits
df.tail()

In [None]:
# Checking column titles (needed to pick the columns of interest by name)
df.columns

In [None]:
# Creating a new dataframe containing only the columns of interest
new_df = df[['Date', 'City', 'Country', 'Venue', 'Attendance', 'Revenue']]
# Removing the last row containing the Total; the row is located through the
# index itself rather than a hard-coded label, so the cell keeps working if
# the number of rows in the scraped table changes
new_df = new_df.drop(index=new_df.index[-1])
new_df

In [None]:
# Dropping the comma from "Washington, D.C." wherever a cell holds exactly that value
new_df = new_df.replace(to_replace='Washington, D.C.', value='Washington D.C.')

In [None]:
# Building the address column as "<City>, <Country>"
city = new_df['City']
country = new_df['Country']
new_df['location'] = city + ', ' + country
new_df['location']

In [None]:
# Previewing the first five rows, now including the location column
new_df.head(n=5)

In [None]:
# Defining the locator element
locator = Nominatim(user_agent='prova')
# Wrapping the geocoder so that consecutive calls are at least 1.5 seconds apart
geocode = RateLimiter(locator.geocode, min_delay_seconds=1.5)
# Creating location column (the lambda below treats a falsy result as a failed lookup)
new_df['location2'] = new_df['location'].apply(geocode)
# Creating latitude, longitude and altitude from location column (returns tuple, or None on failure)
new_df['point'] = new_df['location2'].apply(lambda loc: tuple(loc.point) if loc else None)
# Splitting point column into latitude, longitude and altitude columns.
# Failed lookups are padded with a NaN triple so that every row has exactly
# three values; a bare None in the list (especially in the first position)
# would otherwise keep pandas from building the three expected columns.
coordinate_columns = ['latitude', 'longitude', 'altitude']
missing_point = (float('nan'),) * 3
coordinates = [point if point is not None else missing_point for point in new_df['point']]
new_df[coordinate_columns] = pandas.DataFrame(coordinates, index=new_df.index, columns=coordinate_columns)

In [None]:
# Previewing the first five rows with the new coordinate columns
new_df.head(n=5)

In [None]:
# Counting the rows whose latitude is missing
missing_latitude = new_df['latitude'].isna()
missing_latitude.sum()

In [None]:
# Listing the rows for which the geolocator did not provide coordinates
new_df.loc[new_df['latitude'].isna()]

In [None]:
# Keeping only the rows that have a latitude and renumbering them from 0
has_coordinates = new_df['latitude'].notna()
new_df = new_df.loc[has_coordinates].reset_index(drop=True)
new_df.head(n=5)

In [None]:
# Create geodataframe from dataframe: one point per row, x = longitude and
# y = latitude. The coordinate reference system is declared explicitly
# (EPSG:4326, geographic longitude/latitude in degrees) so that the
# geodataframe can later be reprojected or compared with other layers.
gdf = geopandas.GeoDataFrame(
    new_df,
    geometry=geopandas.points_from_xy(new_df.longitude, new_df.latitude),
    crs="EPSG:4326",
)

In [None]:
# Previewing the first five rows of the geodataframe
gdf.head(n=5)

In [None]:
# Loading the countries shapefile used as world base map
world = geopandas.read_file(filename='ne_10m_admin_0_countries.shp')

In [None]:
# Plotting results: the tour stops drawn on top of a translucent grey world map
fig, ax = plt.subplots(figsize=(24, 18))
world.plot(color='grey', alpha=0.4, ax=ax)
gdf.plot(ax=ax)

In [None]:
gdf.columns
# Mapping from the scraped header labels to short names.
# NOTE(review): this dictionary is only defined here and is not applied in
# this cell; the columns are overwritten positionally below.
col_rename = {
    "Date": "Date",
    "'City', 'Leg 1 — Europe[6][123][124][125]'": "City",
    "'Country', 'Leg 1 — Europe[6][123][124][125]'": "Country",
    "'Venue', 'Leg 1 — Europe[6][123][124][125]')": "Venue",
    "'Attendance', 'Leg 1 — Europe[6][123][124][125]'": "Attendance",
    "'Revenue', 'Leg 1 — Europe[6][123][124][125]'": "Revenue",
}
# Assigning the final column names by position (the list must match the current column order)
gdf.columns = [
    'Date',
    'City',
    'Country',
    'Venue',
    'Attendance',
    'Revenue',
    'location',
    'location2',
    'point',
    'latitude',
    'longitude',
    'altitude',
    'geometry',
]
gdf.head()

In [None]:
# Cleaning the Revenue strings before converting them to integers: commas,
# the dollar sign and the characters "a", "k", "[" and "]" (presumably
# footnote markers such as "[a]" - verify against the scraped table) are
# removed in a single pass. The pattern is an explicit regular-expression
# character class, so the result does not depend on the pandas default for
# `regex`, which changed between versions ("$", "[" and "]" are regex
# metacharacters outside a character class).
gdf['Revenue'] = gdf['Revenue'].str.replace(r'[,$ak\[\]]', '', regex=True)
gdf['Revenue'] = gdf['Revenue'].astype(int)

In [None]:
# Inspecting the last 100 rows after the Revenue conversion
gdf.tail(n=100)

In [None]:
# Keeping only the part of the Attendance string that precedes " / "
gdf['Attendance'] = gdf['Attendance'].str.split(' / ').str[0]
# Removing commas and the characters "[", "]", "a" and "k" (presumably
# footnote markers such as "[a]" - verify against the scraped table) in a
# single pass; the explicit regular-expression character class makes the
# result independent of the pandas default for `regex`
gdf['Attendance'] = gdf['Attendance'].str.replace(r'[,\[\]ak]', '', regex=True)
gdf['Attendance'] = gdf['Attendance'].astype(int)

In [None]:
# Previewing the first five rows after the numeric conversions
gdf.head(n=5)

In [None]:
# Working copy of the geodataframe (independent of gdf)
new = gdf.copy(deep=True)

In [None]:
import os

# Rendering one image per tour stop: a globe in orthographic projection
# centred on the stop, with the stop itself highlighted.

# Making sure the output folder exists before the first image is saved
os.makedirs("images", exist_ok=True)

tour_stops = zip(gdf['latitude'], gdf['longitude'], gdf['location'], gdf['Date'])
for i, (lat, lon, place, date) in enumerate(tour_stops):
    # Orthographic projection centred on the current stop
    ortho_crs = CRS.from_proj4("+proj=ortho +lat_0={} +lon_0={} +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs".format(lat, lon))
    world_ortho = world.to_crs(ortho_crs)
    # The stop is built from its longitude/latitude and reprojected into the
    # same CRS as the base map, so that the marker and the 6,400 km buffer
    # below are expressed in metres like the rest of the axes (previously the
    # degree coordinates were drawn and buffered directly in metre space)
    stop_point = geopandas.GeoSeries(
        geopandas.points_from_xy([lon], [lat]), crs="EPSG:4326"
    ).to_crs(ortho_crs)

    # A single figure per stop. The extra plt.figure(...) that used to be
    # created here was immediately replaced by this one and never closed.
    # NOTE(review): plt.style.context('Solarize_Light2') was called without a
    # `with` statement and therefore had no effect; it is left out so the
    # rendered images keep their current look.
    fig, ax = plt.subplots(figsize=(15, 15))
    # Light disc used as background of the globe
    stop_point.buffer(6400000).plot(
        color='#F3F4ED',
        alpha=0.9,
        edgecolor='black',
        ax=ax,
        linewidth=2,
    )
    # Countries in the orthographic projection
    world_ortho.plot(
        color='#536162',
        edgecolor='none',
        ax=ax,
    )
    # Marker of the current stop
    stop_point.plot(
        facecolors='#FF5F00',
        ax=ax,
        edgecolor='#FF5F00',
        linewidth=0.7,
    )
    ax.set_title(str(place) + ' - ' + str(date), fontname="Special Elite", color="black", fontsize=20)
    ax.set_axis_off()
    filename = "images/fig" + str(i) + ".jpg"
    fig.savefig(filename)
    # Closing the figure so that open figures do not accumulate across iterations
    plt.close(fig)