<a href="https://colab.research.google.com/github/LCSLITX/Colab_fun/blob/main/Where_are_the_top_500_companies_from.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Where are the top 500 companies from?

I created this notebook for fun and to improve on [this map from a Reddit post](https://www.reddit.com/r/MapPorn/comments/1fbb327/where_are_the_top_500_most_valuable_companies_from/).


In [1]:
# Install all libs needed to execute the code.
!pip install selenium
!pip install google-colab-selenium
!pip install pycountry==20.7.3



In [2]:
import json
import pandas as pd
import plotly.express as px
import google_colab_selenium as gs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pycountry

In [3]:
# Start a Chrome session through google_colab_selenium and open Fortune's
# Global 500 search page.
FORTUNE_GLOBAL_500_URL = 'https://fortune.com/ranking/global500/search/'

driver = gs.Chrome()
driver.get(FORTUNE_GLOBAL_500_URL)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
# Parse the JSON embedded in the page's `script#__NEXT_DATA__` element and pull the
# list of companies out of it into a variable called `companies`.
next_data_script = driver.find_element(By.CSS_SELECTOR, 'script#__NEXT_DATA__')
content = json.loads(next_data_script.get_attribute('innerHTML'))
companies = content['props']['pageProps']['franchiseList']['items']

# The cells below do not use the browser again, so shut it down instead of leaving
# the Chrome session running for the rest of the notebook. To execute this cell
# again, re-run the driver initialization cell first to get a fresh session.
driver.quit()

In [5]:
# Keep only the fields the analysis needs from each company entry.
def _pick_fields(entry):
    """Return the rank, name and country/territory of one company entry."""
    return {
        "rank": entry['rank'],
        "name": entry['name'],
        "country": entry['data']['Country / Territory'],
    }


result = [_pick_fields(entry) for entry in companies]

In [6]:
# Count how many companies come from each country.
data = {}
for entry in result:
    country_name = entry['country']
    data[country_name] = data.get(country_name, 0) + 1

# Rename the source's country labels to the names used for the later country-code
# lookup. A label that is absent is skipped (a bare `pop` would raise KeyError), and
# a count already stored under the new name is added to rather than overwritten.
country_renames = {
    'U.S.': 'US',
    'South Korea': 'Republic of Korea',
}
for old_name, new_name in country_renames.items():
    if old_name in data:
        data[new_name] = data.get(new_name, 0) + data.pop(old_name)

In [7]:
# Build a DataFrame with one row per country and its company count.
df = pd.DataFrame(list(data.items()), columns=['country', 'quantity'])

In [8]:
# Attach an alpha-3 country code to every row; the map below uses the
# `country_code` column (not the country name) as its locations.
def _alpha3_or_none(country_name):
    """Return the alpha-3 code of the first fuzzy match for `country_name`.

    Prints a notice and returns None when pycountry raises LookupError.
    """
    try:
        return pycountry.countries.search_fuzzy(country_name)[0].alpha_3
    except LookupError:
        print(f"Could not find country: {country_name}")
        return None


country_mapping = {name: _alpha3_or_none(name) for name in df['country']}

df['country_code'] = df['country'].map(country_mapping)

In [9]:
# Custom color scale used instead of plotly's default.
colorscale = [
    "rgb(255, 51, 51)",
    "rgb(210, 231, 154)",
    "rgb(94, 179, 39)",
    "rgb(67, 136, 33)",
    "rgb(33, 74, 12)",
]

# Draw the world map, coloring each country by its number of companies.
fig = px.choropleth(
    df,
    locations='country_code',
    color='quantity',
    color_continuous_scale=colorscale,
    projection='natural earth',
    title='Where are the top 500 companies from',
)
fig.show()