# **Data Extraction**

In [2]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import sqlite3
import random

In [3]:
# List of user agents
user_agents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    # Add more user agents as needed
]

# Select a random user agent
user_agent = random.choice(user_agents)

# Define headers with the selected user agent
headers = {
    'User-Agent': user_agent,
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',}
    # Add more headers as needed

## Assets

In [4]:
url = 'https://companiesmarketcap.com/assets-by-market-cap/'
r = requests.get(url, headers=headers)
soup = BeautifulSoup(r.text, 'lxml')
table = soup.find('table', {'class':'default-table table marketcap-table dataTable'})


entries = table.find_all('tr')
entries = entries[1:]

names = []
trends =[]
pct_change = []
m_cap = []
prices = []
links = []
asset_desc = []

for entry in entries:
    name = entry.find('div', {'class':'company-name'})
    names.append(name.text.strip())

    earn = entry.find('td', {'class':'rh-sm'})
    trend = 'down' if earn.find('span', {'class': 'percentage-red'}) else 'up'
    trends.append(trend)
    
    day_ct = earn.text
    pct_change.append(day_ct)

    fintext = entry.find_all('td', {'class':'td-right'})
    cap = fintext[1].text.strip()
    m_cap.append(cap)
    
    price = fintext[2].text.strip()
    prices.append(price)
    

    url = entry.find('a')
    if url:
        link = url['href']
        full_link = f'https://companiesmarketcap.com{link}'
    else:
        full_link = None

    links.append(full_link)

for link in links:
    if link:
        r = requests.get(link)
        soup = BeautifulSoup(r.text, 'lxml')
        
        div = soup.find('div', {'class': 'col-sm-9'})
        if not div:
            div = soup.find('div', {'class': 'col-lg-4 company-description'})
        
        text = div.text.strip() if div else None
        asset_desc.append(text)
    else:
        asset_desc.append(None)      
        
# Save to dataframe        
asset_df = pd.DataFrame({'Asset': names, 'Market Cap': m_cap, 'Share Price': prices,
                   'Day Change': pct_change, 'Trend': trends,  'Description': asset_desc
                   })


# Data cleaning

# Function to convert values to billions
def convert_to_billion(value):
    if 'T' in value:
        return float(value.replace('T', '')) * 1000
    elif 'B' in value:
        return float(value.replace('B', ''))
    else:
        return float(value)

# Apply the conversion function and strip '$' and 'T'
asset_df['Market Cap'] = asset_df['Market Cap'].str.replace(r'[$,]', '', regex=True).apply(convert_to_billion)

# Rename the column
asset_df = asset_df.rename(columns={'Market Cap': 'Market Cap($B)'})
asset_df['Share Price'] = asset_df['Share Price'].str.replace(r'[$, ,]', '', regex=True).astype(float)
asset_df['As Of'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')
asset_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Asset           100 non-null    object 
 1   Market Cap($B)  100 non-null    float64
 2   Share Price     100 non-null    float64
 3   Day Change      100 non-null    object 
 4   Trend           100 non-null    object 
 5   Description     62 non-null     object 
 6   As Of           100 non-null    object 
dtypes: float64(2), object(5)
memory usage: 5.6+ KB


In [5]:
# Save to SQLite Database
conn = sqlite3.connect('data/Marketcap.db')
cursor = conn.cursor()

# Update table
asset_df.to_sql('Assets', conn, if_exists='append', index=False)

conn.commit()

print('Data successfully added to database table')

# Close the connection
conn.close()

Data successfully added to database table


## Earnings

In [6]:
# Generate a list of URLs for the first 10 pages of the most profitable companies

urls = []
base = 'https://companiesmarketcap.com/most-profitable-companies/page/'
for pg in range(1, 11):
  url = f'{base}{pg}'
  urls.append(url)


In [7]:
all_df = []
for url in urls:
  r = requests.get(url, headers=headers)
  soup = BeautifulSoup(r.text, 'lxml')
  table = soup.find('tbody')

  names = soup.find_all('div', {'class':'company-name'})
  names = [name.text.strip() for name in names]

  trends =[]
  earnings = soup.find_all('td', {'class':'rh-sm'})
  for earn in earnings:
      if earn.find('span', {'class':'percentage-red'}):
        trend = 'down'
      else:
        trend = 'up'
      trends.append(trend)

  day_ct = [earning.text for earning in earnings]

  tds = soup.find_all('td', {'class':'td-right'})
  earnings = []
  prices = []
  for i in range(0, len(tds), 3):
        earnings.append(tds[i+1].text.strip())
        prices.append(tds[i+2].text.strip())

  countries = soup.find_all('span',{'class':'responsive-hidden'})
  countries = countries[1:]
  countries = [country.text.strip() for country in countries]

  data = {'Company':names, 'Earnings':earnings, 'Share Price':prices, 'Pct_change':day_ct, 'Trend':trends, 'Country':countries }
  df = pd.DataFrame(data)

  # Data cleaning
  # Function to convert values to billions
  def convert_to_billion(value):
      if 'T' in value:
          return float(value.replace('T', '')) * 1000
      elif 'B' in value:
          return float(value.replace('B', ''))
      else:
          return float(value)

  # Apply the conversion function and strip '$' and 'T'
  df['Earnings'] = df['Earnings'].str.replace(r'[$,]', '', regex=True).apply(convert_to_billion)

  # Rename the column
  df = df.rename(columns={'Earnings': 'Earnings($B)'})
  df['Share Price'] = df['Share Price'].str.replace(r'[$, ,]', '', regex=True).astype(float)
  df['Pct_change'] = df['Pct_change'].str.replace(r'%', '', regex=True).astype(float)
  all_df.append(df)

# Concatenate all DataFrames into one
earnings_df = pd.concat(all_df, ignore_index=True)

# Drop duplicates 
earnings_df.drop_duplicates(subset='Company', keep='first', inplace=True)
earnings_df['As Of'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')

# Sort alphabetically and reset index
earnings_df = earnings_df.sort_values(by='Company')
earnings_df = earnings_df.reset_index(drop=True)
earnings_df['As Of'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')

earnings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Company       1000 non-null   object 
 1   Earnings($B)  1000 non-null   float64
 2   Share Price   1000 non-null   float64
 3   Pct_change    1000 non-null   float64
 4   Trend         1000 non-null   object 
 5   Country       1000 non-null   object 
 6   As Of         1000 non-null   object 
dtypes: float64(3), object(4)
memory usage: 54.8+ KB


In [8]:
earnings_df.head()

Unnamed: 0,Company,Earnings($B),Share Price,Pct_change,Trend,Country,As Of
0,3i Group,5.66,39.66,0.13,down,UK,2024-07-23 20:30
1,7-Eleven,1.67,11.52,0.06,down,Japan,2024-07-23 20:30
2,ABB,4.84,55.42,1.6,up,Switzerland,2024-07-23 20:30
3,ABN AMRO,4.05,18.0,1.38,up,Netherlands,2024-07-23 20:30
4,AES,1.46,17.78,1.51,up,USA,2024-07-23 20:30


In [9]:
# Save to SQLite Database
conn = sqlite3.connect('data/Marketcap.db')
cursor = conn.cursor()

# Update table
earnings_df.to_sql('Earnings', conn, if_exists='append', index=False)

conn.commit()

print('Data successfully added to database table')

# Close the connection
conn.close()

Data successfully added to database table


## Market cap

In [10]:
# Generate urls for companies with the highest market caps

base = 'https://companiesmarketcap.com/page/'
urls = []
for pg in range(1,11):
  url = f'{base}{pg}'
  urls.append(url)

In [11]:
all_df = []
for url in urls:
  r = requests.get(url, headers=headers)
  soup = BeautifulSoup(r.text, 'lxml')
  table = soup.find('tbody')

  names = soup.find_all('div', {'class':'company-name'})
  names = [name.text.strip() for name in names]

  trends =[]
  earnings = soup.find_all('td', {'class':'rh-sm'})
  for earn in earnings:
      if earn.find('span', {'class':'percentage-red'}):
        trend = 'down'
      else:
        trend = 'up'
      trends.append(trend)

  day_ct = [earning.text for earning in earnings]

  tds = soup.find_all('td', {'class':'td-right'})
  earnings = []
  prices = []
  for i in range(0, len(tds), 3):
        earnings.append(tds[i+1].text.strip())
        prices.append(tds[i+2].text.strip())

  countries = soup.find_all('span',{'class':'responsive-hidden'})
  countries = countries[1:]
  countries = [country.text.strip() for country in countries]

  data = {'Company':names, 'Market cap':earnings, 'Share Price':prices, 'Pct_change':day_ct, 'Trend':trends, 'Country':countries }
  df = pd.DataFrame(data)

  # Data cleaning
  # Function to convert values to billions
  def convert_to_billion(value):
      if 'T' in value:
          return float(value.replace('T', '')) * 1000
      elif 'B' in value:
          return float(value.replace('B', ''))
      else:
          return float(value)

  # Apply the conversion function and strip '$' and 'T'
  df['Market cap'] = df['Market cap'].str.replace(r'[$,]', '', regex=True).apply(convert_to_billion)

  # Rename the column
  df = df.rename(columns={'Market cap': 'Market cap($B)'})
  df['Share Price'] = df['Share Price'].str.replace(r'[$, ,]', '', regex=True).astype(float)
  df['Pct_change'] = df['Pct_change'].str.replace(r'%', '', regex=True).astype(float)
  all_df.append(df)

# Concatenate all DataFrames into one
market_df = pd.concat(all_df, ignore_index=True)
market_df.drop_duplicates(subset='Company', keep='first', inplace=True)

# Sort alphabetically and reset index
market_df = market_df.sort_values(by='Company')
market_df = market_df.reset_index(drop=True)
market_df['As Of'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')

market_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Company         1000 non-null   object 
 1   Market cap($B)  1000 non-null   float64
 2   Share Price     1000 non-null   float64
 3   Pct_change      1000 non-null   float64
 4   Trend           1000 non-null   object 
 5   Country         1000 non-null   object 
 6   As Of           1000 non-null   object 
dtypes: float64(3), object(4)
memory usage: 54.8+ KB


In [12]:
market_df.head()

Unnamed: 0,Company,Market cap($B),Share Price,Pct_change,Trend,Country,As Of
0,3M,58.03,104.87,0.1,down,USA,2024-07-23 20:30
1,3i Group,38.24,39.66,0.13,down,UK,2024-07-23 20:30
2,7-Eleven,29.88,11.52,0.06,down,Japan,2024-07-23 20:30
3,ABB,102.42,55.42,1.6,up,Switzerland,2024-07-23 20:30
4,ABB India,19.0,89.71,2.77,down,India,2024-07-23 20:30


In [13]:
# Save to SQLite Database
conn = sqlite3.connect('data/Marketcap.db')
cursor = conn.cursor()

# Update table
market_df.to_sql('Marketcap', conn, if_exists='append', index=False)

conn.commit()

print('Data successfully added to database table')

# Close the connection
conn.close()

Data successfully added to database table


## Revenue

In [14]:
base = 'https://companiesmarketcap.com/largest-companies-by-revenue/page/'
urls = []
for pg in range(1,11):
  url = f'{base}{pg}'
  urls.append(url)

In [15]:
all_df = []
for url in urls:
  r = requests.get(url, headers=headers)
  soup = BeautifulSoup(r.text, 'lxml')
  table = soup.find('tbody')

  names = soup.find_all('div', {'class':'company-name'})
  names = [name.text for name in names]

  trends =[]
  earnings = soup.find_all('td', {'class':'rh-sm'})
  for earn in earnings:
      if earn.find('span', {'class':'percentage-red'}):
        trend = 'down'
      else:
        trend = 'up'
      trends.append(trend)

  day_ct = [earning.text for earning in earnings]

  tds = soup.find_all('td', {'class':'td-right'})
  earnings = []
  prices = []
  for i in range(0, len(tds), 3):
        earnings.append(tds[i+1].text.strip())
        prices.append(tds[i+2].text.strip())

  countries = soup.find_all('span',{'class':'responsive-hidden'})
  countries = countries[1:]
  countries = [country.text for country in countries]

  data = {'Company':names, 'Revenue':earnings, 'Share Price':prices, 'Pct_change':day_ct, 'Trend':trends, 'Country':countries }
  df = pd.DataFrame(data)

  # Data cleaning
  # Function to convert values to billions
  def convert_to_billion(value):
      if 'T' in value:
          return float(value.replace('T', '')) * 1000
      elif 'B' in value:
          return float(value.replace('B', ''))
      else:
          return float(value)

  # Apply the conversion function and strip '$' and 'T'
  df['Revenue'] = df['Revenue'].str.replace(r'[$,]', '', regex=True).apply(convert_to_billion)

  # Rename the column
  df = df.rename(columns={'Revenue': 'Revenue($B)'})
  df['Share Price'] = df['Share Price'].str.replace(r'[$, ,]', '', regex=True).astype(float)
  df['Pct_change'] = df['Pct_change'].str.replace(r'%', '', regex=True).astype(float)
  all_df.append(df)

# Concatenate all DataFrames into one
revenue_df = pd.concat(all_df, ignore_index=True)

revenue_df.drop_duplicates(subset='Company', keep='first', inplace=True)

# Sort alphabetically and reset index
revenue_df = revenue_df.sort_values(by='Company')
revenue_df = revenue_df.reset_index(drop=True)
revenue_df['As Of'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')

revenue_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Company      1000 non-null   object 
 1   Revenue($B)  1000 non-null   float64
 2   Share Price  1000 non-null   float64
 3   Pct_change   1000 non-null   float64
 4   Trend        1000 non-null   object 
 5   Country      1000 non-null   object 
 6   As Of        1000 non-null   object 
dtypes: float64(3), object(4)
memory usage: 54.8+ KB


In [16]:
revenue_df.head(10)

Unnamed: 0,Company,Revenue($B),Share Price,Pct_change,Trend,Country,As Of
0,3M,32.65,104.87,0.1,down,USA,2024-07-23 20:30
1,7-Eleven,77.07,11.52,0.06,down,Japan,2024-07-23 20:30
2,A2A,18.24,2.13,0.46,up,Italy,2024-07-23 20:30
3,ABB,32.24,55.42,1.6,up,Switzerland,2024-07-23 20:30
4,AECOM,15.34,90.23,0.02,up,USA,2024-07-23 20:30
5,AEGON\n,12.01,6.34,0.86,down,Netherlands,2024-07-23 20:30
6,AES,12.51,17.78,1.51,up,USA,2024-07-23 20:30
7,AGC,13.85,33.67,1.21,up,Japan,2024-07-23 20:30
8,AGCO,14.0,99.69,2.0,down,USA,2024-07-23 20:30
9,AIA,20.69,6.88,0.19,down,Hong Kong,2024-07-23 20:30


In [17]:
# Save to SQLite Database
conn = sqlite3.connect('data/Marketcap.db')
cursor = conn.cursor()

# Update table
revenue_df.to_sql('Revenue', conn, if_exists='append', index=False)

conn.commit()

print('Data successfully added to database table')

# Close the connection
conn.close()

Data successfully added to database table


## Employee size

In [18]:
base = 'https://companiesmarketcap.com/largest-companies-by-number-of-employees/page/'
urls = []
for pg in range(1,11):
  url = f'{base}{pg}'
  urls.append(url)

In [19]:
all_df = []
for url in urls:
  r = requests.get(url, headers=headers)
  soup = BeautifulSoup(r.text, 'lxml')
  table = soup.find('tbody')

  names = soup.find_all('div', {'class':'company-name'})
  names = [name.text.strip() for name in names]

  trends =[]
  earnings = soup.find_all('td', {'class':'rh-sm'})
  for earn in earnings:
      if earn.find('span', {'class':'percentage-red'}):
        trend = 'down'
      else:
        trend = 'up'
      trends.append(trend)

  day_ct = [earning.text for earning in earnings]

  tds = soup.find_all('td', {'class':'td-right'})
  employees = []
  prices = []
  for i in range(0, len(tds), 3):
        employees.append(tds[i+1].text.strip())
        prices.append(tds[i+2].text.strip())

  countries = soup.find_all('span',{'class':'responsive-hidden'})
  countries = countries[1:]
  countries = [country.text.strip() for country in countries]

  data = {'Company':names, 'Employees':employees, 'Share Price':prices, 'Pct_change':day_ct, 'Trend':trends, 'Country':countries }
  df = pd.DataFrame(data)

  # Data cleaning
  df['Employees'] = df['Employees'].str.replace(r'[,]', '', regex=True).astype(int)
  df['Share Price'] = df['Share Price'].str.replace(r'[$, ,]', '', regex=True).astype(float)
  df['Pct_change'] = df['Pct_change'].str.replace(r'%', '', regex=True).astype(float)
  all_df.append(df)

# Concatenate all DataFrames into one
employee_df = pd.concat(all_df, ignore_index=True)

employee_df.drop_duplicates(subset='Company', keep='first', inplace=True)

# Sort alphabetically and reset index
employee_df = employee_df.sort_values(by='Company')
employee_df = employee_df.reset_index(drop=True)
employee_df['As Of'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')

employee_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Company      1000 non-null   object 
 1   Employees    1000 non-null   int64  
 2   Share Price  1000 non-null   float64
 3   Pct_change   1000 non-null   float64
 4   Trend        1000 non-null   object 
 5   Country      1000 non-null   object 
 6   As Of        1000 non-null   object 
dtypes: float64(2), int64(1), object(4)
memory usage: 54.8+ KB


In [20]:
employee_df.head(10)

Unnamed: 0,Company,Employees,Share Price,Pct_change,Trend,Country,As Of
0,3M,85000,104.87,0.1,down,USA,2024-07-23 20:30
1,7-Eleven,84154,11.52,0.06,down,Japan,2024-07-23 20:30
2,ABB,108700,55.42,1.6,up,Switzerland,2024-07-23 20:30
3,ABM Industries,100000,54.51,0.17,up,USA,2024-07-23 20:30
4,AECOM,52000,90.23,0.02,up,USA,2024-07-23 20:30
5,AGC,56724,33.67,1.21,up,Japan,2024-07-23 20:30
6,AGCO,27900,99.69,2.0,down,USA,2024-07-23 20:30
7,AIER Eye Hospital,32326,1.42,3.99,down,China,2024-07-23 20:30
8,ALSOK,39039,6.2,0.87,up,Japan,2024-07-23 20:30
9,ALTEN,57000,111.8,1.34,down,France,2024-07-23 20:30


In [21]:
# Save to SQLite Database
conn = sqlite3.connect('data/Marketcap.db')
cursor = conn.cursor()

# Update table
employee_df.to_sql('Employees', conn, if_exists='append', index=False)

conn.commit()

print('Data successfully added to database table')

# Close the connection
conn.close()

Data successfully added to database table


# **Transformation**

In [28]:
# Select only the relevant columns from each DataFrame
earns = earnings_df[['Company', 'Earnings($B)', 'Country', 'Trend']]
cap = market_df[['Company', 'Market cap($B)']]
revenue = revenue_df[['Company', 'Revenue($B)']]
employee = employee_df[['Company', 'Employees', 'Share Price']]

# Merge the DataFrames on the 'Company' column
merged_df = pd.merge(earns, cap, on='Company', suffixes=('_earn', '_cap'))
merged_df = pd.merge(merged_df, revenue, on='Company', suffixes=('', '_rev'))
merged_df = pd.merge(merged_df, employee, on='Company', suffixes=('', '_emp'))

# Reorder columns
cols = merged_df.columns.tolist()
cols.remove('Trend')
cols.remove('Country')  
cols.append('Country')
cols.append('Trend')
merged_df = merged_df[cols] 
merged_df['As Of'] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')

merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Company         340 non-null    object 
 1   Earnings($B)    340 non-null    float64
 2   Market cap($B)  340 non-null    float64
 3   Revenue($B)     340 non-null    float64
 4   Employees       340 non-null    int64  
 5   Share Price     340 non-null    float64
 6   Country         340 non-null    object 
 7   Trend           340 non-null    object 
 8   As Of           340 non-null    object 
dtypes: float64(4), int64(1), object(4)
memory usage: 24.0+ KB


In [29]:
merged_df.head(10)

Unnamed: 0,Company,Earnings($B),Market cap($B),Revenue($B),Employees,Share Price,Country,Trend,As Of
0,7-Eleven,1.67,29.88,77.07,84154,11.52,Japan,down,2024-07-23 20:32
1,ABB,4.84,102.42,32.24,108700,55.42,Switzerland,up,2024-07-23 20:32
2,ANZ Bank,6.48,59.02,13.05,40342,19.66,Australia,up,2024-07-23 20:32
3,ASML,8.63,366.66,27.46,40940,931.81,Netherlands,down,2024-07-23 20:32
4,AT&T,18.95,132.54,122.31,148290,18.49,USA,down,2024-07-23 20:32
5,AXA,7.36,79.27,134.68,94705,35.2,France,up,2024-07-23 20:32
6,AbbVie,7.53,305.84,54.4,50000,173.24,USA,down,2024-07-23 20:32
7,Abbott Laboratories,6.53,180.6,40.32,114000,103.8,USA,up,2024-07-23 20:32
8,Accenture,9.37,205.84,64.47,750000,328.57,Ireland,down,2024-07-23 20:32
9,Adobe,6.57,243.01,20.42,29945,548.02,USA,down,2024-07-23 20:32


In [30]:
# Save to SQLite Database
conn = sqlite3.connect('data/Marketcap.db')
cursor = conn.cursor()

# Update table
merged_df.to_sql('CompanyMerge', conn, if_exists='append', index=False)

conn.commit()

print('Data successfully added to database table')

# Close the connection
conn.close()

Data successfully added to database table


# **Insight Extraction**

### Explore top companies

In [31]:
metrics = cols[1:-2]

print(f'Sample metrics include:')
for i, metric in enumerate(metrics, start=1):
    print(f"{i}. {metric}")

Sample metrics include:
1. Earnings($B)
2. Market cap($B)
3. Revenue($B)
4. Employees
5. Share Price


In [32]:
# Sort and display dataframe
sort_key = input('Enter a metric to sort by:')
choice = metrics[int(sort_key)-1]
sort_df = merged_df.sort_values(by=choice, ascending=False)
sort_df = sort_df.reset_index(drop=True)
print(f'\nDisplaying top companies by {choice}:\n')
sort_df.head(20)


Displaying top companies by Revenue($B):



Unnamed: 0,Company,Earnings($B),Market cap($B),Revenue($B),Employees,Share Price,Country,Trend,As Of
0,Walmart,28.92,565.94,657.33,2100000,70.36,USA,down,2024-07-23 20:32
1,Amazon,49.42,1950.0,590.74,1525000,187.33,USA,up,2024-07-23 20:32
2,Saudi Aramco,230.48,1814.0,489.43,70000,7.5,S. Arabia,down,2024-07-23 20:32
3,Sinopec,15.55,101.03,473.53,374791,0.88,China,up,2024-07-23 20:32
4,PetroChina,36.82,230.54,430.65,398440,1.31,China,down,2024-07-23 20:32
5,Apple,120.0,3444.0,381.62,150000,224.65,USA,up,2024-07-23 20:32
6,UnitedHealth,21.78,512.01,379.48,440000,556.05,USA,down,2024-07-23 20:32
7,CVS Health,12.65,72.44,360.93,219000,57.71,USA,down,2024-07-23 20:32
8,Volkswagen,23.71,59.27,348.13,656134,114.68,Germany,down,2024-07-23 20:32
9,Exxon Mobil,48.34,508.83,331.46,62000,113.41,USA,down,2024-07-23 20:32


### Explore trends

In [27]:
trend = input('Select a trend; up or down:')

filtered_df = merged_df[merged_df['Trend'] == trend]
df_size = len(filtered_df)

if trend == 'up':
  if df_size > 0:
    print(f"There are {len(filtered_df)} Companies with increasing trends today:")
    print(filtered_df['Company'].head(30))
  else:
    print("No companies found with increasing trends.")
elif trend =='down':
  if df_size > 0:
    print(f"There are {len(filtered_df)} Companies with decreasing trends today:")
    filtered_df.head(30)
  else:
    print("No companies found with decreasing trends.")
else:
  print("Invalid input. Please try again.") 


There are 142 Companies with increasing trends today:
1                                      ABB
2                                 ANZ Bank
5                                      AXA
7                      Abbott Laboratories
10              Agricultural Bank of China
12                             Air Liquide
15                                 Allianz
16                                Allstate
17                       Alphabet (Google)
18                                  Amazon
21                        American Express
22                                Amphenol
25                                     Aon
26                                   Apple
37                             BNP Paribas
38    BNY Mellon (Bank of New York Mellon)
43         Banco Bilbao Vizcaya Argentaria
44                          Banco Bradesco
45                           Bank of China
46                  Bank of Communications
49                        Becton Dickinson
51                       Boston Scientific
