## Installing Modules

**Installing Required Libraries/Modules.**

In [1]:
!pip install requests beautifulsoup4 pandas openpyxl matplotlib ipywidgets



## Importing Libraries

**Imports all necessary Python Modules/Libraries.**

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

## Defining the Company List

**Provides company details as input.**

In [3]:
# (display name, corporate home page) for every company covered by the analysis.
_COMPANY_SITES = (
    ("Nestle", "https://www.nestle.com"),
    ("Dr. Reddy's Laboratories", "https://www.drreddys.com"),
    ("Coca-Cola", "https://www.coca-colacompany.com"),
    ("Pfizer", "https://www.pfizer.com"),
    ("PepsiCo", "https://www.pepsico.com"),
    ("Johnson & Johnson", "https://www.jnj.com"),
    ("Danone", "https://www.danone.com"),
    ("Bayer", "https://www.bayer.com"),
    ("General Mills", "https://www.generalmills.com"),
    ("GlaxoSmithKline (GSK)", "https://www.gsk.com"),
    ("Kellogg’s", "https://www.kelloggs.com"),
    ("Merck & Co.", "https://www.merck.com"),
    ("Unilever", "https://www.unilever.com"),
    ("Roche", "https://www.roche.com"),
    ("Nestle Waters", "https://www.nestlewaters.com"),
    ("Sanofi", "https://www.sanofi.com"),
    ("Mondelez International", "https://www.mondelezinternational.com"),
    ("Novartis", "https://www.novartis.com"),
    ("Kraft Heinz", "https://www.kraftheinzcompany.com"),
    ("Eli Lilly and Company", "https://www.lilly.com"),
    ("Tyson Foods", "https://www.tysonfoods.com"),
    ("Teva Pharmaceuticals", "https://www.tevapharm.com"),
    ("Mars, Incorporated", "https://www.mars.com"),
    ("AbbVie", "https://www.abbvie.com"),
    ("Campbell Soup Company", "https://www.campbellsoupcompany.com"),
    ("Amgen", "https://www.amgen.com"),
    ("Conagra Brands", "https://www.conagrabrands.com"),
    ("AstraZeneca", "https://www.astrazeneca.com"),
    ("Molson Coors", "https://www.molsoncoors.com"),
    ("Boehringer Ingelheim", "https://www.boehringeringelheim.com"),
    ("AB InBev", "https://www.abinbev.com"),
    ("BASF", "https://www.basf.com"),
    ("Diageo", "https://www.diageo.com"),
    ("Procter & Gamble (P&G)", "https://www.pg.com"),
    ("Heineken", "https://www.theheinekencompany.com"),
    ("Medtronic", "https://www.medtronic.com"),
    ("McKesson", "https://www.mckesson.com"),
    ("AmerisourceBergen", "https://www.amerisourcebergen.com"),
    ("Cardinal Health", "https://www.cardinalhealth.com"),
    ("Medline Industries", "https://www.medline.com"),
)

# Expand the table into the {"name": ..., "url": ...} records used by the other cells.
companies = [{"name": site_name, "url": site_url} for site_name, site_url in _COMPANY_SITES]

##  Scraping Data

**Scrapes websites for relevance and category.**

In [4]:
# Substrings whose presence in the page text marks a company as relevant.
_RELEVANCE_KEYWORDS = ("probiotics", "health", "nutrition")

# Ordered (category, keywords) rules; the first rule with a matching keyword wins.
_CATEGORY_RULES = (
    ("F&B", ("food", "beverage")),
    ("Pharma", ("pharma", "medicine")),
    ("Healthcare", ("healthcare", "nutrition")),
)


def _classify_text(text):
    """Return ``(relevance, category)`` for lower-cased page text via substring matching."""
    relevance = (
        "Relevant"
        if any(keyword in text for keyword in _RELEVANCE_KEYWORDS)
        else "Not Relevant"
    )
    category = next(
        (
            label
            for label, keywords in _CATEGORY_RULES
            if any(keyword in text for keyword in keywords)
        ),
        "Other",
    )
    return relevance, category


def scrape_company_info(company):
    """Fetch a company's home page and classify it by keyword.

    Args:
        company: Mapping with a ``"name"`` and a ``"url"`` entry.

    Returns:
        A dict with the keys ``"Company Name"``, ``"Website"``, ``"Relevance"``
        and ``"Category"``. When the page cannot be fetched or parsed, both
        ``"Relevance"`` and ``"Category"`` are set to ``"Error"``.
    """
    try:
        # Fetch website content
        response = requests.get(company["url"], timeout=10)
        # An HTTP error page (403, 404, 5xx, ...) must not be classified as if it
        # were the company's real content, so treat it as a failure.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        relevance, category = _classify_text(soup.get_text().lower())
    except Exception as exc:
        # Best effort: one failing site must not abort the whole run, but report
        # the reason instead of hiding it.
        print(f"Could not analyse {company['name']} ({company['url']}): {exc}")
        relevance = category = "Error"

    return {
        "Company Name": company["name"],
        "Website": company["url"],
        "Relevance": relevance,
        "Category": category,
    }

# Scrape every company in list order, keeping one result record per site
data = list(map(scrape_company_info, companies))

# Tabulate the records and write them to an Excel workbook without the row index
df = pd.DataFrame(data)
df.to_excel("company_relevance_analysis1.xlsx", index=False)

##  Downloading the Excel File

**Saves the processed data to an Excel file and enables downloading.**

In [5]:
# One result record per company, in the same order as `companies`
data = [scrape_company_info(company) for company in companies]

In [6]:
# Build a DataFrame from the list of per-company result dicts (one row per company)
df = pd.DataFrame(data)

In [8]:
# Write the results to an Excel workbook (row index omitted)
df.to_excel("company_relevance_analysis1.xlsx", index=False)

# The browser download helper only exists on Google Colab; elsewhere the
# workbook simply stays in the working directory.
try:
    from google.colab import files
except ImportError:
    print("Saved company_relevance_analysis1.xlsx (browser download is only available on Google Colab).")
else:
    files.download("company_relevance_analysis1.xlsx")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Conditional Formatting for Relevance and Visualizing Data

**Shows 'Relevant' entries in green and 'Not Relevant' entries in red.**

**Loading Excel File.**
**Highlights relevance, plots category distribution, and displays styled DataFrame.**

In [7]:
# Path of the Excel workbook to load; change it to read a different file
file_path = 'company_relevance_analysis1.xlsx'
# Read the workbook into a DataFrame
data = pd.read_excel(file_path)

In [8]:
from IPython.display import display, HTML

# Path to the uploaded file
file_path = 'styled_company_relevance.html'

# Display the HTML content.
# The path is passed as `filename=` so IPython reads the file explicitly and a
# missing file raises an error instead of the path string being shown as markup.
display(HTML(filename=file_path))

Unnamed: 0,Company Name,Website,Relevance,Category
0,Nestle,https://www.nestle.com,Relevant,F&B
1,Dr. Reddy's Laboratories,https://www.drreddys.com,Relevant,Pharma
2,Coca-Cola,https://www.coca-colacompany.com,Not Relevant,F&B
3,Pfizer,https://www.pfizer.com,Relevant,Pharma
4,PepsiCo,https://www.pepsico.com,Relevant,F&B
5,Johnson & Johnson,https://www.jnj.com,Relevant,Pharma
6,Danone,https://www.danone.com,Relevant,F&B
7,Bayer,https://www.bayer.com,Not Relevant,Pharma
8,General Mills,https://www.generalmills.com,Relevant,F&B
9,GlaxoSmithKline (GSK),https://www.gsk.com,Relevant,Pharma


## Category Distribution Plot

**Lists the category of each company in a small sample, grouped into relevant and not relevant companies.**

In [12]:
import pandas as pd

# Sample data: You can replace this with the actual data you're working with
# One tuple per company, in the column order given by `_sample_columns`.
_sample_columns = ("Company Name", "Website", "Relevance", "Category")
_sample_rows = (
    ("Nestle", "https://www.nestle.com", "Relevant", "F&B"),
    ("Dr. Reddy's Laboratories", "https://www.drreddys.com", "Relevant", "Pharma"),
    ("Coca-Cola", "https://www.coca-colacompany.com", "Not Relevant", "F&B"),
    ("Pfizer", "https://www.pfizer.com", "Relevant", "Pharma"),
    ("PepsiCo", "https://www.pepsico.com", "Relevant", "F&B"),
    ("Johnson & Johnson", "https://www.jnj.com", "Relevant", "Pharma"),
    ("Danone", "https://www.danone.com", "Relevant", "F&B"),
    ("Bayer", "https://www.bayer.com", "Not Relevant", "Pharma"),
    ("General Mills", "https://www.generalmills.com", "Relevant", "F&B"),
    ("GlaxoSmithKline (GSK)", "https://www.gsk.com", "Relevant", "Pharma"),
)

# Pivot the rows into the column -> list-of-values mapping used to build the frame
data = {
    column: [row[position] for row in _sample_rows]
    for position, column in enumerate(_sample_columns)
}

df = pd.DataFrame(data)

# Split the companies by their relevance label
relevant_companies = df.loc[df["Relevance"].eq("Relevant")]
not_relevant_companies = df.loc[df["Relevance"].eq("Not Relevant")]

# Show name and category for the relevant companies, then for the others
print("Relevant Companies and their Categories:", relevant_companies[["Company Name", "Category"]], sep="\n")
print("\nNot Relevant Companies and their Categories:", not_relevant_companies[["Company Name", "Category"]], sep="\n")


Relevant Companies and their Categories:
               Company Name Category
0                    Nestle      F&B
1  Dr. Reddy's Laboratories   Pharma
3                    Pfizer   Pharma
4                   PepsiCo      F&B
5         Johnson & Johnson   Pharma
6                    Danone      F&B
8             General Mills      F&B
9     GlaxoSmithKline (GSK)   Pharma

Not Relevant Companies and their Categories:
  Company Name Category
2    Coca-Cola      F&B
7        Bayer   Pharma


## Interactive Dropdown Widget

**Allows the user to view details of a specific company interactively.**


In [13]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Reuse the `companies` list defined in the "Defining the Company List" cell
# rather than keeping a second 40-entry copy here that could drift out of sync.

# Converting to DataFrame (columns: name, url)
df_companies = pd.DataFrame(companies)

# Displaying the table
df_companies.head()

Unnamed: 0,name,url
0,Nestle,https://www.nestle.com
1,Dr. Reddy's Laboratories,https://www.drreddys.com
2,Coca-Cola,https://www.coca-colacompany.com
3,Pfizer,https://www.pfizer.com
4,PepsiCo,https://www.pepsico.com


In [14]:
# Dropdown widget for selecting company
company_dropdown = widgets.Dropdown(
    options=df_companies['name'].tolist(),
    description='Company:',
    disabled=False,
)

# Dedicated output area: results printed from a widget callback are only shown
# reliably when they are captured by an Output widget.
company_output = widgets.Output()


def show_company_info(change):
    """Show the row of ``df_companies`` matching the newly selected company.

    Args:
        change: Change notification passed by ``observe``; its ``new``
            attribute holds the selected company name.
    """
    selected_company = df_companies[df_companies['name'] == change.new]
    with company_output:
        # Replace the previous selection instead of stacking one table per change
        company_output.clear_output(wait=True)
        display(selected_company)


# Displaying the dropdown and calling the function on change
company_dropdown.observe(show_company_info, names='value')
display(company_dropdown, company_output)


Dropdown(description='Company:', options=('Nestle', "Dr. Reddy's Laboratories", 'Coca-Cola', 'Pfizer', 'PepsiC…

Unnamed: 0,name,url
16,Mondelez International,https://www.mondelezinternational.com


Unnamed: 0,name,url
0,Nestle,https://www.nestle.com


Unnamed: 0,name,url
15,Sanofi,https://www.sanofi.com


Unnamed: 0,name,url
2,Coca-Cola,https://www.coca-colacompany.com
