In [13]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
import string

# Download NLTK resources (if not already downloaded)
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

# Example text
text = "In a groundbreaking move, a consortium of renowned tech companies from across the globe has joined forces to spearhead a transformative initiative in the renewable energy sector. The ambitious project, codenamed EcoPower, aims to revolutionize the way we harness and utilize clean energy sources to combat climate change.Led by visionary entrepreneur and philanthropist, Mark Thompson, EcoPower brings together industry pioneers such as SolarTech Solutions, WindMasters Inc., and GreenWave Energy Systems. The collaboration leverages the expertise and resources of these powerhouses to accelerate the development and deployment of cutting-edge green technologies.Thompson, the driving force behind this endeavor, expressed his optimism for the project's potential impact. We are living in a critical moment for our planet. It is imperative that we shift towards sustainable energy practices to safeguard the future of generations to come, he emphasized.The consortium's research and development teams, comprising top scientists and engineers, are already hard at work, exploring innovative approaches to maximize renewable energy generation. Promising breakthroughs in solar panel efficiency, wind turbine design, and energy storage solutions are just a few areas where significant advancements are expected.Collaborating with universities and research institutes globally, EcoPower aims to tap into the brightest minds in the field. Prof. Maria Hernandez, a leading expert in renewable energy from Stanford University, praised the initiative, stating, This collaboration is a game-changer. The combined expertise of these tech giants will accelerate the pace of innovation and drive the transition to a sustainable energy future. The impact of EcoPower extends beyond the realm of technology. The project has already garnered the support of several governments and international bodies, including the United Nations. Countries such as Germany, Sweden, and Japan have pledged their commitment to providing policy support and financial incentives to expedite the adoption of renewable energy solutions.Notably, EcoPower has also received a significant investment from Clean Energy Ventures, a venture capital firm dedicated to funding sustainable technology startups. Speaking on behalf of the company, CEO Jennifer Martinez highlighted their belief in the potential of EcoPower to reshape the energy landscape. This collaboration represents a remarkable opportunity for disruptive innovation in the renewable energy sector. We are excited to be part of this historic endeavor, she remarked.As the world grapples with the urgency of climate change, initiatives like EcoPower provide a glimmer of hope. With the combined efforts of brilliant minds, forward-thinking companies, and supportive governments, a greener and more sustainable future seems within reach."

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


In [14]:
def tokenize_text(text):
  # Tokenize the text into words
    tokens = word_tokenize(text)
    return(tokens)

In [15]:
tokenized_text = tokenize_text(text)
print(tokenized_text)

['In', 'a', 'groundbreaking', 'move', ',', 'a', 'consortium', 'of', 'renowned', 'tech', 'companies', 'from', 'across', 'the', 'globe', 'has', 'joined', 'forces', 'to', 'spearhead', 'a', 'transformative', 'initiative', 'in', 'the', 'renewable', 'energy', 'sector', '.', 'The', 'ambitious', 'project', ',', 'codenamed', 'EcoPower', ',', 'aims', 'to', 'revolutionize', 'the', 'way', 'we', 'harness', 'and', 'utilize', 'clean', 'energy', 'sources', 'to', 'combat', 'climate', 'change.Led', 'by', 'visionary', 'entrepreneur', 'and', 'philanthropist', ',', 'Mark', 'Thompson', ',', 'EcoPower', 'brings', 'together', 'industry', 'pioneers', 'such', 'as', 'SolarTech', 'Solutions', ',', 'WindMasters', 'Inc.', ',', 'and', 'GreenWave', 'Energy', 'Systems', '.', 'The', 'collaboration', 'leverages', 'the', 'expertise', 'and', 'resources', 'of', 'these', 'powerhouses', 'to', 'accelerate', 'the', 'development', 'and', 'deployment', 'of', 'cutting-edge', 'green', 'technologies.Thompson', ',', 'the', 'driving'

In [16]:
# Define a function for cleaning and tokenization
def clean_text(tokens):
    # Remove punctuation
    tokens = [token for token in tokens if token not in string.punctuation]

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]

    # Lemmatize the tokens
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]

    return tokens


In [17]:
cleaned_text = clean_text(tokenized_text)
print(cleaned_text)

['In', 'groundbreaking', 'move', 'consortium', 'renowned', 'tech', 'company', 'across', 'globe', 'joined', 'force', 'spearhead', 'transformative', 'initiative', 'renewable', 'energy', 'sector', 'The', 'ambitious', 'project', 'codenamed', 'EcoPower', 'aim', 'revolutionize', 'way', 'harness', 'utilize', 'clean', 'energy', 'source', 'combat', 'climate', 'change.Led', 'visionary', 'entrepreneur', 'philanthropist', 'Mark', 'Thompson', 'EcoPower', 'brings', 'together', 'industry', 'pioneer', 'SolarTech', 'Solutions', 'WindMasters', 'Inc.', 'GreenWave', 'Energy', 'Systems', 'The', 'collaboration', 'leverage', 'expertise', 'resource', 'powerhouse', 'accelerate', 'development', 'deployment', 'cutting-edge', 'green', 'technologies.Thompson', 'driving', 'force', 'behind', 'endeavor', 'expressed', 'optimism', 'project', "'s", 'potential', 'impact', 'We', 'living', 'critical', 'moment', 'planet', 'It', 'imperative', 'shift', 'towards', 'sustainable', 'energy', 'practice', 'safeguard', 'future', 'ge

In [18]:
def tag_text(tokens):
  # Perform Part-of-Speech (POS) tagging
  pos_tags = pos_tag(tokens)
  return(pos_tags)

In [19]:
taged_text = tag_text(tokenized_text)
print(taged_text)

[('In', 'IN'), ('a', 'DT'), ('groundbreaking', 'NN'), ('move', 'NN'), (',', ','), ('a', 'DT'), ('consortium', 'NN'), ('of', 'IN'), ('renowned', 'JJ'), ('tech', 'NN'), ('companies', 'NNS'), ('from', 'IN'), ('across', 'IN'), ('the', 'DT'), ('globe', 'NN'), ('has', 'VBZ'), ('joined', 'VBN'), ('forces', 'NNS'), ('to', 'TO'), ('spearhead', 'VB'), ('a', 'DT'), ('transformative', 'JJ'), ('initiative', 'NN'), ('in', 'IN'), ('the', 'DT'), ('renewable', 'JJ'), ('energy', 'NN'), ('sector', 'NN'), ('.', '.'), ('The', 'DT'), ('ambitious', 'JJ'), ('project', 'NN'), (',', ','), ('codenamed', 'VBD'), ('EcoPower', 'NNP'), (',', ','), ('aims', 'VBZ'), ('to', 'TO'), ('revolutionize', 'VB'), ('the', 'DT'), ('way', 'NN'), ('we', 'PRP'), ('harness', 'VBP'), ('and', 'CC'), ('utilize', 'JJ'), ('clean', 'JJ'), ('energy', 'NN'), ('sources', 'NNS'), ('to', 'TO'), ('combat', 'VB'), ('climate', 'NN'), ('change.Led', 'NN'), ('by', 'IN'), ('visionary', 'JJ'), ('entrepreneur', 'NN'), ('and', 'CC'), ('philanthropist',

In [26]:
def ner_tags(tags):
  # Extract named entities using NER
  ner_tags = ne_chunk(tags)
  print("NER: ", ner_tags)
  l=[]
  # Print the named entities
  for entity in ner_tags:
    if hasattr(entity, 'label'):
      l.append((entity.label(),' '.join(c[0] for c in entity.leaves())))
      #print(entity.label(),"***************", ' '.join(c[0] for c in entity.leaves()))
  print(l)
  # print("************************Fusion*************************************")
  # new_list = []

  # # Compteur
  # i = 0
  # while i < len(l)-1:
  #   if l[i][0] == l[i+1][0] == 'PERSON':
  #       new_tuple = (l[i][0], l[i][1] + " " + l[i+1][1])
  #       new_list.append(new_tuple)
  #       i += 2
  #   else:
  #       new_list.append(l[i])
  #       i += 1
  # # Ajouter le dernier tuple s'il n'a pas été fusionné
  # if i == len(l)-1:
  #   new_list.append(l[i])
  # # Afficher la nouvelle liste résultante
  # print(new_list)

In [27]:
ner_tags(taged_text)

NER:  (S
  In/IN
  a/DT
  groundbreaking/NN
  move/NN
  ,/,
  a/DT
  consortium/NN
  of/IN
  renowned/JJ
  tech/NN
  companies/NNS
  from/IN
  across/IN
  the/DT
  globe/NN
  has/VBZ
  joined/VBN
  forces/NNS
  to/TO
  spearhead/VB
  a/DT
  transformative/JJ
  initiative/NN
  in/IN
  the/DT
  renewable/JJ
  energy/NN
  sector/NN
  ./.
  The/DT
  ambitious/JJ
  project/NN
  ,/,
  codenamed/VBD
  (ORGANIZATION EcoPower/NNP)
  ,/,
  aims/VBZ
  to/TO
  revolutionize/VB
  the/DT
  way/NN
  we/PRP
  harness/VBP
  and/CC
  utilize/JJ
  clean/JJ
  energy/NN
  sources/NNS
  to/TO
  combat/VB
  climate/NN
  change.Led/NN
  by/IN
  visionary/JJ
  entrepreneur/NN
  and/CC
  philanthropist/NN
  ,/,
  (PERSON Mark/NNP Thompson/NNP)
  ,/,
  (ORGANIZATION EcoPower/NNP)
  brings/VBZ
  together/RB
  industry/NN
  pioneers/NNS
  such/JJ
  as/IN
  (ORGANIZATION SolarTech/NNP Solutions/NNP)
  ,/,
  (ORGANIZATION WindMasters/NNP Inc./NNP)
  ,/,
  and/CC
  (ORGANIZATION GreenWave/NNP Energy/NNP Systems/NNPS)