# **GLOBAL SOLUTION 1 - PLN**

---


INTEGRANTES:

* Henrique Marra Barbosa - RM97672

* Arthur Hieda Cunha - RM551882

* Lucas Bueno Taets Gustavo - RM552162

> Instalando bibliotecas

In [69]:
!pip install textblob
!pip install vaderSentiment
!pip install -U spacy
!python -m spacy download en_core_web_sm
!pip install geopy
!pip install pandas
!pip install ipython

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.1/12.8 MB 4.3 MB/s eta 0:00:03
     --- ------------------------------------ 1.1/12.8 MB 14.6 MB/s eta 0:00:01
     ---------- ----------------------------- 3.4/12.8 MB 27.2 MB/s eta 0:00:01
     --------------------- ------------------ 7.0/12.8 MB 40.7 MB/s eta 0:00:01
     ------------------------------ --------- 9.8/12.8 MB 45.0 MB/s eta 0:00:01
     --------------------------------------  12.8/12.8 MB 72.6 MB/s eta 0:00:01
     --------------------------------------- 12.8/12.8 MB 65.5 MB/s eta 0:00:00
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


> Importando

In [1]:
# Imports necessários
import json
import spacy
from geopy.geocoders import Nominatim
import pandas as pd
from IPython.display import display
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

> Base de dados

In [2]:
# Carregar tweets do arquivo JSON
with open('tweets.json', 'r') as file:
    tweets = json.load(file)

> Extração de dados

In [3]:
# Análise de Sentimento usando Vader
analyzer = SentimentIntensityAnalyzer()

def analyze_sentiment(tweet):
    vs = analyzer.polarity_scores(tweet)
    polarity = vs['compound']
    
    # Definindo a categoria do sentimento
    if polarity > 0.05:
        sentiment_category = 'Positive'
    elif polarity < -0.05:
        sentiment_category = 'Negative'
    else:
        sentiment_category = 'Neutral'
        
    return polarity, sentiment_category


# Extração de Informações
nlp = spacy.load('en_core_web_sm')

def extract_information(tweet):
    doc = nlp(tweet)
    locations = [ent.text for ent in doc.ents if ent.label_ in ['GPE', 'LOC']]
    persons = [ent.text for ent in doc.ents if ent.label_ == 'PERSON']
    organizations = [ent.text for ent in doc.ents if ent.label_ == 'ORG']
    return locations, persons, organizations

# Geolocalização
geolocator = Nominatim(user_agent="abcd")

def get_coordinates(location):
    try:
        loc = geolocator.geocode(location)
        return (loc.latitude, loc.longitude) if loc else (None, None)
    except Exception as e:
        return (None, None)

In [4]:
# Processar tweets
tweet_data = []
location_cache = {}

for tweet in tweets:
    sentiment = analyze_sentiment(tweet)
    info = extract_information(tweet)
    coordinates = []
    for loc in info[0]:
        if loc not in location_cache:
            location_cache[loc] = get_coordinates(loc)
        coordinates.append(location_cache[loc])
    tweet_data.append({
        'Tweet': tweet,
        'Polarity': sentiment[0],
        'Sentiment Category': sentiment[1],
        'Locations': info[0],
        'Persons': info[1],
        'Organizations': info[2],
        'Coordinates': coordinates
    })

> Criando o dataframe

In [5]:
# Converter para DataFrame
df = pd.DataFrame(tweet_data)

# Função para exibir DataFrame com formatação bonita
def display_dataframe(df):
    styled_df = df.style.set_table_styles(
        [{'selector': 'th',
          'props': [('background-color', '#f2f2f2'),
                    ('color', 'black'),
                    ('font-weight', 'bold')]},
         {'selector': 'tr:nth-child(even)',
          'props': [('background-color', '#f9f9f9')]},
         {'selector': 'tr:nth-child(odd)',
          'props': [('background-color', 'white')]}]
    ).set_properties(**{'text-align': 'left', 'padding': '10px'})
    
    display(styled_df)

# Salvar em CSV
df.to_csv('tweet_analysis.csv', index=False)

# Exibir DataFrame formatado
display_dataframe(df)

Unnamed: 0,Tweet,Polarity,Sentiment Category,Locations,Persons,Organizations,Coordinates
0,There's a lot of plastic waste near the Great Barrier Reef. #OceanPollution,0.2023,Positive,[],[],[],[]
1,Saw oil spill near the coast of California. Authorities should act quickly! #SaveOurOceans,0.0,Neutral,['California'],[],[],"[(None, None)]"
2,Illegal dumping of waste by a company in the Gulf of Mexico. This needs to stop! #CleanSeas,-0.8805,Negative,['the Gulf of Mexico'],[],[],"[(None, None)]"
3,Plastic bottles and bags are all over the beach in Miami. #PollutionAlert,0.0,Neutral,['Miami'],['PollutionAlert'],[],"[(25.7741728, -80.19362)]"
4,Fishermen spotted dumping waste in the Arabian Sea. Authorities must take action. #OceanCleanUp,-0.6249,Negative,['the Arabian Sea'],[],[],"[(None, None)]"
5,A cruise ship was seen releasing waste into the Mediterranean Sea. #StopPollution,-0.4215,Negative,['the Mediterranean Sea'],[],[],"[(None, None)]"
6,Tons of garbage found floating near the shores of Bali. This is heartbreaking. #OceanCrisis,-0.4588,Negative,['Bali'],[],[],"[(-8.3304977, 115.0906401)]"
7,Marine life is suffering due to chemical waste near the coast of Florida. #EcoDisaster,-0.7096,Negative,['Florida'],['EcoDisaster'],[],"[(None, None)]"
8,Volunteer group cleaning up plastic waste near Sydney Harbour. #CleanOceans,-0.4215,Negative,['Sydney Harbour'],['Volunteer'],[],"[(-33.8440332, 151.219873)]"
9,Residents report sewage spill affecting marine life in the Bay of Bengal. #SaveMarineLife,0.0,Neutral,['the Bay of Bengal'],[],[],"[(None, None)]"
