In [1]:
%%capture

!pip install gensim
!pip install imblearn
!pip install contractions
!pip install tensorflow

In [2]:
# Load dependencies.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import string
import re
# fastText model loading, currently disabled:
#import fasttext
#import fasttext.util
#ft = fasttext.load_model('cc.es.300.bin') # load fasttext model
import gensim
from imblearn.over_sampling import SMOTE


import nltk

# Download the NLTK resources required below.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

# WordNet can be used now that its data has been downloaded.
from nltk.corpus import wordnet

# Print the name and definition of every WordNet synset for "program".
syns = wordnet.synsets("program")
for syn in syns:
    print(syn.name(), syn.definition())

# `stopwords` and `word_tokenize` are used by TextPreprocessor in the next cell.
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer

# Stemmer and lemmatizer instances (not referenced by the other visible cells).
stemer = PorterStemmer()
lemma = WordNetLemmatizer()

# `contractions.fix` is used by TextPreprocessor in the next cell.
import contractions

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/ec2-user/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/ec2-user/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/ec2-user/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


plan.n.01 a series of steps to be carried out or goals to be accomplished
program.n.02 a system of projects or services intended to meet a public need
broadcast.n.02 a radio or television show
platform.n.02 a document stating the aims and principles of a political party
program.n.05 an announcement of the events that will occur as part of a theatrical or sporting event
course_of_study.n.01 an integrated course of academic studies
program.n.07 (computer science) a sequence of instructions that a computer can interpret and execute
program.n.08 a performance (or series of performances) at a public presentation
program.v.01 arrange a program of or for
program.v.02 write a computer program


In [3]:
import boto3
import logging
import pandas as pd
from botocore.exceptions import ClientError
from io import StringIO
import json
import pickle
import re
import string
# import contractions
# from nltk.corpus import stopwords
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Set up logging
logger = logging.getLogger()
logging.basicConfig(level=logging.INFO)

class S3Handler:
    """Reads text, Keras model and pickle objects from a single S3 bucket.

    Attributes:
        s3: boto3 S3 client used for every request.
        bucket_name: Name of the bucket that all keys are resolved against.
    """

    def __init__(self, bucket_name):
        """Create the S3 client and remember which bucket to read from.

        Args:
            bucket_name: Name of the S3 bucket.
        """
        self.s3 = boto3.client('s3')
        self.bucket_name = bucket_name

    def _read_object_bytes(self, key):
        """Return the complete contents of object ``key`` as bytes."""
        obj = self.s3.get_object(Bucket=self.bucket_name, Key=key)
        body = obj['Body']
        try:
            return body.read()
        finally:
            # Release the underlying HTTP connection once the payload is read.
            close = getattr(body, 'close', None)
            if close is not None:
                close()

    def load_text_from_s3(self, key):
        """Load a UTF-8 text object and return its lines as a list of comments.

        Args:
            key: S3 object key of the text file.

        Returns:
            list[str]: The lines of the file after stripping leading and
            trailing whitespace from the whole text. Blank lines in the middle
            of the file are kept.
        """
        # 'utf-8-sig' decodes plain UTF-8 and also drops a leading byte-order
        # mark, which would otherwise stay glued to the first comment.
        text_data = self._read_object_bytes(key).decode('utf-8-sig')
        comments = text_data.strip().splitlines()
        return comments

    def load_model_from_s3(self, key):
        """Download a saved Keras model from S3 and load it.

        The object is written to a private temporary directory that is removed
        once ``load_model`` returns, instead of a fixed path under ``/tmp``
        that would be left behind and could collide between concurrent runs.

        Args:
            key: S3 object key of the saved model file.

        Returns:
            The object returned by ``load_model`` for the downloaded file.
        """
        import os
        import tempfile

        # Keep the key's file extension, falling back to the original '.h5'.
        suffix = os.path.splitext(key)[1] or '.h5'
        with tempfile.TemporaryDirectory() as tmp_dir:
            local_path = os.path.join(tmp_dir, 'temp_model' + suffix)
            with open(local_path, 'wb') as f:
                f.write(self._read_object_bytes(key))
            model = load_model(local_path)
        return model

    def load_class_weights_from_s3(self, key):
        """Load and unpickle the object stored at ``key``.

        Args:
            key: S3 object key of the pickle file.

        Returns:
            Whatever object the pickle payload deserializes to.
        """
        # SECURITY: unpickling can execute arbitrary code. Only use this with
        # objects written by a trusted producer to a bucket you control.
        return pickle.loads(self._read_object_bytes(key))

class TextPreprocessor:
    """Handles text preprocessing for model input."""

    # Cache for the Spanish stop-word set. It is built on first use so the
    # NLTK corpus is not re-read and re-hashed for every comment.
    _spanish_stop_words = None

    @staticmethod
    def _get_stop_words():
        """Return the NLTK Spanish stop-word set, building it on first use."""
        if TextPreprocessor._spanish_stop_words is None:
            TextPreprocessor._spanish_stop_words = frozenset(stopwords.words('spanish'))
        return TextPreprocessor._spanish_stop_words

    @staticmethod
    def preprocess_text(text):
        """Normalize one comment into a space-joined string of tokens.

        Steps, in order: lowercase, expand contractions, delete every
        character in ``string.punctuation``, delete runs of digits, tokenize
        with ``word_tokenize`` and drop Spanish stop words.

        Args:
            text (str): Raw comment text.

        Returns:
            str: The remaining tokens joined by single spaces.
        """
        text = text.lower()
        # NOTE(review): confirm `contractions.fix` is meant for Spanish input
        # and matches the preprocessing used when the model was trained.
        text = contractions.fix(text)
        # string.punctuation only covers ASCII punctuation.
        text = text.translate(str.maketrans('', '', string.punctuation))
        text = re.sub(r'\d+', '', text)
        stop_words = TextPreprocessor._get_stop_words()
        word_tokens = word_tokenize(text)
        filtered_text = [word for word in word_tokens if word not in stop_words]
        return ' '.join(filtered_text)

class GBVAnalyzer:
    """Analyzes text for indicators of gender-based violence.

    Attributes:
        model: Object exposing ``predict``; called once per comment.
        class_weights: Stored as given; not used by ``analyze_comment``.
        tokenizer: Tokenizer that converts text into integer sequences.
        maxlen: Length each sequence is padded or truncated to.
    """

    def __init__(self, model, class_weights, maxlen=100, tokenizer=None):
        """Store the model and the tokenizer used to encode comments.

        Args:
            model: Trained model exposing ``predict``.
            class_weights: Class weights associated with the model.
            maxlen (int): Sequence length passed to ``pad_sequences``.
            tokenizer: Optional tokenizer that has already been fitted (for
                example the one saved at training time). When given, it is
                used as-is and never re-fitted. When omitted, a new
                ``Tokenizer(num_words=10000)`` is created and fitted on each
                incoming comment, which is the legacy behaviour.
        """
        self.model = model
        self.class_weights = class_weights
        self.maxlen = maxlen
        # Decided once here; replacing `self.tokenizer` later does not
        # change whether the tokenizer is re-fitted per comment.
        self._refit_per_comment = tokenizer is None
        if tokenizer is None:
            logger.warning(
                "GBVAnalyzer created without a fitted tokenizer; the vocabulary "
                "will be re-fitted on every comment and is unlikely to match the "
                "word indices the model was trained with."
            )
            tokenizer = Tokenizer(num_words=10000)
        self.tokenizer = tokenizer

    def analyze_comment(self, text):
        """Predict the class of a single comment.

        Args:
            text (str): Raw comment text.

        Returns:
            The result of ``argmax(axis=-1)`` over the model's predictions for
            the single encoded comment.
        """
        processed_text = TextPreprocessor.preprocess_text(text)
        if self._refit_per_comment:
            # Legacy path: word indices are derived from the comments seen so
            # far, so they change from call to call.
            self.tokenizer.fit_on_texts([processed_text])
        sequences = self.tokenizer.texts_to_sequences([processed_text])
        X = pad_sequences(sequences, maxlen=self.maxlen)
        predictions = self.model.predict(X)
        predicted_class = predictions.argmax(axis=-1)
        return predicted_class

class SnsWrapper:
    """Publishes alert messages to a single Amazon SNS topic."""

    def __init__(self, sns_topic_arn):
        """Remember the target topic ARN and create the SNS client."""
        self.sns_topic_arn = sns_topic_arn
        self.sns_client = boto3.client("sns")

    def publish_alert(self, subject, message):
        """Publish ``message`` with ``subject`` to the configured topic.

        Args:
            subject: Subject line of the notification.
            message: Body of the notification.

        Returns:
            The ``MessageId`` from the publish response.

        Raises:
            ClientError: Re-raised after being logged when publishing fails.
        """
        publish_kwargs = {
            "TopicArn": self.sns_topic_arn,
            "Subject": subject,
            "Message": message,
        }
        try:
            result = self.sns_client.publish(**publish_kwargs)
        except ClientError:
            logger.exception("Failed to send alert.")
            raise
        message_id = result['MessageId']
        logger.info("Published alert with Message ID: %s", message_id)
        return message_id

def main(
    s3_bucket='un-datathon-2024-sisifos',
    s3_key_model='GBV_Analysis/model/model.h5',
    s3_key_class_weights='GBV_Analysis/model/class_weights.pickle',
    sns_topic_arn="arn:aws:sns:us-west-2:080532742200:TweetSentimentAlert",
    s3_key_data='GBV_Analysis/comments.txt',
):
    """Score every comment in an S3 text file and send an SNS alert per hit.

    Loads the model and class weights from S3, reads the comments file (one
    comment per line), runs each non-blank comment through ``GBVAnalyzer`` and
    publishes an alert for each comment whose predicted class is 1.

    Args:
        s3_bucket: Bucket holding the model, class weights and comments.
        s3_key_model: Key of the saved Keras model.
        s3_key_class_weights: Key of the pickled class weights.
        sns_topic_arn: ARN of the SNS topic that receives alerts.
        s3_key_data: Key of the text file with one comment per line.

    Raises:
        ClientError: Propagated from ``SnsWrapper.publish_alert``; a failed
            publish stops processing of the remaining comments.
    """
    # Local import so this function does not depend on another cell's imports.
    import numpy as np

    # Class index treated as "flag this comment".
    # NOTE(review): confirm index 1 is the GBV class in the trained model.
    positive_class = 1

    # Initialize handlers and clients
    s3_handler = S3Handler(s3_bucket)
    sns_wrapper = SnsWrapper(sns_topic_arn)

    # Load GBV detection model and class weights
    gbv_model = s3_handler.load_model_from_s3(s3_key_model)
    class_weights = s3_handler.load_class_weights_from_s3(s3_key_class_weights)
    # NOTE(review): no training-time tokenizer is loaded here; confirm whether
    # one should be stored and loaded alongside the model.
    gbv_analyzer = GBVAnalyzer(gbv_model, class_weights)

    # Load comments from the txt file
    comments = s3_handler.load_text_from_s3(s3_key_data)

    # Process each comment
    for comment_text in comments:
        # Blank lines carry no content; do not score them or alert on them.
        if not comment_text.strip():
            continue

        prediction = gbv_analyzer.analyze_comment(comment_text)
        # analyze_comment returns an array for the single comment; reduce it
        # to a plain int instead of relying on array truthiness in the `if`.
        predicted_label = int(np.ravel(prediction)[0])

        # If the comment is flagged as gender-based violence, send an alert
        if predicted_label == positive_class:
            subject = "Urgent: Gender-Based Violence Alert"
            message = f"Alert: A potential case of gender-based violence was detected.\nMessage: '{comment_text}'"
            sns_wrapper.publish_alert(subject, message)
            # NOTE(review): this echoes the raw, possibly sensitive comment
            # into the notebook output.
            print(f"Alert sent for comment: {comment_text}")

# Run the pipeline when this cell is executed in the notebook (where
# __name__ is "__main__") or when the code is run as a script.
if __name__ == "__main__":
    main()


2024-11-11 18:47:22.851831: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731350842.870909   19445 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731350842.876396   19445 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-11 18:47:22.896133: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
2024-11-11 18:47:48.01

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 623ms/step


INFO:root:Published alert with Message ID: a9148037-4152-5dcd-941e-df9b12c7fc46


Alert sent for comment: Mi jefe me hizo insinuaciones sexuales incómodas en la oficina, y nadie me escucha cuando trato de denunciarlo.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step


INFO:root:Published alert with Message ID: 94027de6-cec6-5b05-b512-224a9d507a8e


Alert sent for comment: Un hombre en el transporte público intentó tocarme de manera inapropiada, fue muy intimidante y me siento insegura.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step


INFO:root:Published alert with Message ID: 376501e8-b193-5d94-addf-5e44e205ec1f


Alert sent for comment: Mi pareja me empujó y me golpeó en una discusión, y temo que pueda pasar de nuevo.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step


INFO:root:Published alert with Message ID: fb48d1b8-81d1-53e3-ab80-8e14d2476268


Alert sent for comment: Fui acosada sexualmente por un compañero de trabajo, que siempre hace comentarios sobre mi cuerpo.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step


INFO:root:Published alert with Message ID: 30c794dd-3a80-5c8a-a87e-cb7169160d01


Alert sent for comment: Mi esposo controla todos los gastos y no me deja acceder a nuestro dinero, ni siquiera para comprar cosas básicas.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


INFO:root:Published alert with Message ID: 35dba6e6-f98a-5dc0-b3e4-f94aaa1c0c4b


Alert sent for comment: Un hombre en el trabajo intentó besarme a la fuerza y luego me amenazó con despedirme si contaba algo.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step


INFO:root:Published alert with Message ID: 7474e2d2-1fc6-520a-a429-bd7e4c509381


Alert sent for comment: Mis padres constantemente me critican y hacen que dude de mis capacidades, haciéndome sentir inferior.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
