In [88]:
# Import the required modules and dependencies
import pandas as pd 
import numpy as np
import json
import openpyxl
from afinn import *
import sys

# Add the project's ``src`` directory to the import search path
# (presumably where data_ingestor / data_cleaner / data_analyser live).
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
module_path = 'D:/Reepo_Git/Google_play_store/src'
sys.path.append(module_path)

from data_ingestor import *
from data_cleaner import *
from data_analyser import *

# Environment settings: read the data locations from the project's JSON config.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
config_path = 'D:/Reepo_Git/Google_play_store/'
# Decode explicitly as UTF-8: without it open() falls back to the locale
# encoding, which can mis-decode non-ASCII characters in the configured paths.
with open(config_path + 'config.json', 'r', encoding='utf-8') as file:
    config = json.load(file)

raw_data_path = config['raw_data_path']
clean_data_path = config['clean_data_path']
positive_words_path = config['positive_words_path']
negative_words_path = config["negative_words_path"]

# Name of the raw user-reviews file, relative to raw_data_path.
file_rev = 'googleplaystore_user_reviews.csv'




#############################

# Load the raw reviews file through the project's ingestor.
db_rev = DataIngressor()
db_rev.load_file(raw_data_path + file_rev)

# Keep only the app name and the translated review text, then discard
# duplicated rows and rows with missing values.
db_rev_mod = (
    db_rev.series_view(["App", "Translated_Review"])
    .drop_duplicates()
    .dropna()
)

# Persist the cleaned DataFrame as a CSV file.
db_rev_mod.to_csv(clean_data_path + 'clean_' + file_rev)

##################################

class DataAnalyser:
    """Score review sentiment with an Afinn lexicon and aggregate it per app.

    Expected call order: ``building_afinn`` -> (optionally) ``add_word`` ->
    ``sentiment_score`` -> ``sentiment_update``.
    """

    def __init__(self, dbreview):
        """Store the review DataFrame to analyse.

        Args:
            dbreview (pandas.DataFrame): Reviews; the methods of this class
                read its "App" and "Translated_Review" columns.
        """
        self.afin = None  # set by building_afinn()
        self.dbreview = dbreview

    def building_afinn(self, language="en"):
        """Create the Afinn scorer used by the other methods.

        Args:
            language (str): Language of the texts. Default: 'en'.
        """
        self.afin = Afinn(language=language)

    @staticmethod
    def _read_terms(path):
        """Return the cleaned terms from the first column of an Excel file."""
        column = pd.read_excel(path, header=None)[0]
        # Drop empty cells and normalise to stripped, lower-case strings:
        # Afinn lower-cases the text before matching, so a term containing
        # upper-case letters could never be found.
        cleaned = column.dropna().astype(str).str.strip().str.lower()
        return [term for term in cleaned if term]

    def add_word(self, terminipositivi_path, termininegativi_path):
        """Add custom words to the Afinn lexicon.

        Positive terms get a score of 2 and negative terms a score of -2.
        Negative terms are applied last, so a term present in both files
        ends up with -2. ``building_afinn`` must have been called first.

        Args:
            terminipositivi_path (str): Path of the Excel file with the
                positive terms (first column, no header row).
            termininegativi_path (str): Path of the Excel file with the
                negative terms (first column, no header row).

        Returns:
            dict: The updated Afinn word -> score dictionary.
        """
        positive_terms = self._read_terms(terminipositivi_path)
        negative_terms = self._read_terms(termininegativi_path)

        lexicon = self.afin._dict
        lexicon.update(dict.fromkeys(positive_terms, 2))
        lexicon.update(dict.fromkeys(negative_terms, -2))

        # Afinn compiles its matching regex from the lexicon when it is
        # constructed; without a rebuild the newly added words are never
        # matched by score(). This relies on a private Afinn method, so it is
        # looked up defensively in case the installed version differs.
        rebuild_pattern = getattr(self.afin, "_setup_pattern_from_dict", None)
        if callable(rebuild_pattern):
            rebuild_pattern()

        return lexicon

    def sentiment_score(self):
        """Compute the sentiment score of every review.

        Adds a "scoresentiment" column to the stored DataFrame (modified in
        place), holding ``self.afin.score(text)`` for each
        "Translated_Review" value.

        Returns:
            pandas.DataFrame: The stored DataFrame with the new column.
        """
        reviews = self.dbreview["Translated_Review"]
        self.dbreview["scoresentiment"] = [
            self.afin.score(text) for text in reviews
        ]
        return self.dbreview

    def sentiment_update(self):
        """Attach the per-app mean sentiment to every review row.

        Requires ``sentiment_score`` to have been called. Because both sides
        of the merge carry a "scoresentiment" column, the result holds the
        per-review score in "scoresentiment_x" and the per-app mean in
        "scoresentiment_y" (pandas' default merge suffixes).

        Returns:
            pandas.DataFrame: The reviews merged with the per-app mean score.
        """
        sentiment_avg = (
            self.dbreview[["App", "scoresentiment"]].groupby("App").mean()
        )
        return self.dbreview.merge(sentiment_avg, on="App", how="inner")
    
# Run the sentiment pipeline on the cleaned reviews. The steps are
# order-dependent: each call relies on state set by the previous one.
test = DataAnalyser(db_rev_mod)
test.building_afinn()  # create the scorer with the default language ('en')
test.add_word(positive_words_path, negative_words_path)  # add the custom terms
test.sentiment_score()  # adds the "scoresentiment" column to db_rev_mod in place
sentiment_analysis_db = test.sentiment_update()

# Save the merged result as a CSV file.
sentiment_analysis_db.to_csv(clean_data_path + 'sentiment_analysis_db.csv')

