<a href="https://colab.research.google.com/github/GustavoNicodemos/Algoritmo_analise_resultado/blob/main/An%C3%A1lise_GUV_V3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required packages (pyxlsb reads the .xlsb inputs, openpyxl writes the .xlsx report)
!pip install pyxlsb openpyxl

import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb

class AccountingAnalyzer:
    """Flag anomalous postings and relevant variations in accounting data.

    Input frames are expected to carry at least the columns ``Cost_Center``,
    ``Group_Account``, ``Period`` and ``Amount_in_LC``; these are the only
    columns the methods below read by name.

    Typical usage is ``fit(historical_df)`` once, followed by one or more
    ``analyze(new_df)`` calls. Rows already seen by a previous ``analyze``
    call are skipped on later calls.
    """

    def __init__(self):
        self.anomaly_detector = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
        self.clusterer = MiniBatchKMeans(n_clusters=5, random_state=42)
        self.encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        # Instantiated but not used by any method of this class.
        self.classifier = xgb.XGBClassifier()
        # Rows processed by previous analyze() calls (with the computed columns).
        self.history = pd.DataFrame()
        # Copy of the frame passed to fit(); baseline for the deviation check.
        self.historical_data = None
        self.treinou = False

    def fit(self, historical_data):
        """Fit the encoder and the anomaly detector on the historical base.

        Args:
            historical_data: DataFrame with the historical postings.
        """
        # The encoder is fitted here, and only here, so that analyze() builds
        # feature matrices with exactly the same column layout.
        features = self._preprocess(historical_data, fit=True)
        self.anomaly_detector.fit(features)
        self.historical_data = historical_data.copy()
        self.treinou = True
        print("✅ Modelo treinado com sucesso com base histórica.")

    def analyze(self, new_data):
        """Analyze a new batch of postings against the fitted model.

        Args:
            new_data: DataFrame with the postings to analyze. It is not
                modified; all computed columns are added to a copy.

        Returns:
            dict with the keys ``anomalias_df`` (rows flagged ``-1`` by the
            anomaly detector), ``consolidated_df`` (totals per
            ``Group_Account``), ``explicacoes_df`` (``Group_Account`` plus a
            textual explanation per row), ``variacoes_relevantes`` and
            ``detalhes_variacoes`` (see ``_detect_significant_variations``).

        Raises:
            ValueError: if ``fit`` has not been called, or if no row is left
                to analyze after removing rows already present in history.
        """
        if not self.treinou:
            raise ValueError("❌ O modelo precisa ser treinado antes da análise. Use o método .fit() com dados históricos.")

        # Copy so the caller's frame is never mutated by the column
        # assignments below (and to avoid writing into a filtered slice).
        new_data = self._check_history(new_data).copy()
        if new_data.empty:
            raise ValueError("❌ Nenhum lançamento novo para analisar.")

        features = self._preprocess(new_data)

        new_data['anomaly'] = self.anomaly_detector.predict(features)
        # The clusterer is refit on every batch, so cluster labels are not
        # comparable between analyze() calls.
        new_data['cluster'] = self.clusterer.fit_predict(features)

        new_data = self._calculate_variations(new_data)

        new_data['explanation'] = self._generate_explanations(new_data)

        consolidated_data = self._consolidate_data(new_data)

        self._update_history(new_data)

        # Deviation-based check against the historical base.
        variacoes_relevantes, detalhes_variacoes = self._detect_significant_variations(new_data)

        return {
            'anomalias_df': new_data[new_data['anomaly'] == -1],
            'consolidated_df': consolidated_data,
            'explicacoes_df': new_data[['Group_Account', 'explanation']],
            'variacoes_relevantes': variacoes_relevantes,
            'detalhes_variacoes': detalhes_variacoes
        }

    def _check_history(self, data):
        """Return the rows of ``data`` that were not analyzed before.

        Rows are compared by content over the columns that ``data`` shares
        with the stored history. Index labels are deliberately ignored: the
        history index is reset on every update, and independently loaded
        batches usually share the same default integer index.
        """
        if self.history.empty:
            return data
        shared = [col for col in data.columns if col in self.history.columns]
        if not shared:
            return data
        seen_hashes = pd.util.hash_pandas_object(self.history[shared], index=False)
        row_hashes = pd.util.hash_pandas_object(data[shared], index=False)
        # Positional mask (numpy) so duplicate index labels in `data` are harmless.
        already_seen = row_hashes.isin(seen_hashes).to_numpy()
        return data[~already_seen]

    def _preprocess(self, data, fit=False):
        """Build the numeric feature matrix used by the models.

        Args:
            data: DataFrame with ``Cost_Center``, ``Group_Account`` and
                ``Amount_in_LC``.
            fit: when True, (re)fit the one-hot encoder on ``data`` before
                transforming. Left False during analysis so the columns match
                those seen when the anomaly detector was fitted.

        Returns:
            2-D array: the amount column followed by the one-hot columns.
        """
        categorical = data[['Cost_Center', 'Group_Account']]
        if fit:
            cat_data = self.encoder.fit_transform(categorical)
        else:
            cat_data = self.encoder.transform(categorical)
        num_data = data[['Amount_in_LC']].values
        return np.hstack([num_data, cat_data])

    def _calculate_variations(self, data):
        """Add previous amount and absolute/percent variation columns.

        Rows are sorted by cost center, group account and period; the previous
        amount is the preceding row within the same (cost center, account).
        The percent variation is NaN when the previous amount is zero or
        missing.
        """
        data = data.sort_values(by=['Cost_Center', 'Group_Account', 'Period'])
        data['previous_amount'] = data.groupby(['Cost_Center', 'Group_Account'])['Amount_in_LC'].shift(1)
        data['absolute_variation'] = data['Amount_in_LC'] - data['previous_amount']
        # A zero base is replaced by NaN to avoid infinite percentages.
        data['percent_variation'] = (data['absolute_variation'] / data['previous_amount'].replace(0, np.nan)) * 100
        return data

    def _generate_explanations(self, data):
        """Return one explanation string per row of ``data``, in row order.

        Anomalies take precedence; otherwise a percent variation above 10%
        in absolute value is reported.
        """
        explanations = []
        rows = zip(
            data['anomaly'],
            data['Group_Account'],
            data['absolute_variation'],
            data['percent_variation'],
        )
        for anomaly, account, absolute_variation, percent_variation in rows:
            if anomaly == -1:
                explanations.append(f"Anomalia detectada em {account}.")
            elif pd.notnull(absolute_variation) and abs(percent_variation) > 10:
                explanations.append(f"Variação significativa de {percent_variation:.2f}% em {account}.")
            else:
                explanations.append("Sem anomalias ou variações significativas.")
        return explanations

    def _consolidate_data(self, data):
        """Aggregate amounts and variations per ``Group_Account``.

        Amount and absolute variation are summed, percent variation is
        averaged; the result is sorted by ``Group_Account``.
        """
        consolidated = data.groupby('Group_Account').agg({
            'Amount_in_LC': 'sum',
            'absolute_variation': 'sum',
            'percent_variation': 'mean'
        }).reset_index()
        return consolidated.sort_values(by='Group_Account')

    def _update_history(self, data):
        """Append the analyzed rows to the stored history (index is reset)."""
        if self.history.empty:
            # Avoid concatenating with an empty frame, which can alter dtypes.
            self.history = data.reset_index(drop=True)
        else:
            self.history = pd.concat([self.history, data], ignore_index=True)

    def _detect_significant_variations(self, new_data):
        """Find (cost center, account) pairs whose total is deemed relevant.

        The historical base is totalled per period and the standard deviation
        of those totals is computed per (cost center, account). A pair of the
        new batch is relevant when the absolute value of its total exceeds
        1.5 times that standard deviation. Pairs without a computable
        deviation (absent from history or a single period) get a deviation of
        0, so any non-zero total is relevant.

        NOTE(review): the rule compares the absolute amount, not its distance
        from the historical mean, and the new batch is totalled over all of
        its periods — confirm this is the intended business rule.

        Returns:
            Tuple ``(relevantes, detalhes)``: the relevant pairs with their
            total and deviation, and the rows of the new batch belonging to
            those pairs.
        """
        df_historico = self.historical_data.copy()
        df_novo = new_data.copy()

        df_historico['Period'] = pd.to_datetime(df_historico['Period'])
        df_novo['Period'] = pd.to_datetime(df_novo['Period'])

        # Historical totals per cost center, group account and period.
        dre_hist = df_historico.groupby(['Cost_Center', 'Group_Account', 'Period'])['Amount_in_LC'].sum().reset_index()
        desvio = dre_hist.groupby(['Cost_Center', 'Group_Account'])['Amount_in_LC'].std().reset_index()
        desvio.columns = ['Cost_Center', 'Group_Account', 'std_dev']

        # Totals of the current batch.
        dre_atual = df_novo.groupby(['Cost_Center', 'Group_Account'])['Amount_in_LC'].sum().reset_index()

        comparativo = pd.merge(dre_atual, desvio, on=['Cost_Center', 'Group_Account'], how='left')

        comparativo['std_dev'] = comparativo['std_dev'].fillna(0)
        comparativo['relevant'] = abs(comparativo['Amount_in_LC']) > (comparativo['std_dev'] * 1.5)

        relevantes = comparativo[comparativo['relevant']].copy()

        # Postings of the current batch that belong to the relevant pairs.
        detalhes = pd.merge(df_novo, relevantes[['Cost_Center', 'Group_Account']], on=['Cost_Center', 'Group_Account'], how='inner')

        return relevantes, detalhes

# Script entry point (intended to be run in Colab): load the workbooks,
# train on the historical base, analyze the current base and export a
# multi-sheet Excel report.
if __name__ == "__main__":
    base_dados = pd.read_excel('Dados_GUV.xlsb', engine='pyxlsb')
    dados_historicos = pd.read_excel('Dados_historicos_GUV.xlsb', engine='pyxlsb')

    analisador = AccountingAnalyzer()
    analisador.fit(dados_historicos)

    relatorio = analisador.analyze(base_dados)

    # (report key, sheet name, drop duplicate rows before writing?)
    abas = (
        ('anomalias_df', 'Anomalias', False),
        ('consolidated_df', 'Consolidado_DRE', False),
        ('explicacoes_df', 'Explicacoes', True),
        ('variacoes_relevantes', 'Variações Relevantes', False),
        ('detalhes_variacoes', 'Detalhamento Lançamentos', False),
    )

    with pd.ExcelWriter('Relatorio_Contabil_Analitico.xlsx', engine='openpyxl') as writer:
        for chave, nome_aba, sem_duplicatas in abas:
            quadro = relatorio[chave]
            if sem_duplicatas:
                quadro = quadro.drop_duplicates()
            quadro.to_excel(writer, sheet_name=nome_aba, index=False)

    print("✅ Planilha final gerada com múltiplas abas.")