In [2]:
from django.contrib.auth.models import User

In [3]:
# Load the account with primary key 1; it is handed to the statement
# processor further down so built transactions are attached to it.
user = User.objects.get(pk=1)

In [1]:
from datetime import datetime
import pandas as pd
import numpy as np
import tabula
from investment.models import VisaDesjardinsTransaction  # Make sure to import your VisaDesjardinsTransaction module

class VisaDesjardinsPdfTransactionProcessor:
    """Build transaction model instances from a Visa Desjardins PDF statement.

    The PDF is read with ``tabula``; every extracted table whose header row
    matches one of the known transaction-table layouts is cleaned up and
    converted, row by row, into unsaved ``transaction_model`` instances.

    NOTE(review): the recognised header layouts embed one specific cardholder
    name and card number, so statements for another card are silently skipped.

    Args:
        file_path: Path of the PDF statement to read.
        user: Value assigned to the ``user`` attribute of every instance built.
        year: Year applied to every parsed date, because the statement rows
            only carry day and month. Defaults to 2023 (the previously
            hard-coded value). Rows from a statement that straddles a year
            boundary all receive this single year.
    """

    # Month abbreviations as printed on the statement (French), upper-cased.
    _MONTHS = {
        "JAN": 1, "FEV": 2, "MAR": 3, "AVR": 4, "MAI": 5, "JUN": 6,
        "JUL": 7, "AOU": 8, "SEP": 9, "OCT": 10, "NOV": 11,
        "DEC": 12, "DÉC": 12,
    }

    # Header rows (as produced by tabula) that identify a transaction table.
    # Any other table (credit limit, categories, account operations,
    # BONIDOLLARS summary, ...) is ignored.
    _HEADER_WITH_FILLER = [
        'Transactions effectuées avec la carte de : GENEVIEVE POITRAS',
        'Unnamed: 0',
        'Carte : 4540 33** **** 6028',
        'Unnamed: 1',
    ]
    _HEADER_COMPACT = [
        'Transactions effectuées avec la carte de : GENEVIEVE POITRAS',
        'Carte : 4540 33** **** 6028',
        'Unnamed: 0',
    ]
    _TRANSACTION_COLUMNS = ['BONIDOLLARS', 'amount', 'date', 'record_date', 'merchant']

    def __init__(self, file_path, user, year=2023):
        self.file_path = file_path
        self.user = user
        self.year = year
        self.transaction_model = VisaDesjardinsTransaction
        self.df = None

    def to_date(self, date_str):
        """Convert a ``"<day> <MONTH>"`` string (e.g. ``"25 AVR"``) to a datetime.

        Returns ``np.nan`` when ``date_str`` is not a string. Raises
        ``KeyError`` for an unknown month abbreviation and ``IndexError`` /
        ``ValueError`` for a malformed string.
        """
        if not isinstance(date_str, str):
            return np.nan
        parts = date_str.split()
        day = int(parts[0])
        month = self._MONTHS[parts[1].upper()]
        return datetime(self.year, month, day)

    def set_table_title(self, df):
        """Use the table's second row as column names and drop the first three rows.

        The column names are set on ``df`` itself; the returned frame is the
        slice starting at the fourth row.
        """
        df.columns = df.iloc[1].to_list()
        return df.iloc[3:]

    def _parse_description(self, text):
        """Split ``"DD MM DD MM merchant"`` into (transaction date, record date, merchant).

        Raises ``TypeError`` when ``text`` is not sliceable (e.g. NaN) and
        ``ValueError`` when a field is not numeric or not a valid date.
        """
        transaction_day = int(text[0:2])
        transaction_month = int(text[3:5])
        record_day = int(text[6:8])
        record_month = int(text[9:11])
        return (
            datetime(self.year, transaction_month, transaction_day),
            datetime(self.year, record_month, record_day),
            text[12:],
        )

    def format_columns(self, df):
        """Parse the raw transaction table into typed, renamed columns.

        Rows whose ``Description`` cell does not start with two day/month
        pairs (sub-totals, blank lines, ...) are dropped. The remaining rows
        get ``date``, ``record_date`` and ``merchant`` columns, ``amount``
        (from ``Montant``) as a float and ``BONIDOLLARS`` as a fraction
        (percentage divided by 100).

        Raises ``KeyError`` when an expected column is missing.
        """
        parsed = []
        keep = []
        for text in df["Description"]:
            # Parse the whole row before recording anything, so a row that
            # fails half-way cannot leave the result lists misaligned.
            try:
                parsed.append(self._parse_description(text))
            except (TypeError, ValueError):
                keep.append(False)
            else:
                keep.append(True)

        # Copy so the column assignments below never write into a view.
        df = df[np.array(keep, dtype=bool)].copy()
        df["transaction_date"] = [row[0] for row in parsed]
        df["record_date"] = [row[1] for row in parsed]
        df["description"] = [row[2] for row in parsed]
        df = df.drop(columns="Description")

        # The statement uses a decimal comma and a trailing percent sign.
        bonus = df["BONIDOLLARS"].str.replace(",", ".", regex=False)
        bonus = bonus.str.replace("%", "", regex=False)
        df["BONIDOLLARS"] = pd.to_numeric(bonus, errors="coerce") / 100
        df["Montant"] = pd.to_numeric(
            df["Montant"].str.replace(",", ".", regex=False), errors="coerce"
        )

        return df.rename(
            columns={
                "transaction_date": "date",
                "description": "merchant",
                "Montant": "amount",
            }
        )

    def process_dataframe(self, df):
        """Return the cleaned transaction table for ``df``.

        An empty DataFrame is returned when the header of ``df`` is not one
        of the recognised transaction-table layouts. ``df`` is not modified.
        """
        header = df.columns.to_list()
        if header == self._HEADER_WITH_FILLER:
            table = self.format_columns(self.set_table_title(df.copy()))
            return table[self._TRANSACTION_COLUMNS]
        if header == self._HEADER_COMPACT:
            return self.format_columns(self.set_table_title(df.copy()))
        return pd.DataFrame()

    def create_transaction(self, df):
        """Build one unsaved model instance per row of ``df``.

        Each column whose name is an attribute of the model is copied onto
        the instance; other columns are ignored. ``user`` is set on every
        instance. Nothing is written to the database here.
        """
        transactions = []
        for _, row in df.iterrows():
            # One instance per ROW: creating it inside the column loop would
            # discard every value except the last column's.
            instance = self.transaction_model()
            for column_name in df.columns:
                if hasattr(instance, column_name):
                    setattr(instance, column_name, row[column_name])
            instance.user = self.user
            transactions.append(instance)
        return transactions

    def get_dfs_from_pdf(self):
        """Read every table of the PDF and return them keyed by extraction order."""
        tables = tabula.read_pdf(self.file_path, stream=True, pages="all")
        return dict(enumerate(tables))

    def get_processed_df_list(self):
        """Return ``process_dataframe`` applied to each table of the PDF, in order."""
        return [self.process_dataframe(df) for df in self.get_dfs_from_pdf().values()]

    def get_transaction_list(self):
        """Return the model instances for every recognised table of the PDF.

        The column names of each non-empty table are printed as a progress trace.
        """
        transactions = []
        for processed_df in self.get_processed_df_list():
            if processed_df.empty:
                continue
            print(processed_df.columns.to_list())
            transactions.extend(self.create_transaction(processed_df))
        return transactions




In [5]:
# Statement to import, and a processor for it bound to the user fetched earlier.
u = "data/Desjardins/visa/454033600_20230524.pdf"
v = VisaDesjardinsPdfTransactionProcessor(file_path=u, user=user)

In [7]:
# Parse the statement into model instances (nothing is saved here); the
# processor prints the column names of each non-empty table it converts.
l = v.get_transaction_list()

['BONIDOLLARS', 'amount', 'date', 'record_date', 'merchant']
['BONIDOLLARS', 'amount', 'date', 'record_date', 'merchant']


In [9]:
# One DataFrame per table extracted from the PDF; tables whose header is not
# a recognised transaction layout come back as empty DataFrames.
dfs = v.get_processed_df_list()

In [41]:
# Debug pass: for every row of every non-empty table, build an unsaved model
# instance, echo each field copied onto it, then dump the instance state.
for df in dfs:
    if df.empty:
        continue
    for k, transaction_data in df.iterrows():
        transaction_instance = VisaDesjardinsTransaction()
        for column_name in df.columns:
            # Columns the model does not declare are skipped.
            if not hasattr(transaction_instance, column_name):
                continue
            value = transaction_data[column_name]
            print(column_name, value)
            setattr(transaction_instance, column_name, value)
        transaction_instance.user = user
        print("--------------", vars(transaction_instance))
        

BONIDOLLARS 0.005
amount 25.55
date 2023-04-25 00:00:00
record_date 2023-04-25 00:00:00
merchant SUPER C PAT 5963 POINTE AUX TRQC
-------------- {'_state': <django.db.models.base.ModelState object at 0x13fdbe8b0>, 'id': None, 'category_id': None, 'date': Timestamp('2023-04-25 00:00:00'), 'merchant': 'SUPER C PAT 5963 POINTE AUX TRQC', 'amount': 25.55, 'note': '', 'source_id': None, 'user_id': 1, 'is_completed': False, 'transaction_ptr_id': None, 'BONIDOLLARS': 0.005, 'record_date': Timestamp('2023-04-25 00:00:00')}
BONIDOLLARS 0.005
amount 41.39
date 2023-04-25 00:00:00
record_date 2023-04-25 00:00:00
merchant MAXI #8987 * POINTE-AUX-TRQC
-------------- {'_state': <django.db.models.base.ModelState object at 0x13fdbebe0>, 'id': None, 'category_id': None, 'date': Timestamp('2023-04-25 00:00:00'), 'merchant': 'MAXI #8987 * POINTE-AUX-TRQC', 'amount': 41.39, 'note': '', 'source_id': None, 'user_id': 1, 'is_completed': False, 'transaction_ptr_id': None, 'BONIDOLLARS': 0.005, 'record_date': 