In [1]:
import pandas as pd

In [2]:
# Input CSV with the transactions; the path is relative to the current working directory.
file = "cleaned_csv.csv"

In [3]:
# Load the whole CSV into a DataFrame.
# NOTE(review): 'unicode_escape' decodes bytes as Latin-1 and additionally interprets
# backslash escape sequences found in the text; confirm the file's real encoding
# (e.g. 'utf-8' or 'latin-1') and prefer that if backslash handling is not intended.
df = pd.read_csv(file, encoding= 'unicode_escape')


In [4]:
# Inspect the column names available in the loaded file.
df.columns

Index(['Data operazione', 'Data contabile', 'Tipologia', 'Nome', 'Descrizione',
       'Importo'],
      dtype='object')

For this purpose, we will only use the `Data operazione` field.

In [5]:
# Preview the raw date values: at this point they are still dd/mm/YYYY strings (dtype object).
df["Data operazione"]

0       29/08/2024
1       29/08/2024
2       29/08/2024
3       29/08/2024
4       27/08/2024
           ...    
1071    23/09/2023
1072    23/09/2023
1073    22/09/2023
1074    22/09/2023
1075    22/09/2023
Name: Data operazione, Length: 1076, dtype: object

The purpose of this code is to organize financial transaction data by date.

By splitting the dataset into one CSV file per calendar month, stored under a folder for its year (`output/<year>/<MM>.csv`), we create a more manageable file structure. This approach allows for easier data analysis and reporting on a monthly or yearly basis, and can significantly improve data retrieval and processing efficiency for large datasets spanning multiple years.


In [6]:
import os
from datetime import datetime

# Parse the dd/mm/YYYY strings into datetime64 values; the .dt accessor used
# for grouping further down only works on a datetime column.
df['Data operazione'] = pd.to_datetime(
    df['Data operazione'],
    format='%d/%m/%Y',
)

# Create a function to save dataframe to CSV
def save_to_csv(group, year, month):
    """Write one group of rows to ``output/<year>/<MM>.csv``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to write. Its 'Data operazione' column must be datetime-like,
        because it is rendered through the ``.dt`` accessor.
    year : int
        Name of the sub-folder created under ``output``.
    month : int
        Month number; zero-padded to two digits for the file name
        (``8`` -> ``08.csv``).

    Returns
    -------
    None
        Called for its side effect only. The year folder is created when
        missing, and the target file is written without the index.
    """
    folder = f"output/{year}"
    os.makedirs(folder, exist_ok=True)
    file_path = f"{folder}/{month:02d}.csv"

    # Render the dates back to dd/mm/YYYY on a copy. `assign` returns a new
    # frame and keeps the column in its original position, so the written CSV
    # is unchanged while the caller's frame keeps its datetime column.
    formatted = group.assign(
        **{'Data operazione': group['Data operazione'].dt.strftime('%d/%m/%Y')}
    )

    formatted.to_csv(file_path, index=False)

# Walk the (year, month) groups and write each one to its own file. Plain
# iteration is used instead of GroupBy.apply because the call is made purely
# for its side effect (writing a file): there is no per-group result to combine.
transaction_dates = df['Data operazione']
for (year, month), month_rows in df.groupby(
    [transaction_dates.dt.year, transaction_dates.dt.month]
):
    save_to_csv(month_rows, year, month)

print("CSV files have been created and saved in the 'output' folder.")

CSV files have been created and saved in the 'output' folder.
