# 03_Load(L)

In [1]:
import pandas as pd 
import os 

## Load the base file

In [2]:
df = pd.read_csv("data/sample_to_save.csv")
df

Unnamed: 0,id,name,email,age,country
0,1,Ana,ana@example.com,23,spain
1,2,Carlos,carlos@example.com,31,spain
2,3,Lucía,lucia@example.com,27,spain
3,4,Pedro,pedro@example.com,44,spain
4,5,María,maria@example.com,19,spain


In [3]:
df.head(2)

Unnamed: 0,id,name,email,age,country
0,1,Ana,ana@example.com,23,spain
1,2,Carlos,carlos@example.com,31,spain


In [4]:
df.info

<bound method DataFrame.info of    id    name               email  age country
0   1     Ana     ana@example.com   23   spain
1   2  Carlos  carlos@example.com   31   spain
2   3   Lucía   lucia@example.com   27   spain
3   4   Pedro   pedro@example.com   44   spain
4   5   María   maria@example.com   19   spain>

In [5]:
df.describe

<bound method NDFrame.describe of    id    name               email  age country
0   1     Ana     ana@example.com   23   spain
1   2  Carlos  carlos@example.com   31   spain
2   3   Lucía   lucia@example.com   27   spain
3   4   Pedro   pedro@example.com   44   spain
4   5   María   maria@example.com   19   spain>

## Save as csv in an output folder 

### We create the folder where this document will go

In [6]:
os.makedirs("output", exist_ok=True)

### Save the DataFrame in the previously created folder

In [7]:
df.to_csv("output/data_clean.csv", index=False)

## Save as json

In [8]:
os.makedirs("output", exist_ok=True)

In [9]:
df.to_json("output/data_clean.json", orient="records", indent=2)

## Save as excel

In [10]:
df.to_excel("output/data_clean.xlsx", index=False)

## Save multiple versions

### Ask the user which version they want to save

In [11]:
version = input("Enter version name (e.g., v1): ")
version = version.strip()


### Build the base filename (which we will reuse)

In [12]:
base_path = f"output/data_clean_{version}" 

### Save the versioned CSV file

In [13]:
df.to_csv(f"{base_path}.csv", index=False)


### Save the versioned JSON file

In [14]:
df.to_json(f"{base_path}.json", orient="records", indent=2)

## Create a load_data function

In [15]:
def load_data(df, basename):
    os.makedirs("output", exist_ok=True)
    basename = basename.strip()
    folder = f"output/data_clean_{basename}"
    df.to_csv(f"{folder}.csv", index = False)
    df.to_json(f"{folder}.json", orient ="records" , indent =2)
    df.to_excel(f"{folder}.xlsx", index = False)

## Test the load_data function

In [16]:
load_data(df, "final_output")

## Final exercise: save file with timestamp


In [17]:
from datetime import datetime

In [18]:
def save_with_date(df):
    """
    Save DataFrame to a CSV file with a timestamp in the filename.
    Format: output/save_YYYYMMDD_HHMMSS.csv
    """
    os.makedirs("output", exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"output/save_{timestamp}.csv"

    df.to_csv(filename, index=False)

    return filename

In [19]:
output_file = save_with_date(df)
output_file


'output/save_20251202_152539.csv'

In [20]:
# 20251202_143141 = timestamp using the YYYYMMDD_HHMMSS format
#
# Breakdown:
# 2025  -> Year
# 12    -> Month
# 02    -> Day
# 14    -> Hour (24h)
# 31    -> Minutes
# 41    -> Seconds
#
# This format ensures that every file has a unique name
# and makes it easy to sort files by date and time.
