In [3]:
import os
import sys
import dotenv
import yaml
import pandas as pd

# Set up paths.
# PROJECT_DIR is read from the process environment *before* the .env file is
# loaded, because it is the value used to locate that file.
project_dir = os.getenv('PROJECT_DIR')
if project_dir is None:
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.join(None, '.env') would raise.
    raise EnvironmentError(
        "PROJECT_DIR environment variable is not set; "
        "point it at the project root before running this notebook."
    )

# Load the remaining settings from <project_dir>/.env.
dotenv_path = os.path.join(project_dir, '.env')
dotenv.load_dotenv(dotenv_path)

# Put the project root on the import path; the membership check keeps re-runs
# of this cell from appending duplicate entries.
if project_dir not in sys.path:
    sys.path.append(project_dir)

In [8]:
# Project-local imports. They come after the setup cell because project_dir is
# appended to sys.path there — presumably that is what makes `config` and
# `src` resolvable (confirm against the project layout).
from config import Config
from src.my_utils import fetch_table_to_parquet
from src.data.make_dataset import RawDataHandler
# Project settings object; later cells read data_vault_dir, data_dir and db_url from it.
config = Config()

---

In [9]:
# Folder holding the raw images, located inside the configured data vault.
raw_data_path = os.path.join(config.data_vault_dir, 'Intel_Image_CNN')

handle_raw_data = RawDataHandler(raw_data_path)
# The frame returned by fetch_raw_data() is not kept: `df` is bound to the
# result of encode_labels() on the next line.
# NOTE(review): presumably encode_labels() works on state the handler stored
# during fetch_raw_data() — confirm in src.data.make_dataset.
handle_raw_data.fetch_raw_data()
df = handle_raw_data.encode_labels()

In [10]:
# Display the frame returned by encode_labels() (the rendering elides the middle rows).
df

Unnamed: 0,Segment,Category,Path,Label
0,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
1,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
2,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
3,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
4,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
...,...,...,...,...
24330,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24331,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24332,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24333,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,


---

In [11]:
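# One-time step, left commented out: create the SQLAlchemy engine and store the
# DataFrame in the PostgreSQL database. Uncomment to (re)populate the
# 'intel_image_data' table; note that if_exists='replace' overwrites it.
# from sqlalchemy import create_engine
# engine = create_engine(config.db_url)

# df.to_sql(
#     'intel_image_data',
#     engine,
#     if_exists='replace',
#     index=False
#     )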

In [13]:
# Arguments for the table export, gathered in one mapping so the call itself
# stays on a single line.
export_kwargs = {
    'db_url': config.db_url,
    'table_name': 'intel_image_data',
    'data_dir': config.data_dir,
    'parquet_file_name': 'intel_image_paths_labels.parq',
    'save_as_excel': True,
    'excel_file_name': 'intel_image_paths_labels.xlsx',
    'return_metadata': False,
}
# NOTE(review): presumably reads the table and writes the parquet/xlsx copies
# under data_dir — confirm against src.my_utils.fetch_table_to_parquet.
df = fetch_table_to_parquet(**export_kwargs)
df

Files saved to: data/raw/[file_name]


Unnamed: 0,Segment,Category,Path,Label
0,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
1,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
2,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
3,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
4,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
...,...,...,...,...
24330,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24331,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24332,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24333,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,


---

In [14]:
# Paths of the parquet and Excel files under <data_dir>/raw. Each path
# component is passed separately so os.path.join inserts the platform's own
# separator instead of embedding a hard-coded '/' (which yields mixed
# separators on Windows).
raw_data_path_parq = os.path.join(config.data_dir, 'raw', 'intel_image_paths_labels.parq')
raw_data_path_xlsx = os.path.join(config.data_dir, 'raw', 'intel_image_paths_labels.xlsx')

In [17]:
# Read the parquet file back from disk (pyarrow engine) and display it.
pd.read_parquet(raw_data_path_parq, engine='pyarrow')

Unnamed: 0,Segment,Category,Path,Label
0,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
1,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
2,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
3,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
4,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
...,...,...,...,...
24330,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24331,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24332,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24333,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,


In [18]:
# Read the Excel file back from disk and display it.
pd.read_excel(raw_data_path_xlsx)

Unnamed: 0,Segment,Category,Path,Label
0,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
1,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
2,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
3,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
4,seg_train,buildings,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,5.0
...,...,...,...,...
24330,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24331,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24332,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
24333,seg_pred,seg_pred,C:\Users\delst\Data_Vault\Intel_Image_CNN\seg_...,
