In [1]:
import pandas as pd
import psycopg2
import numpy as np
from datetime import datetime
import os
from dotenv import load_dotenv

In [2]:
# Location of the .env file to load (presumably the source of PG_PASSWORD,
# which is read from the environment when connecting to the database).
# The original machine-specific path stays the default so existing behavior is
# unchanged; set DOTENV_PATH to run the notebook from another machine or checkout.
dotenv_path = os.environ.get(
    "DOTENV_PATH",
    r"C:\Users\erico\Documents\projeto-regressao\Supermarket-store-sales-prediction\.env\.env",
)
# Kept as the cell's last expression so the result of load_dotenv is displayed.
load_dotenv(dotenv_path)

True

In [3]:
# Load the raw store dataset; the file uses ';' as its field separator.
raw = pd.read_csv(
    r'C:\Users\erico\Documents\projeto-regressao\Supermarket-store-sales-prediction\data\raw\Stores.csv',
    sep=';',
)

In [4]:
# Print the column names, non-null counts and dtypes of the loaded dataframe.
raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype
---  ------                --------------  -----
 0   Store ID              896 non-null    int64
 1   Store_Area            896 non-null    int64
 2   Items_Available       896 non-null    int64
 3   Daily_Customer_Count  896 non-null    int64
 4   Store_Sales           896 non-null    int64
dtypes: int64(5)
memory usage: 35.1 KB


In [5]:
# Prepare the values for a date column meant to simplify future queries.
# First day of the generated calendar.
start_date = '2021-01-01'

# Consecutive daily dates beginning at start_date, one per row of `raw`.
date_range = pd.date_range(start_date, periods=raw.shape[0], freq='D')

In [6]:
# Store the generated dates on the dataframe as the 'Sales_Date' column
# (this modifies `raw` in place).
raw['Sales_Date'] = date_range

In [7]:
# Print the dataframe summary again to confirm the 'Sales_Date' column and its dtype.
raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 896 entries, 0 to 895
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Store ID              896 non-null    int64         
 1   Store_Area            896 non-null    int64         
 2   Items_Available       896 non-null    int64         
 3   Daily_Customer_Count  896 non-null    int64         
 4   Store_Sales           896 non-null    int64         
 5   Sales_Date            896 non-null    datetime64[ns]
dtypes: datetime64[ns](1), int64(5)
memory usage: 42.1 KB


In [8]:
# Connect to the database.
# Host, database and user keep their original values as defaults but can be
# overridden through the environment, the same way the password is already
# supplied (PG_PASSWORD), so the notebook is not tied to one local setup.
conn = psycopg2.connect(
    host=os.environ.get("PG_HOST", "localhost"),
    database=os.environ.get("PG_DATABASE", "Supermarket"),
    user=os.environ.get("PG_USER", "postgres"),
    password=os.environ.get("PG_PASSWORD"),
)

# Open a cursor to run queries.
cur = conn.cursor()

# Start from a clean transaction state. This goes through the driver's own
# transaction API instead of sending a raw "ROLLBACK" statement via the cursor.
conn.rollback()

In [9]:
# Check whether the target table already exists.
# PostgreSQL folds unquoted identifiers to lower case, so the table created
# below as Supermarket_Gold is catalogued as 'supermarket_gold'; comparing
# against the mixed-case spelling never matches. The lookup is also restricted
# to the current schema, which is where the unqualified CREATE TABLE below
# places the table, so a same-named table in another schema is not mistaken
# for it.
cur.execute("""
    SELECT EXISTS (
        SELECT 1
        FROM   information_schema.tables
        WHERE  table_schema = current_schema()
        AND    table_name = %s
    );
""", ("supermarket_gold",))

# Fetch the single boolean returned by the EXISTS query.
table_exists = cur.fetchone()[0]

# Create the table when it is missing. IF NOT EXISTS keeps the statement safe
# even if the table appears between the check and the CREATE.
if not table_exists:
    cur.execute("""
        CREATE TABLE IF NOT EXISTS Supermarket_Gold (
            store_id INT PRIMARY KEY,
            sales_date DATE,
            store_area INT,
            items_available INT,
            daily_customer_count INT,
            store_sales INT
        );
    """)
    conn.commit()

In [10]:
# Populate the database.

# Rows whose key already exists are skipped (ON CONFLICT DO NOTHING), so
# inserting the same data again does not raise or duplicate rows.
insert_sql = """
        INSERT INTO Supermarket_Gold (store_id, sales_date, store_area, items_available, daily_customer_count, store_sales)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON CONFLICT DO NOTHING;
    """

try:
    # Build the parameter tuples column-wise instead of iterating row by row.
    # astype('int64').tolist() yields plain Python ints, matching the int()
    # coercion the per-row version applied; .dt.date yields datetime.date objects.
    # The tuple order must match the column list of the INSERT statement.
    # NOTE(review): the key 'Store ID ' (with trailing space) is kept exactly as
    # written originally; presumably it mirrors the CSV header — confirm.
    rows = list(zip(
        raw['Store ID '].astype('int64').tolist(),
        raw['Sales_Date'].dt.date.tolist(),
        raw['Store_Area'].astype('int64').tolist(),
        raw['Items_Available'].astype('int64').tolist(),
        raw['Daily_Customer_Count'].astype('int64').tolist(),
        raw['Store_Sales'].astype('int64').tolist(),
    ))

    # Insert every row with the same statement.
    cur.executemany(insert_sql, rows)

    # Persist the changes in the database.
    conn.commit()
except Exception:
    # Undo the partial batch so nothing half-written is left pending, then
    # let the error surface in the notebook.
    conn.rollback()
    raise
finally:
    # Always release the cursor and the connection, even when the insert fails.
    cur.close()
    conn.close()