This script is to be run after mail files have been returned from PeachTree.
The returned files are stored on our FTP server.
Unzip them, rename each file to follow the naming convention shown below, and drag them into the Processing folder.

In [None]:
import pandas as pd
import sqlalchemy
import pyodbc
import os
from pw import *

In [None]:
# Open a SQLAlchemy connection to the CAG database over pyodbc.
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.types import NVARCHAR

# cag_connection_string is presumably provided by `from pw import *` -- confirm.
connection_url = URL.create(
    "mssql+pyodbc",
    query={"odbc_connect": cag_connection_string},
)
engine = create_engine(connection_url)

# Despite the name, this is the object returned by engine.connect(),
# not a DB-API cursor.
cursor = engine.connect()

In [None]:
## Set brand name.
brand = 'KP'
## Set catalog run name.
catalog = 'WN24'
## Set the start/in home date.
startdate = '01/08/2024'

# Auto-setting brand name.
# Supported brand codes and the display name written into the export.
brand_names = {
    "CT": "Connecting Threads (CT)",
    "KP": "Knit Picks (KP)",
    "ST": "Superior Threads (ST)",
    "WC": "We Crochet (WC)",
}

# Fail here, with a clear message, instead of leaving `name` undefined and
# hitting a NameError later when the queries are built.
if brand not in brand_names:
    raise ValueError(
        f"Unknown brand code {brand!r}; expected one of {sorted(brand_names)}"
    )
name = brand_names[brand]

Drag mail files into the processing folder. 
Rename each file to match the pattern (brand initials)_(catalog code)_(INHOME/HOLDS/CA), keeping the extension the code below reads: .txt for INHOME, .csv for HOLDS and CA.
E.g. KP_WN24_INHOME.txt

In [None]:
# Load the pipe-delimited, header-less in-home mail file.
# dtype=object keeps every column as text, matching the HOLDS and CA loads:
# without it pandas may parse ID columns as numbers (dropping leading zeros or
# producing values like 123.0) before they are stored as NVARCHAR and joined
# on customerid.
inhome = pd.read_csv(
    f'./Processing/{brand}_{catalog}_INHOME.txt',
    sep='|',
    names=['matchback', 'customerid', 'sourcecode', 'inhome'],
    encoding='latin-1',
    dtype=object,
)

In [None]:
# Load the HOLDS mail file, keeping every column as text.
holds_path = f'./Processing/{brand}_{catalog}_HOLDS.csv'
holds = pd.read_csv(holds_path, dtype=object)

In [None]:
# Load the Canada (CA) mail file, keeping every column as text.
canada_path = f'./Processing/{brand}_{catalog}_CA.csv'
canada = pd.read_csv(canada_path, dtype=object)

Insert imported files into CAGMAIN.DATA_IMPORTS for long term storage.
The stored procedures (sprocs) used for processing blend new and old data, so there is no need to import twice.

In [None]:
# Write the in-home file to table <brand>_<catalog>_INHOME, replacing any
# existing table of that name; every column is typed as NVARCHAR.
inhome.to_sql(
    f'{brand}_{catalog}_INHOME',
    engine,
    if_exists='replace',
    index=False,
    dtype=dict.fromkeys(inhome.columns, NVARCHAR),
)

In [None]:
# Write the holds file to table <brand>_<catalog>_HOLDS, replacing any
# existing table of that name; every column is typed as NVARCHAR.
holds.to_sql(
    f'{brand}_{catalog}_HOLDS',
    engine,
    if_exists='replace',
    index=False,
    dtype=dict.fromkeys(holds.columns, NVARCHAR),
)

In [None]:
# Write the Canada file to table <brand>_<catalog>_CA, replacing any
# existing table of that name; every column is typed as NVARCHAR.
canada.to_sql(
    f'{brand}_{catalog}_CA',
    engine,
    if_exists='replace',
    index=False,
    dtype=dict.fromkeys(canada.columns, NVARCHAR),
)

In [None]:
# Switch to the PNA database: connection_url, engine and cursor are rebound,
# so everything run after this cell goes through the PNA connection.
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.types import NVARCHAR

# pna_connection_string is presumably provided by `from pw import *` -- confirm.
connection_url = URL.create(
    "mssql+pyodbc",
    query={"odbc_connect": pna_connection_string},
)
engine = create_engine(connection_url)

# NOTE(review): the connection previously held in `cursor` is rebound here
# without being closed explicitly.
cursor = engine.connect()

In [None]:
# Match the staged in-home rows to NetSuite customers (customerid = entityid)
# to pick up each customer's internal id. Rows whose customerid or sourcecode
# is 'employee' are excluded. catalog, startdate and the brand display name
# are emitted as constant columns; note that `inhome` in the result is
# startdate, not the value from the mail file.
# The values are interpolated directly into the SQL text, so they must only
# come from the trusted notebook variables set above.
inhome_query = (
    "SELECT CAST(b.id AS INT) as internalid,  a.customerid, a.matchback, "
    f"'{catalog}' as catalog, a.sourcecode, "
    f"'{startdate}' as inhome, '{name}' as brand "
    f"FROM {cag_server}.{cag_db}.dbo.{brand}_{catalog}_INHOME A "
    "INNER JOIN NETSUITE.ns.Customer B ON A.customerid = B.entityid "
    "WHERE a.customerid <> 'employee' AND a.sourcecode <> 'employee' "
)

inhome_df = pd.read_sql_query(inhome_query, engine)

inhome_df.shape

In [None]:
# Match the staged holds rows to NetSuite customers (customerid = entityid)
# to pick up each customer's internal id. Rows whose customerid or sourcecode
# is 'employee' are excluded. catalog, startdate and the brand display name
# are emitted as constant columns.
# The values are interpolated directly into the SQL text, so they must only
# come from the trusted notebook variables set above.
holds_query = (
    "SELECT CAST(b.id AS INT) as internalid,  a.customerid, a.matchback, "
    f"'{catalog}' as catalog, a.sourcecode, "
    f"'{startdate}' as inhome, '{name}' as brand "
    f"FROM {cag_server}.{cag_db}.dbo.{brand}_{catalog}_HOLDS A "
    "INNER JOIN NETSUITE.ns.Customer B ON A.customerid = B.entityid "
    "WHERE a.customerid <> 'employee' AND a.sourcecode <> 'employee' "
)

holds_df = pd.read_sql_query(holds_query, engine)

holds_df.shape

In [None]:
# Match the staged Canada rows to NetSuite customers (customerid = entityid)
# to pick up each customer's internal id. Rows whose customerid or sourcecode
# is 'employee' are excluded. catalog, startdate and the brand display name
# are emitted as constant columns.
# The values are interpolated directly into the SQL text, so they must only
# come from the trusted notebook variables set above.
canada_query = (
    "SELECT CAST(b.id AS INT) as internalid,  a.customerid, a.matchback, "
    f"'{catalog}' as catalog, a.sourcecode, "
    f"'{startdate}' as inhome, '{name}' as brand "
    f"FROM {cag_server}.{cag_db}.dbo.{brand}_{catalog}_CA A "
    "INNER JOIN NETSUITE.ns.Customer B ON A.customerid = B.entityid "
    "WHERE a.customerid <> 'employee' AND a.sourcecode <> 'employee' "
)

canada_df = pd.read_sql_query(canada_query, engine)

canada_df.shape

Now union all three dataframes.
We are creating a main table, adding a unique external id, and breaking into multiple csvs to upload into Netsuite.
We currently don't have the option to programmatically insert these.

In [None]:
# Stack the in-home, holds and Canada results into one frame with a fresh
# 0..n-1 index.
frames = [inhome_df, holds_df, canada_df]
df = pd.concat(frames, ignore_index=True)

In [None]:
# Give every row an external id of the form <brand>-<catalog>-<sequence>,
# where the sequence is the 1-based row number zero-padded to 6 digits.
total_rows = len(df)
counts = [f"{row_number:06d}" for row_number in range(1, total_rows + 1)]
# Create External ID column.
id_prefix = f"{brand}-{catalog}-"
df['externalid'] = id_prefix + pd.Series(counts, dtype=str)

In [None]:
# Define the chunk size and naming convention
# chunk_size is the maximum number of data rows per CSV file
# (presumably chosen to fit a NetSuite import limit -- confirm).
chunk_size = 24999
naming_convention = '_V'

# Calculate the number of chunks needed.
# Ceiling division: a row count that is an exact multiple of chunk_size (or an
# empty frame) no longer produces a trailing header-only file.
num_chunks = (len(df) + chunk_size - 1) // chunk_size

# Folder Path
folder_path = os.path.join('Exports', brand, catalog, 'Source Codes')
# exist_ok=True creates the folder if needed and is a no-op when it exists.
os.makedirs(folder_path, exist_ok=True)

# Export the data frame into multiple CSV files, numbered from 1.
# NOTE(review): naming_convention already starts with '_', so the files are
# named <brand>_<catalog>__V<n>.csv (double underscore). Left unchanged here;
# confirm whether that is intended.
for i in range(num_chunks):
    start = i * chunk_size
    chunk = df.iloc[start:start + chunk_size]
    filename = os.path.join(folder_path, f'{brand}_{catalog}_{naming_convention}{i + 1}.csv')
    chunk.to_csv(filename, index=False)