In [None]:
import pandas as pd
import numpy as np
import pyodbc # SQL Connection
import sqlCredentials as sql

In [None]:
def clean_up(phrase):
    """Return ``phrase`` without leading/trailing whitespace and in upper case."""
    return phrase.strip().upper()

In [None]:
# BookXCenterProduction
# Open the ODBC connection. The connection string is a run of ';'-terminated
# key=value segments; the login values are read from the sqlCredentials module.
prod_db = pyodbc.connect(
    ''.join([
        'DRIVER={ODBC Driver 17 for SQL Server};',
        'Server=52.86.56.66;',
        'Database=BookXCenterProduction;',
        'UID=' + sql.username + ';',
        'PWD=' + sql.password + ';',
        'Trusted_connection=no;',
    ])
)

In [None]:
# Query text for the SAP goods-receipt report view. DateAdded is aliased to
# po_date and PostingDate to receiving_date; the other columns keep their names.
receiving_sql = """
SELECT
	[MostRecent]
	,[BPName]
	,[DocName]
	,[ISBN]
      ,[Currency]
	,[DateAdded] AS 'po_date'
	,[PostingDate] AS 'receiving_date'
  FROM [BookXCenterProduction].[SAP].[GoodsReceiptReportView]
"""

In [None]:
# Run the goods-receipt query over the open connection and preview the rows.
receiving = pd.read_sql(sql=receiving_sql, con=prod_db)
receiving.head()

In [None]:
# Show the dtype of every column in `receiving`.
receiving.dtypes

In [None]:
# List the distinct values found in the Currency column.
receiving['Currency'].unique()

In [None]:
# Remove the MostRecent column. The frame is rebound instead of mutated in place
# and errors='ignore' is passed so that re-running this cell is harmless; an
# in-place drop raises KeyError the second time because the column is gone.
receiving = receiving.drop(columns='MostRecent', errors='ignore')

In [None]:
# Count the missing values in each column of `receiving`.
receiving.isna().sum()

In [None]:
# (rows, columns) of `receiving` at this point.
receiving.shape

In [None]:
# Discard every row that has at least one missing value, then show the new shape.
receiving = receiving.dropna()
receiving.shape

In [None]:
# Load the supplier-name workbook, leave out its AvgLeadTime column, and preview it.
sup_names = (
    pd.read_excel("important_files/Supplier Names .xlsx")
    .drop(columns=['AvgLeadTime'])
)
sup_names.head()


## Cleaning Tables

In [None]:
# sup_names
# Lower-case the column labels, replace missing values with the 'N/A'
# placeholder, then normalise both supplier-name columns with clean_up.
sup_names.columns = list(map(str.lower, sup_names.columns))
sup_names = sup_names.fillna('N/A')
for col in ['supplier sap name', 'supplier nickname']:
    # Series.map calls clean_up once per value of this column; a row-wise
    # DataFrame.apply(axis=1) would build a Series for every row to do the same.
    sup_names[col] = sup_names[col].map(clean_up)
sup_names.head()

In [None]:
# Receiving
# Lower-case the column labels, replace missing values with 'N/A', make sure
# isbn is text, then normalise the text keys with clean_up.
receiving.columns = list(map(str.lower, receiving.columns))
receiving = receiving.fillna('N/A')
receiving['isbn'] = receiving['isbn'].astype(str)
for col in ['docname', 'isbn', 'bpname']:
    # Column-wise map: one clean_up call per value, no per-row Series construction.
    receiving[col] = receiving[col].map(clean_up)
# An empty currency or a bare '$' is recorded as 'USD'; other values are kept.
receiving['currency'] = np.where(
    receiving['currency'].isin(['', '$']),
    'USD',
    receiving['currency'],
)
receiving.head()

In [None]:
# Left-join the receipts to the supplier table: `bpname` on the receiving side
# is matched against 'supplier sap name'.
receiving_suplier = receiving.merge(
    sup_names,
    how='left',
    left_on='bpname',
    right_on='supplier sap name',
)
receiving_suplier.head()

In [None]:
# Count missing values per column after the left join with the supplier table.
receiving_suplier.isna().sum()

In [None]:
# Drop every row that still contains a missing value after the supplier join.
receiving_suplier = receiving_suplier.dropna()

In [None]:
# (rows, columns) left after dropping incomplete rows.
receiving_suplier.shape

In [None]:
# Re-count missing values per column after the dropna step.
receiving_suplier.isna().sum()

In [None]:
# Preview the joined frame.
receiving_suplier.head()

In [None]:
# Keep only the supplier nickname, exposed as `supplier`; both SAP-name columns
# are dropped. errors='ignore' plus rebinding keeps the cell safe to re-run: an
# in-place drop raises KeyError once the columns are already gone.
receiving_suplier = (
    receiving_suplier
    .drop(columns=['bpname', 'supplier sap name'], errors='ignore')
    .rename(columns={'supplier nickname': 'supplier'})
)
receiving_suplier.head()


In [None]:
# Load ca_results.csv and preview it.
# NOTE(review): `ca` is not referenced again in the visible part of this
# notebook - confirm it is still needed before keeping this cell.
ca = pd.read_csv('important_files/python/ca_results.csv')
ca.head()


In [None]:
# Bibliography
# ISBN / publisher pairs read from Isbn.Bibliography, normalised the same way
# as the other tables: lower-case column labels, 'N/A' for missing values,
# isbn as text, every column passed through clean_up.
bilblo_sql = """
SELECT Isbn
    , Publisher
FROM 
    Isbn.Bibliography
"""
biblio = pd.read_sql(bilblo_sql, prod_db)
biblio.columns = list(map(str.lower, biblio.columns))
biblio = biblio.fillna('N/A')
biblio['isbn'] = biblio['isbn'].astype(str)
for col in list(biblio.columns):
    # Column-wise map: one clean_up call per value, no per-row Series construction.
    biblio[col] = biblio[col].map(clean_up)
biblio.head()

In [None]:
# Left-join the bibliography columns onto each receipt, matching on `isbn`.
receiving_suplier_pub = receiving_suplier.merge(biblio, how='left', on='isbn')
receiving_suplier_pub.head()

In [None]:
# Lead time of each receipt: receiving_date minus po_date.
receiving_suplier_pub['lead_days'] = receiving_suplier_pub['receiving_date'].sub(
    receiving_suplier_pub['po_date']
)
receiving_suplier_pub.head()

In [None]:
# (rows, columns) after the bibliography join.
receiving_suplier_pub.shape

In [None]:
# Keep only the receipts whose document name does not contain 'SEA'.
# .copy() detaches the filtered frame from receiving_suplier_pub, so the columns
# assigned to it afterwards do not trigger SettingWithCopyWarning.
receiving_suplier_pub_regular = receiving_suplier_pub.loc[
    ~receiving_suplier_pub['docname'].str.contains('SEA', regex=False)
].copy()

In [None]:
# Preview the receipts that remain after the 'SEA' document filter.
receiving_suplier_pub_regular.head()

In [None]:
# Lead time as a whole number of days. The day component is read straight from
# the timedelta values instead of being parsed out of their string form, which
# depends on how the values happen to print.
receiving_suplier_pub_regular['timestamp'] = pd.to_timedelta(
    receiving_suplier_pub_regular['lead_days']
).dt.days

In [None]:
# Show the column dtypes, including the newly added `timestamp` column.
receiving_suplier_pub_regular.dtypes

In [None]:
# Floor the lead time at 6*7 days (six weeks): smaller values are raised to it.
receiving_suplier_pub_regular['timestamp'] = (
    receiving_suplier_pub_regular['timestamp'].clip(lower=6*7)
)

In [None]:
# Spot-check subset: supplier 'ALEK' with publisher 'MCGRAW'.
# .copy() makes it an independent frame, so the helper columns added to it in
# the following cells do not raise SettingWithCopyWarning.
alek_mcgraw = receiving_suplier_pub_regular.loc[
    (receiving_suplier_pub_regular['supplier'] == 'ALEK')
    & (receiving_suplier_pub_regular['publisher'] == 'MCGRAW')
].copy()
alek_mcgraw.head()

In [None]:
# Attach the 25th / 75th percentile of `timestamp`, and their difference (IQR),
# as constant columns on every row of the subset.
alek_mcgraw = alek_mcgraw.assign(
    Q1=alek_mcgraw['timestamp'].quantile(0.25),
    Q3=alek_mcgraw['timestamp'].quantile(0.75),
    IQR=lambda frame: frame['Q3'] - frame['Q1'],
)

In [None]:
# Label each row 'OK' when its lead time lies inside the Tukey fence
# [Q1 - 1.5*IQR, Q3 + 1.5*IQR], otherwise 'OUTLIER'.
# Both bounds have to hold at the same time. Testing them with "or" accepts
# practically every value, so no real outlier would ever be flagged.
# between() is inclusive on both ends, so when IQR is 0 the values equal to
# Q1 == Q3 are kept.
alek_mcgraw['outlier'] = np.where(
    alek_mcgraw['timestamp'].between(
        alek_mcgraw['Q1'] - alek_mcgraw['IQR'] * 1.5,
        alek_mcgraw['Q3'] + alek_mcgraw['IQR'] * 1.5,
    ),
    'OK',
    'OUTLIER',
)
alek_mcgraw.head()

In [None]:
# Display the rows of the subset that were labelled 'OK'.
alek_mcgraw[alek_mcgraw['outlier'] == 'OK']

In [None]:
# Lead-time summary per (supplier, publisher) pair with IQR outliers removed.
#
# For every pair that occurs in the data:
#   1. take Q1, Q3 and IQR of `timestamp` within the pair;
#   2. keep the rows inside the inclusive fence [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
#      (both bounds must hold; inclusive so that IQR == 0 keeps the values);
#   3. attach min / avg / max of the kept rows to each of those rows.
#
# groupby visits only pairs that exist (rows with a missing key are skipped),
# in order of first appearance. The per-pair frames are concatenated once at
# the end, which also avoids DataFrame.append (removed in pandas 2.0).
quant_frames = []
for _, quant_time in receiving_suplier_pub_regular.groupby(['supplier', 'publisher'], sort=False):
    days = quant_time['timestamp']
    q1 = days.quantile(0.25)
    q3 = days.quantile(0.75)
    iqr = q3 - q1
    within_fence = days.between(q1 - iqr * 1.5, q3 + iqr * 1.5)
    sup_pub_days = quant_time.loc[within_fence, ['supplier', 'publisher', 'timestamp']]
    quant_frames.append(
        sup_pub_days.assign(
            min=sup_pub_days['timestamp'].min(),
            avg=sup_pub_days['timestamp'].mean(),
            max=sup_pub_days['timestamp'].max(),
        )
    )

if quant_frames:
    quant_an = pd.concat(quant_frames)
else:
    # No pair at all: keep the expected (empty) column layout.
    quant_an = pd.DataFrame(columns=['supplier', 'publisher', 'timestamp', 'min', 'avg', 'max'])



In [None]:
# Preview the per-row summary frame.
quant_an.head()

In [None]:
# Collapse to one row per (supplier, publisher) pair - the first one found -
# carrying that pair's min / avg / max, with a fresh 0..n-1 index.
unique = (
    quant_an
    .loc[:, ['supplier', 'publisher', 'min', 'avg', 'max']]
    .drop_duplicates(subset=['supplier', 'publisher'], keep='first', ignore_index=True)
)
unique.head()

In [None]:
# (rows, columns) of the per-pair summary.
unique.shape

In [None]:
# Write the per-pair summary to lead_time.csv without the index column.
unique.to_csv('important_files/python/lead_time.csv', index= False)