In [1]:
import pyodbc
import pandas as pd
from datetime import datetime

# Pull daily card-payment totals (amount and transaction count) from the
# warehouse for a fixed start date through today, then save them to a CSV
# on disk sorted by date.

# Query window: fixed start date through today's date, both as 'YYYY-MM-DD'.
start_date_str = '2023-10-15'
end_date_str = datetime.now().strftime('%Y-%m-%d')

# SQL query to run: one row per entered date, restricted to transaction type
# code 2, payment memo IDs 23/13/12, and rows with no reversing entry.
query = f"""
SELECT CAST(CAPJ.ConsumerPaymentEnteredDate AS DATE) AS 'Date', SUM(CAPJ.[ConsumerAccountPaymentAmount]) AS 'Total Payment Amount', COUNT(*) AS 'Total Transaction Count'
  FROM [ref].[DM_Consumer_Account_Payment_Journal] CAPJ
  INNER JOIN [ref].[DM_Consumer_Payment_Journal] CPJ ON CAPJ.[ConsumerPaymentJournalID] = CPJ.[ConsumerPaymentJournalID]
  WHERE CAST(CAPJ.ConsumerPaymentEnteredDate AS DATE) BETWEEN '{start_date_str}' AND '{end_date_str}'
    AND CAPJ.BucketTransactionTypeCode IN (
	2
	)
	--DEFINE CARD PAYMENT FIELDS
	AND PaymentMemoID IN (
	23,
	13,
	12
	)
	AND ConsumerAccountPaymentReversingEntryID IS NULL
  GROUP BY CAST(CAPJ.ConsumerPaymentEnteredDate AS DATE);
"""

# Connect to the database
conn = pyodbc.connect(
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=azwsynt00.sql.azuresynapse.net;'
    'DATABASE=AZWSYNT00;'
    'Trusted_Connection=yes;'
)

# Execute the query. The try/finally guarantees the connection is closed even
# when the query raises. (pandas warns when handed a raw DBAPI connection such
# as pyodbc instead of a SQLAlchemy connectable; the query still runs.)
try:
    result_df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Sort the DataFrame by the 'Date' column in ascending order (oldest first).
# The GROUP BY has no ORDER BY, so the rows come back in arbitrary order.
result_df_sorted = result_df.sort_values(by='Date', ascending=True)

# Save the sorted result to a CSV file (written once, already sorted)
output_file_path = r'C:\Users\pskotte\Desktop\Predictions.csv'
result_df_sorted.to_csv(output_file_path, index=False)

# Display the sorted result
display(result_df_sorted)

  result_df = pd.read_sql_query(query, conn)


Unnamed: 0,Date,Total Payment Amount,Total Transaction Count
41,2023-10-15,760862.65,11328
35,2023-10-16,645810.65,7948
216,2023-10-17,1046480.01,14143
44,2023-10-18,884092.81,11379
203,2023-10-19,3962947.77,66273
...,...,...,...
15,2024-05-30,8998085.16,125489
109,2024-05-31,1764100.17,21762
161,2024-06-01,603501.83,6762
8,2024-06-02,147203.09,1450


In [2]:
import pandas as pd
from prophet import Prophet
from datetime import datetime, timedelta

# Forecast the next day's transaction count by averaging two estimates:
# a Prophet one-day-ahead forecast and the mean of the last four observations
# that fall on the same weekday as the day being predicted.

# Reload the daily totals from the CSV on disk (not the in-memory frame).
result_df_sorted = pd.read_csv(r'C:\Users\pskotte\Desktop\Predictions.csv')

# Parse the 'Date' column with an explicit ISO format. The dates in this file
# are written as YYYY-MM-DD, so dayfirst=True was the wrong hint and made
# pandas emit a parsing warning.
rows_before = len(result_df_sorted)
result_df_sorted['Date'] = pd.to_datetime(result_df_sorted['Date'], format='%Y-%m-%d', errors='coerce')
# Remove rows whose date could not be parsed, and say so instead of silently
# shrinking the training data.
result_df_sorted = result_df_sorted.dropna(subset=['Date'])
rows_dropped = rows_before - len(result_df_sorted)
if rows_dropped:
    print(f'Warning: dropped {rows_dropped} row(s) with unparseable dates')

# Check the last date in the 'Date' column
last_date = result_df_sorted['Date'].max()
current_date = datetime.now().date()
# Not used below; kept in case another cell reads it.
yesterday_date = current_date - timedelta(days=1)

# Exclude any row dated today. Filtering by value (rather than dropping the
# last row by position) gives the right result even if the file is not sorted.
result_df_sorted = result_df_sorted[result_df_sorted['Date'] != pd.Timestamp(current_date)]

# Sort chronologically so that .tail() below really means "most recent", then
# index by date for the weekday lookup.
result_df_sorted = result_df_sorted.sort_values('Date').set_index('Date')

# Prophet model: expects the date in column 'ds' and the target in column 'y'.
df_prophet = result_df_sorted.reset_index().rename(columns={'Date': 'ds', 'Total Transaction Count': 'y'})
model = Prophet(seasonality_mode='multiplicative', seasonality_prior_scale=1.5)
model.add_seasonality(name='weekly', period=7, fourier_order=5)
model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
model.add_seasonality(name='yearly', period=365.25, fourier_order=10)
model.fit(df_prophet)
# One extra daily period: the last forecast row is the day after the final
# training date.
future = model.make_future_dataframe(periods=1, freq='D')
forecast = model.predict(future)
prophet_prediction = forecast.iloc[-1]['yhat']

# The day being predicted: one day after the last date kept in the data.
prediction_date = result_df_sorted.index.max() + pd.Timedelta(days=1)

# Calculate the average of the last 4 similar weekdays
same_weekday_rows = result_df_sorted[result_df_sorted.index.weekday == prediction_date.weekday()]
last_4_similar_weekdays = same_weekday_rows.tail(4)['Total Transaction Count'].mean()

# Final estimate: simple average of the Prophet forecast and the weekday mean.
average_prediction_last_4 = (prophet_prediction + last_4_similar_weekdays) / 2

print(average_prediction_last_4)

  result_df_sorted['Date'] = pd.to_datetime(result_df_sorted['Date'], dayfirst=True, errors='coerce')
08:31:24 - cmdstanpy - INFO - Chain [1] start processing
08:31:24 - cmdstanpy - INFO - Chain [1] done processing


17655.217461417218


In [7]:
import os
import pandas as pd
from datetime import timedelta
import pyodbc

# Compute the return rate: total rows across the CSV files in the folder,
# divided by the transaction count queried for the files' date range plus the
# forecast count, expressed as a percentage.

# Set directory path
folder_path = r'C:\Users\pskotte\Desktop\Return Rate'

# Initialize a total sum variable
row_count_total = 0

# Collect each file's frame and concatenate once after the loop; concatenating
# inside the loop re-copies the accumulated data on every iteration.
frames = []

# Loop through all files in the directory and count rows for CSV files.
# sorted() makes the per-file output order deterministic across platforms.
for file_name in sorted(os.listdir(folder_path)):
    if file_name.endswith('.csv'):
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_csv(file_path)
        row_count = len(df)
        row_count_total += row_count
        print(f'File: {file_name}, Row count: {row_count}')
        frames.append(df)

print(f'Total row count in all CSV files: {row_count_total}')

# Fail with a clear message instead of a KeyError on 'Report Date' further down.
if not frames:
    raise FileNotFoundError(f'No CSV files found in {folder_path}')

all_data = pd.concat(frames, ignore_index=True)

# Get the date range from the Report Date column.
# NOTE(review): pandas emitted a warning on this line in the recorded run;
# if the column's date format is known, pass it via format= — TODO confirm.
all_data["Report Date"] = pd.to_datetime(all_data["Report Date"])
min_date = all_data["Report Date"].min()
max_date = all_data["Report Date"].max()

# Convert to the format 'YYYY-MM-DD'
min_date_str = min_date.strftime('%Y-%m-%d')
max_date_str = max_date.strftime('%Y-%m-%d')

# SQL query: same filters as the daily extract, aggregated over the whole
# [min_date_str, max_date_str] window instead of per day.
query = f"""
SELECT SUM(CAPJ.[ConsumerAccountPaymentAmount]) AS 'Total Payment Amount', COUNT(*) AS 'Total Transaction Count'
FROM [ref].[DM_Consumer_Account_Payment_Journal] CAPJ
INNER JOIN [ref].[DM_Consumer_Payment_Journal] CPJ ON CAPJ.[ConsumerPaymentJournalID] = CPJ.[ConsumerPaymentJournalID]
WHERE CAST(CAPJ.ConsumerPaymentEnteredDate AS DATE) BETWEEN '{min_date_str}' AND '{max_date_str}'
AND CAPJ.BucketTransactionTypeCode IN (
	2
)
--DEFINE CARD PAYMENT FIELDS
AND PaymentMemoID IN (
	23,
	13,
	12
)
AND ConsumerAccountPaymentReversingEntryID IS NULL;"""

# Connect to the database
conn = pyodbc.connect(
    'DRIVER={ODBC Driver 17 for SQL Server};'
    'SERVER=azwsynt00.sql.azuresynapse.net;'
    'DATABASE=AZWSYNT00;'
    'Trusted_Connection=yes;'
)

# Execute the query; try/finally closes the connection even if the query fails.
try:
    result_df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Calculate the ratio as a percentage.
# `average_prediction_last_4` is not defined in this cell: the forecast cell
# must have been run first in the same kernel.
# NOTE(review): the forecast is added on top of the queried count; if the
# query window already includes the forecast day, that day is counted twice — TODO confirm.
total_transaction_count = result_df['Total Transaction Count'].iloc[0] + average_prediction_last_4
ratio = (row_count_total / total_transaction_count) * 100
print(f'Return Rate: {ratio:.2f}%')

File: 2024-05-01 Part 2.csv, Row count: 5248
File: 2024-05-02 Part 2.csv, Row count: 4129
File: 2024-05-03 Part 2.csv, Row count: 1879
File: 2024-05-06 Part 2.csv, Row count: 3436
File: 2024-05-07 Part 2.csv, Row count: 4691
File: 2024-05-08 Part 2.csv, Row count: 2424
File: 2024-05-09 Part 2.csv, Row count: 1150
File: 2024-05-10 Part 2.csv, Row count: 1686
File: 2024-05-13 Part 2.csv, Row count: 4126
File: 2024-05-14 Part 2.csv, Row count: 5590
File: 2024-05-15 Part 2.csv, Row count: 2346
File: 2024-05-16 Part 2.csv, Row count: 2463
File: 2024-05-17 Part 2.csv, Row count: 3447
File: 2024-05-20 Part 2.csv, Row count: 4477
File: 2024-05-21 Part 2.csv, Row count: 6335
File: 2024-05-21 Part 3.csv, Row count: 88
File: 2024-05-22 Part 2.csv, Row count: 3361
File: 2024-05-23 Part 2.csv, Row count: 1998
File: 2024-05-24 Part 2.csv, Row count: 2152
File: 2024-05-28 Part 2.csv, Row count: 4372
File: 2024-05-29 Part 2.csv, Row count: 6387
File: 2024-05-29 Part 3.csv, Row count: 280
File: 2024-05

  all_data["Report Date"] = pd.to_datetime(all_data["Report Date"])
  result_df = pd.read_sql_query(query, conn)


Return Rate: 12.72%


In [8]:
# Inspect the return-rate denominator: the queried transaction count plus the forecast value.
total_transaction_count

772519.2174614172

In [9]:
# Show the upper bound ('YYYY-MM-DD') of the date window used in the return-rate SQL query.
print(max_date_str)

2024-06-04
