In [14]:
import os
import pandas as pd
from prophet import Prophet
from datetime import datetime

# Set directory path
folder_path = r'C:\Users\pskotte\Desktop\EnsureBill'

# Initialize an empty DataFrame to hold all the data
all_data = pd.DataFrame()

# Loop through all files in the directory and read CSV files
for file_name in os.listdir(folder_path):
    if file_name.endswith('.csv'):
        file_path = os.path.join(folder_path, file_name)
        df = pd.read_csv(file_path)
        all_data = pd.concat([all_data, df], ignore_index=True)

# Create a new DataFrame with reformatted 'Transaction Date' and counts of 'Successful' and 'Failed' statuses
all_data['Transaction Date'] = pd.to_datetime(all_data['Transaction Date'])  # Keep the timestamp
summary_df = all_data.groupby(all_data['Transaction Date'].dt.strftime('%Y-%m-%d')).agg(Success=('Status', lambda x: (x == 'Successful').sum()), 
                                                       Failed=('Status', lambda x: (x == 'Failed').sum())).reset_index()
summary_df['Transaction Date'] = pd.to_datetime(summary_df['Transaction Date'])  # Parse the grouped date back to datetime

# Add a 'Total' column that sums 'Success' and 'Failed' for each transaction date
summary_df['Total'] = summary_df['Success'] + summary_df['Failed']

# Summarize by month for graph_df
graph_df = summary_df.copy()
graph_df['Transaction Date'] = pd.to_datetime(graph_df['Transaction Date']).dt.to_period('M')
graph_df = graph_df.groupby('Transaction Date').sum().reset_index()
graph_df['Transaction Date'] = graph_df['Transaction Date'].dt.to_timestamp()
graph_df = graph_df.sort_values(by='Transaction Date')

# Calculate the average count for each weekday from the entire unfiltered dataset
all_data['Weekday'] = all_data['Transaction Date'].dt.day_name()
average_weekday_counts = all_data.groupby('Weekday').size().mean()

print(f'Average count for each weekday: {average_weekday_counts}')

# Calculate the average count for each individual weekday
day_of_week_counts = all_data.groupby('Weekday').size()
print(f'Average count per each individual weekday: \n{day_of_week_counts}')

# Prepare the data for Prophet and remove the last row
prophet_training_df = summary_df[['Transaction Date', 'Total']].rename(columns={'Transaction Date': 'ds', 'Total': 'y'}).iloc[:-2]

# Prophet model for Total column
model = Prophet()
model.fit(prophet_training_df)
future = model.make_future_dataframe(periods=1)
forecast = model.predict(future)

# Extract the prediction for the next step in the series
next_step_prediction = forecast[['ds', 'yhat']].iloc[-1]
previous_step_prediction = forecast[['ds', 'yhat']].iloc[-2]

# Get the latest date from the dataset
latest_date = summary_df['Transaction Date'].max()
yesterday_date = latest_date - pd.Timedelta(days=1)

# Calculate yesterday's actual total value by summing the 'Total' of the latest date
yesterday_actual = summary_df[summary_df['Transaction Date'] == yesterday_date]['Total'].sum()

# Calculate average of today's weekday based on the last 4 same weekdays
weekday_today = latest_date.weekday()
last_4_same_weekdays = summary_df[summary_df['Transaction Date'].dt.weekday == weekday_today].tail(4)['Total'].mean()

# Determine the closer prediction
closer_prediction = previous_step_prediction['yhat'] if abs(previous_step_prediction['yhat'] - yesterday_actual) < abs(last_4_same_weekdays - yesterday_actual) else last_4_same_weekdays

# Calculate the sum of the last 4 days of the same weekday and divide by 4 to get the average
recent_4_same_weekdays = summary_df[summary_df['Transaction Date'].dt.weekday == weekday_today].tail(4)['Total']
recent_4_same_weekdays_avg = recent_4_same_weekdays.sum() / len(recent_4_same_weekdays)

# Create the formatted DataFrame
output_df = pd.DataFrame({
    'Prediction Date': [previous_step_prediction['ds']],
    "Yesterday's Actual": [yesterday_actual],
    "Yesterday's Predicted": [previous_step_prediction['yhat']],
    'Closer Prediction': [closer_prediction],
    'Recent 4 Same Weekdays Avg': [recent_4_same_weekdays_avg]
})

# Export the prediction to a CSV file
output_file_path = r'C:\Users\pskotte\Desktop\Power Bi Data\Total.csv'
output_df.to_csv(output_file_path, index=False)

# Export the graph summary to a CSV file
graph_output_file_path = r'C:\Users\pskotte\Desktop\Power Bi Data\Graph.csv'
graph_df.to_csv(graph_output_file_path, index=False)

# Export the summary_df to a CSV file
summary_output_file_path = r'C:\Users\pskotte\Desktop\Power Bi Data\Summary.csv'
summary_df.to_csv(summary_output_file_path, index=False)

# Display the first few rows of the summary DataFrame
print(summary_df.head())
print(output_df)
print(graph_df.head())

Average count for each weekday: 29288.0
Average count per each individual weekday: 
Weekday
Friday       27547
Monday       36053
Saturday     27820
Sunday       28246
Thursday     31651
Tuesday      28497
Wednesday    25202
dtype: int64


15:07:45 - cmdstanpy - INFO - Chain [1] start processing
15:07:45 - cmdstanpy - INFO - Chain [1] done processing


  Transaction Date  Success  Failed  Total
0       2023-09-01     1184     192   1376
1       2023-09-02      644     155    799
2       2023-09-03      436     117    553
3       2023-09-04     1229     263   1492
4       2023-09-05      627     149    776
  Prediction Date  Yesterday's Actual  Yesterday's Predicted  \
0      2024-05-19                1591             779.449668   

   Closer Prediction  Recent 4 Same Weekdays Avg  
0         779.449668                       607.5  
  Transaction Date  Success  Failed  Total
0       2023-09-01    13637    2750  16387
1       2023-10-01    24630    5258  29888
2       2023-11-01    21090    4488  25578
3       2023-12-01    22021    4639  26660
4       2024-01-01    22819    4836  27655
