In [None]:
# packages and dependencies import
import os
import sys
import json
import pandas as pd # used for data manipulation
import numpy as np # used for mathematical operations

# Used for data transfer and visualization 
import plotly.graph_objects as go
import plotly.io as pio
from plotly.utils import PlotlyJSONEncoder 
from plotly.subplots import make_subplots

In [None]:
# Data Import
# The CSV location can be overridden with the TRANSACTIONS_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original hard-coded path is used.
TRANSACTIONS_CSV_PATH = os.environ.get(
    "TRANSACTIONS_CSV",
    "/home/tjselevani/Desktop/Apps/vscode/python/python analysis/data/last-3-months-transactions.csv",
)
data = pd.read_csv(TRANSACTIONS_CSV_PATH)

# Convert 'created_at' column to datetime if not already
data['created_at'] = pd.to_datetime(data['created_at'])

# Extract the calendar date of each transaction
data.loc[:, 'date'] = data['created_at'].dt.date

# Extract time in minutes since start of the day
data.loc[:, 'minutes'] = data['created_at'].dt.hour * 60 + data['created_at'].dt.minute

# Extract the hour of the day (0-23)
data.loc[:, 'hours'] = data['created_at'].dt.hour

# Extract the day of the week (0 = Monday, 6 = Sunday)
data.loc[:, 'day'] = data['created_at'].dt.dayofweek

# Extract the weekly period containing 'created_at', stored as a string
data.loc[:, 'week'] = data['created_at'].dt.to_period('W').astype(str)

In [None]:
# Show the prepared DataFrame as this cell's output for a quick visual check.
data

In [None]:
def format_minutes_xticks():
    """Return tick positions and labels for a minutes-of-day x-axis.

    This is the single definition of the helper; an earlier duplicate with
    unpadded labels was always shadowed by this behaviour and has been removed.

    Returns:
        tuple: ``(xticks, labels)`` where ``xticks`` is the list
        ``[0, 60, ..., 1440]`` (minutes since midnight, one tick per hour) and
        ``labels`` is the matching list of zero-padded strings from
        ``"00:00"`` to ``"24:00"``.
    """
    xticks = list(range(0, 1441, 60))  # one tick every 60 minutes (hourly)
    labels = [f"{h:02d}:00" for h in range(0, 25)]  # zero-padded HH:00
    return xticks, labels

def format_hours_xticks():
    """Build x-axis ticks for an hour-of-day axis.

    Returns:
        tuple: a ``range`` of positions 0 through 24 and the list of matching
        ``"H:00"`` labels (not zero-padded).
    """
    hour_positions = range(0, 25, 1)
    hour_labels = []
    for hour in range(0, 25):
        hour_labels.append(f"{hour}:00")
    return hour_positions, hour_labels

def format_days_xticks():
    """Build x-axis ticks for a day-of-week axis.

    Returns:
        tuple: a ``range`` of positions 0 through 6 and the list of
        three-letter day names starting with ``"Mon"``.
    """
    day_names = "Mon Tue Wed Thu Fri Sat Sun".split()
    positions = range(len(day_names))
    return positions, day_names

In [None]:
# Keep only CREDIT transactions; .copy() detaches the result from the unfiltered frame.
data = data.loc[data['transaction_type'].eq('CREDIT')].copy()

In [None]:
# Colour assigned to each known vehicle, plus the "Unknown" and "Failed" categories
vehicle_colors = dict(
    SM191='blue',
    SM192='green',
    SM944='yellow',
    SM055='purple',
    SM024='orange',
    Unknown='gray',
    Failed='red',
)

# Per-row colour looked up from vehicle_booked; anything without a mapping falls back to gray
data.loc[:, 'color'] = (
    data['vehicle_booked']
    .map(vehicle_colors)
    .fillna('gray')
)

# Distinct, non-missing vehicle identifiers present in the data
vehicles = data['vehicle_booked'].dropna().unique()

In [None]:
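# Quick reference of the plot generators defined in the cells below; uncomment a call only after running its definition cell.
# generate_day_earnings_with_scatter_bundle_plotly("2024-12-06", "day_scatter_earnings")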
# generate_day_earnings_with_scatter_bundle_plotly_xx("2024-12-06", "day_scatter_earnings")
# generate_day_earnings_with_scatter_bundle_plotly_x1("2024-12-06", "SM055", "day_scatter_earnings")
# generate_day_earnings_with_hourly_bundle_plotly("2024-12-06", "day_hour_bundled_earnings")
# generate_day_earnings_with_week_bundle_plotly("2024-12-06", "day_week_bundled_earnings")
# generate_fare_trends_plotly("daily_fare_trends")
# generate_fare_trends_plotly_xx("daily_fare_trends")
# generate_fare_trends_plotly_x1("SM024", "daily_fare_trends")

In [None]:

def generate_day_earnings_with_scatter_bundle_plotly(date, output_file):
    """
    Build a Plotly scatter plot of all transactions on one day, save it as JSON and show it.

    Reads the module-level ``data`` frame and ``vehicle_colors`` mapping. One
    trace is added per key of ``vehicle_colors`` that has rows on the day, plus
    a red "Failed Transactions" trace for rows whose ``payment_status`` is 3.
    The figure is written to ``../json/all/day/<output_file>_<YYYY-MM-DD>.json``
    and then displayed with ``fig.show()``. No PNG is produced.

    Parameters:
    - date: The specific date to visualize (anything ``pd.to_datetime`` accepts).
    - output_file: The base name for the saved JSON file.

    Returns:
    - None. Prints a notice and returns early when the day has no transactions.
    """
    # Normalise the argument once; the same value is used for filtering, the title and the filename
    specific_date = pd.to_datetime(date).date()

    day_data = data[data['created_at'].dt.date == specific_date]

    if day_data.empty:
        print(f"No transactions found for {specific_date}.")
        return

    # Failed transactions are those with payment status 3
    failed_data = day_data[day_data['payment_status'] == 3]

    fig = go.Figure()

    # Plot each vehicle separately for a clear legend.
    # NOTE(review): rows whose vehicle_booked is missing or is not a key of
    # vehicle_colors get no trace here, and failed rows are drawn both in their
    # vehicle's trace and in the red trace below - confirm this is intended.
    for vehicle, color in vehicle_colors.items():
        vehicle_data = day_data[day_data['vehicle_booked'] == vehicle]
        if not vehicle_data.empty:
            fig.add_trace(go.Scatter(
                x=vehicle_data['minutes'],
                y=vehicle_data['amount'],
                mode='markers',
                marker=dict(color=color, opacity=0.6),
                name=vehicle
            ))

    # Add failed transactions as their own red trace
    if not failed_data.empty:
        fig.add_trace(go.Scatter(
            x=failed_data['minutes'],
            y=failed_data['amount'],
            mode='markers',
            marker=dict(color='red', opacity=0.6),
            name="Failed Transactions"
        ))

    xticks, labels = format_minutes_xticks()

    fig.update_layout(
        title=f"Scatter Plot of Credit Transactions ({specific_date})",
        xaxis=dict(
            title="Time of Day",
            tickvals=xticks,
            ticktext=labels
        ),
        yaxis_title="Transaction Amount (Ksh)",
        template="plotly_white"
    )

    plot_data_json = fig.to_json()

    # Define output directory
    json_dir = "../json/all/day"
    os.makedirs(json_dir, exist_ok=True)

    # Use the normalised ISO date (not the raw argument) so inputs such as
    # "12/06/2024" or a Timestamp cannot inject path separators or spaces.
    json_path = f"{json_dir}/{output_file}_{specific_date}.json"
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")

    fig.show()
# Example usage: runs when this cell executes - saves and displays the scatter plot
# for 2024-12-12 (or prints a notice if that day has no transactions).
generate_day_earnings_with_scatter_bundle_plotly("2024-12-12", "day_scatter_earnings")

In [None]:
def generate_day_earnings_with_scatter_bundle_plotly_xx(date, output_filename):
    """
    Save a separate Plotly scatter plot (as JSON) for each vehicle's transactions on one day.

    Reads the module-level ``data`` frame and ``vehicle_colors`` mapping. For
    every distinct, non-missing ``vehicle_booked`` value on the day, a figure
    with the vehicle's transactions (plus a red "Failed Transactions" trace for
    rows whose ``payment_status`` is 3) is written to
    ``../json/<vehicle>/eda/day/<output_filename>.json``. No PNG is produced.

    Parameters:
    - date: The specific date to visualize (anything ``pd.to_datetime`` accepts).
    - output_filename: The base name for the saved JSON files.

    Returns:
    - None. Prints a notice and returns early when the day has no transactions.
    """
    # Parse timestamps into a local series instead of reassigning the column on the shared frame
    created_at = pd.to_datetime(data['created_at'])

    # Filter transactions for the specified date
    specific_date = pd.to_datetime(date).date()
    day_data = data[created_at.dt.date == specific_date]

    if day_data.empty:
        print(f"No transactions found for {specific_date}.")
        return

    # Tick layout is identical for every figure, so compute it once
    xticks, labels = format_minutes_xticks()

    # Process each vehicle separately; rows with a missing vehicle have nothing to plot
    for vehicle_name in day_data['vehicle_booked'].dropna().unique():
        vehicle_data = day_data[day_data['vehicle_booked'] == vehicle_name]

        # Get color for the vehicle (default to gray if not found)
        vehicle_color = vehicle_colors.get(vehicle_name, 'gray')

        failed_data = vehicle_data[vehicle_data['payment_status'] == 3]

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=vehicle_data['minutes'],
            y=vehicle_data['amount'],
            mode='markers',
            marker=dict(color=vehicle_color, opacity=0.6),
            name=vehicle_name
        ))

        # Add failed transactions as their own red trace.
        # NOTE(review): these rows are also part of the vehicle trace above - confirm intended.
        if not failed_data.empty:
            fig.add_trace(go.Scatter(
                x=failed_data['minutes'],
                y=failed_data['amount'],
                mode='markers',
                marker=dict(color='red', opacity=0.6),
                name="Failed Transactions"
            ))

        fig.update_layout(
            title=f"Scatter Plot of Transactions ({specific_date}) - {vehicle_name}",
            xaxis=dict(
                title="Time of Day",
                tickvals=xticks,
                ticktext=labels
            ),
            yaxis_title="Transaction Amount (Ksh)",
            template="plotly_white"
        )

        plot_data_json = fig.to_json()

        # Create the per-vehicle directory for the JSON output
        json_dir = f"../json/{vehicle_name}/eda/day"
        os.makedirs(json_dir, exist_ok=True)

        json_path = f"{json_dir}/{output_filename}.json"
        with open(json_path, 'w', encoding='utf-8') as json_file:
            json_file.write(plot_data_json)

        print(f"Saved JSON for {vehicle_name}: {json_path}")

# Example usage:
# generate_day_earnings_with_scatter_bundle_plotly_xx("2024-12-06", "day_scatter_earnings")


In [None]:
def generate_day_earnings_with_scatter_bundle_plotly_x1(date, vehicle_name, output_filename):
    """
    Save a Plotly scatter plot (as JSON) of one vehicle's transactions on one day.

    Reads the module-level ``data`` frame and ``vehicle_colors`` mapping. The
    figure shows the vehicle's transactions plus a red "Failed Transactions"
    trace for rows whose ``payment_status`` is 3, and is written to
    ``../json/<vehicle_name>/eda/day/<output_filename>.json``. No PNG is produced.

    Parameters:
    - date: The specific date to visualize (anything ``pd.to_datetime`` accepts).
    - vehicle_name: The ``vehicle_booked`` value to plot.
    - output_filename: The base name for the saved JSON file.

    Returns:
    - None. Prints a notice and returns early when the day, or the vehicle on
      that day, has no transactions.
    """
    # Parse timestamps into a local series instead of reassigning the column on the shared frame
    created_at = pd.to_datetime(data['created_at'])

    # Filter transactions for the specified date
    specific_date = pd.to_datetime(date).date()
    day_data = data[created_at.dt.date == specific_date]

    if day_data.empty:
        print(f"No transactions found for {specific_date}.")
        return

    vehicle_data = day_data[day_data['vehicle_booked'] == vehicle_name]

    if vehicle_data.empty:
        print("vehicle data not available")
        return

    # Get color for the vehicle (default to gray if not found)
    vehicle_color = vehicle_colors.get(vehicle_name, 'gray')

    failed_data = vehicle_data[vehicle_data['payment_status'] == 3]

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=vehicle_data['minutes'],
        y=vehicle_data['amount'],
        mode='markers',
        marker=dict(color=vehicle_color, opacity=0.6),
        name=vehicle_name
    ))

    # Add failed transactions as their own red trace.
    # NOTE(review): these rows are also part of the vehicle trace above - confirm intended.
    if not failed_data.empty:
        fig.add_trace(go.Scatter(
            x=failed_data['minutes'],
            y=failed_data['amount'],
            mode='markers',
            marker=dict(color='red', opacity=0.6),
            name="Failed Transactions"
        ))

    xticks, labels = format_minutes_xticks()

    fig.update_layout(
        title=f"Scatter Plot of Transactions ({specific_date}) - {vehicle_name}",
        xaxis=dict(
            title="Time of Day",
            tickvals=xticks,
            ticktext=labels
        ),
        yaxis_title="Transaction Amount (Ksh)",
        template="plotly_white"
    )

    plot_data_json = fig.to_json()

    # Create the per-vehicle directory for the JSON output
    json_dir = f"../json/{vehicle_name}/eda/day"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_filename}.json"
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON for {vehicle_name}: {json_path}")

# Example usage
# generate_day_earnings_with_scatter_bundle_plotly_x1("2024-12-06", "SM055", "day_scatter_earnings")


In [None]:
def generate_day_earnings_with_hourly_bundle_plotly(date, output_file):
    """
    Save a bar chart (as Plotly JSON) of per-vehicle transaction totals for each hour of one day.

    Reads the module-level ``data`` frame, ``vehicles`` array and
    ``vehicle_colors`` mapping; none of them is modified. Amounts are summed
    per ``vehicle_booked`` and ``hours``. Two extra series are added when rows
    exist for them:
    - "Unknown": rows whose ``vehicle_booked`` is missing or not in ``vehicles``.
    - "Failed": rows whose ``payment_status`` is 3 (these rows are also counted
      in their vehicle's own totals).
    The figure is written to ``../json/all/day/<output_file>_<YYYY-MM-DD>.json``.

    Parameters:
    - date: The day to plot (anything ``pd.to_datetime`` accepts).
    - output_file: The base name for the saved JSON file.

    Returns:
    - None. Prints a notice and returns early when the day has no transactions.
    """
    specific_date = pd.to_datetime(date).date()
    day_data = data.loc[data['created_at'].dt.date == specific_date, :]

    # Without rows there are no bar groups and the bar-width division below would fail
    if day_data.empty:
        print(f"No transactions found for {specific_date}.")
        return

    all_hours = np.arange(24)  # 0 to 23 hours

    def _hourly_totals(rows):
        """Sum ``amount`` per hour of ``rows`` into a 24-slot series (0 for hours without rows)."""
        return rows.groupby('hours')['amount'].sum().reindex(all_hours, fill_value=0)

    # Aggregate sum of amounts per vehicle per hour, with one column for every hour of the day
    hourly_sums = day_data.groupby(['vehicle_booked', 'hours'])['amount'].sum().reset_index()
    pivot_data = (
        hourly_sums
        .pivot(index='vehicle_booked', columns='hours', values='amount')
        .fillna(0)
        .reindex(columns=all_hours, fill_value=0)
    )

    unknown_transactions = day_data[day_data['vehicle_booked'].isna() | ~day_data['vehicle_booked'].isin(vehicles)]
    if not unknown_transactions.empty:
        pivot_data.loc['Unknown'] = _hourly_totals(unknown_transactions).to_numpy()

    failed_transactions = day_data[day_data['payment_status'] == 3]
    if not failed_transactions.empty:
        pivot_data.loc['Failed'] = _hourly_totals(failed_transactions).to_numpy()

    fig = go.Figure()
    num_vehicles = len(pivot_data.index)
    bar_width = 0.8 / num_vehicles  # Distribute bars within 0.8 space

    for i, vehicle in enumerate(pivot_data.index):
        offset = -0.4 + i * bar_width  # Shift each series so bars sit side by side
        fig.add_trace(go.Bar(
            x=[hour + offset for hour in all_hours],
            y=pivot_data.loc[vehicle],
            name=vehicle,
            marker=dict(color=vehicle_colors.get(vehicle, 'gray')),
            width=bar_width
        ))

    # Add dotted vertical lines at each hour, all reaching the tallest bar
    y_max = pivot_data.values.max()
    for hour in all_hours:
        fig.add_trace(go.Scatter(
            x=[hour, hour], y=[0, y_max],
            mode='lines',
            line=dict(dash='dot', color='gray'),
            showlegend=False
        ))

    fig.update_layout(
        title=f"Total Transaction Amount per Hour of the Day ({date})",
        xaxis=dict(
            title='Time of Day',
            tickmode='array',
            tickvals=all_hours,
            ticktext=[f"{h}:00" for h in all_hours],
            showgrid=False
        ),
        yaxis=dict(title='Total Transaction Amount (KSH)'),
        barmode='group',
        legend_title="Vehicle"
    )

    plot_data_json = fig.to_json()

    plot_dir = "../json/all/day"
    os.makedirs(plot_dir, exist_ok=True)
    # Use the normalised ISO date (not the raw argument) so inputs such as
    # "12/06/2024" or a Timestamp cannot inject path separators or spaces.
    plot_path = f"{plot_dir}/{output_file}_{specific_date}.json"

    with open(plot_path, 'w', encoding='utf-8') as f:
        f.write(plot_data_json)

    print(f"Saved Plotly JSON for {date}: {plot_path}")

# Example Usage
# generate_day_earnings_with_hourly_bundle_plotly("2024-12-06", "day_hour_bundled_earnings")

In [None]:
def generate_day_earnings_with_week_bundle_plotly(date, output_file):
    """
    Save a grouped bar chart (as Plotly JSON) of daily revenue per vehicle for the week containing ``date``.

    Reads the module-level ``data`` frame. The week runs from the Monday on or
    before ``date`` through the following Sunday. Amounts are summed per
    ``vehicle_booked`` and calendar day, and one bar series is added per
    vehicle. The figure is written to
    ``../json/all/day/<output_file>_<YYYY-MM-DD>.json``, where the date part is
    the normalised ``date`` argument.

    Parameters:
    - date: Any day inside the week to plot (anything ``pd.to_datetime`` accepts).
    - output_file: The base name for the saved JSON file.

    Returns:
    - None. Prints a notice and returns early when the week has no transactions.
    """
    specific_date = pd.to_datetime(date)
    start_of_week = specific_date - pd.DateOffset(days=specific_date.weekday())
    end_of_week = start_of_week + pd.DateOffset(days=6)

    created_dates = data['created_at'].dt.date
    in_week = (created_dates >= start_of_week.date()) & (created_dates <= end_of_week.date())
    weekly_data = data.loc[in_week].copy()

    if weekly_data.empty:
        print(f"No transactions found for {start_of_week.date()} to {end_of_week.date()}.")
        return

    weekly_data['date'] = weekly_data['created_at'].dt.date

    unique_dates = sorted(weekly_data['date'].unique())
    # NOTE(review): Plotly labels generally use '<br>' for line breaks; confirm '\n' renders as intended.
    date_labels = [pd.to_datetime(day).strftime('%a\n%m/%d') for day in unique_dates]

    daily_revenue = weekly_data.groupby(['vehicle_booked', 'date'])['amount'].sum().reset_index()
    pivot_data = daily_revenue.pivot(index='vehicle_booked', columns='date', values='amount').fillna(0)
    # groupby drops rows with a missing vehicle_booked, so a day whose rows all
    # lack a vehicle would be absent from the columns; reindex so that every
    # column lines up with its entry in date_labels.
    pivot_data = pivot_data.reindex(columns=unique_dates, fill_value=0)

    fig = go.Figure()

    for vehicle in pivot_data.index:
        vehicle_totals = pivot_data.loc[vehicle]
        fig.add_trace(go.Bar(
            x=date_labels,
            y=vehicle_totals,
            name=vehicle,
            text=vehicle_totals.astype(int),
            textposition='outside'
        ))

    fig.update_layout(
        title=f"Weekly Revenue by Vehicle ({start_of_week.date()} to {end_of_week.date()})",
        xaxis_title="Day of the Week",
        yaxis_title="Total Revenue Amount (KSH)",
        barmode='group',
        xaxis=dict(tickmode='array', tickvals=list(range(len(date_labels))), ticktext=date_labels),
    )

    plot_data_json = fig.to_json()

    plot_dir = "../json/all/day"
    os.makedirs(plot_dir, exist_ok=True)
    # Use the normalised ISO date (not the raw argument) so inputs such as
    # "12/06/2024" or a Timestamp cannot inject path separators or spaces.
    plot_path = f"{plot_dir}/{output_file}_{specific_date.date()}.json"

    with open(plot_path, 'w', encoding='utf-8') as f:
        f.write(plot_data_json)

    print(f"Saved Plotly graph data for {start_of_week.date()} to {end_of_week.date()}: {plot_path}")

# Example Usage
# generate_day_earnings_with_week_bundle_plotly("2024-12-06", "day_week_bundled_earnings")


In [None]:
def generate_fare_trends_plotly(output_file):
    """
    Save a combined line chart (as Plotly JSON) of daily fare totals for every vehicle.

    Reads the module-level ``data`` frame, ``vehicles`` array and
    ``vehicle_colors`` mapping. Filtering is done on a local frame, so the
    shared ``data`` frame is left untouched for other cells. Rows with a
    missing ``vehicle_booked``, or one equal to "Failed" or "Unknown", are
    excluded. The figure is written to ``../json/all/day/<output_file>.json``.

    Parameters:
    - output_file (str): Base file name for saving the JSON.

    Returns:
    - None.
    """
    # Remove invalid bookings (locally - do not rebind the global frame)
    valid = data.dropna(subset=['vehicle_booked'])
    valid = valid[~valid['vehicle_booked'].isin(["Failed", "Unknown"])]

    # Aggregate fares per day and vehicle
    daily_vehicle_fares = valid.groupby(['date', 'vehicle_booked'])['amount'].sum().reset_index()
    pivot_data = daily_vehicle_fares.pivot(index='date', columns='vehicle_booked', values='amount').fillna(0)

    # Ensure all dates and all vehicles are present (missing vehicles become all-zero series)
    all_days = sorted(valid['date'].unique())
    pivot_data = pivot_data.reindex(all_days, fill_value=0)
    for vehicle in vehicles:
        if vehicle not in pivot_data.columns:
            pivot_data[vehicle] = 0

    # One line per vehicle; every vehicle has a column after the step above
    fig = go.Figure()
    for vehicle in vehicles:
        fig.add_trace(go.Scatter(
            x=pivot_data.index,
            y=pivot_data[vehicle],
            mode='lines+markers',
            name=vehicle,
            line=dict(color=vehicle_colors.get(vehicle, 'gray'))
        ))

    # Layout customization
    fig.update_layout(
        title="Daily Fare Trends by Vehicle",
        xaxis_title="Date",
        yaxis_title="Total Earnings (KSH)",
        xaxis=dict(tickangle=-45),
        legend_title="Vehicle",
        template="plotly_white"
    )

    plot_data_json = fig.to_json()

    # Save JSON file
    plot_dir = os.path.join("..", "json", "all", "day")
    os.makedirs(plot_dir, exist_ok=True)
    json_path = os.path.join(plot_dir, f"{output_file}.json")
    with open(json_path, "w", encoding="utf-8") as f:
        f.write(plot_data_json)

    print(f"Saved combined fare trends JSON: {json_path}")

# Call the function
# generate_fare_trends_plotly("daily_fare_trends")


In [None]:

def generate_fare_trends_plotly_xx(output_file):
    """
    Save one daily fare trend line chart (as Plotly JSON) per vehicle.

    Reads the module-level ``data`` frame, ``vehicles`` array and
    ``vehicle_colors`` mapping. Filtering and date formatting are done on a
    local copy, so the shared ``data`` frame is left untouched for other cells.
    Rows with a missing ``vehicle_booked``, or one equal to "Failed" or
    "Unknown", are excluded. Each vehicle's figure is written to
    ``../json/<vehicle>/eda/day/<output_file>.json``.

    Parameters:
    - output_file (str): Base file name for saving the JSON

    Returns:
    - None.
    """
    # Remove invalid bookings (locally - do not rebind or edit the global frame)
    valid = data.dropna(subset=['vehicle_booked'])
    valid = valid[~valid['vehicle_booked'].isin(["Failed", "Unknown"])].copy()

    # Format 'date' as YYYY-MM-DD strings on the local copy only
    valid['date'] = pd.to_datetime(valid['date']).dt.strftime('%Y-%m-%d')

    # Aggregate fares per day and vehicle
    daily_vehicle_fares = valid.groupby(['date', 'vehicle_booked'])['amount'].sum().reset_index()
    pivot_data = daily_vehicle_fares.pivot(index='date', columns='vehicle_booked', values='amount').fillna(0)

    # Ensure all dates and all vehicles are present (missing vehicles become all-zero series)
    all_days = sorted(valid['date'].unique())
    pivot_data = pivot_data.reindex(all_days, fill_value=0)
    for vehicle in vehicles:
        if vehicle not in pivot_data.columns:
            pivot_data[vehicle] = 0

    # Generate one figure per vehicle; every vehicle has a column after the step above
    for vehicle in vehicles:
        fig = go.Figure()

        fig.add_trace(go.Scatter(
            x=pivot_data.index,
            y=pivot_data[vehicle],
            mode='lines+markers',
            name=vehicle,
            line=dict(color=vehicle_colors.get(vehicle, 'gray'))
        ))

        # Layout customization
        fig.update_layout(
            title=f"Daily Fare Trends - {vehicle}",
            xaxis_title="Date",
            yaxis_title="Total Earnings (KSH)",
            xaxis=dict(tickangle=-45),
            legend_title="Vehicle",
            template="plotly_white"
        )

        plot_data_json = fig.to_json()

        # Save JSON file
        plot_dir = os.path.join("..", "json", vehicle, "eda", "day")
        os.makedirs(plot_dir, exist_ok=True)
        json_path = os.path.join(plot_dir, f"{output_file}.json")
        with open(json_path, "w", encoding="utf-8") as f:
            f.write(plot_data_json)

        print(f"Saved JSON for {vehicle}: {json_path}")


# Example usage:
# generate_fare_trends_plotly_xx("daily_fare_trends")


In [None]:
def generate_fare_trends_plotly_x1(vehicle_id, output_file):
    """
    Save a daily fare trend line chart (as Plotly JSON) for a single vehicle.

    Reads the module-level ``data`` frame and ``vehicle_colors`` mapping.
    Filtering and date formatting are done on a local copy, so the shared
    ``data`` frame is left untouched for other cells. Rows with a missing
    ``vehicle_booked``, or one equal to "Failed" or "Unknown", are excluded
    before selecting the vehicle. The figure is written to
    ``../json/<vehicle_id>/eda/day/<output_file>.json``.

    Parameters:
    - vehicle_id (str): The ID of the vehicle to plot.
    - output_file (str): Base file name for saving the JSON.

    Returns:
    - None. Prints a notice and returns early when the vehicle has no rows.
    """
    # Remove invalid bookings (locally - do not rebind or edit the global frame)
    valid = data.dropna(subset=['vehicle_booked'])
    valid = valid[~valid['vehicle_booked'].isin(["Failed", "Unknown"])]

    # Filter data for the specified vehicle_id
    vehicle_data = valid[valid['vehicle_booked'] == vehicle_id].copy()

    if vehicle_data.empty:
        print(f"No data found for vehicle: {vehicle_id}")
        return

    # Format 'date' as YYYY-MM-DD strings on the local copy only
    vehicle_data['date'] = pd.to_datetime(vehicle_data['date']).dt.strftime('%Y-%m-%d')

    # Aggregate fares per day
    daily_fares = vehicle_data.groupby('date')['amount'].sum().reset_index()

    # Create the Plotly figure
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=daily_fares['date'],
        y=daily_fares['amount'],
        mode='lines+markers',
        name=vehicle_id,
        line=dict(color=vehicle_colors.get(vehicle_id, 'gray'))
    ))

    # Layout customization
    fig.update_layout(
        title=f"Daily Fare Trends - {vehicle_id}",
        xaxis_title="Date",
        yaxis_title="Total Earnings (KSH)",
        xaxis=dict(tickangle=-45),
        legend_title="Vehicle",
        template="plotly_white"
    )

    # Save JSON file
    plot_dir = os.path.join("..", "json", vehicle_id, "eda", "day")
    os.makedirs(plot_dir, exist_ok=True)
    json_path = os.path.join(plot_dir, f"{output_file}.json")

    with open(json_path, "w", encoding="utf-8") as f:
        f.write(fig.to_json())

    print(f"Saved JSON for {vehicle_id}: {json_path}")


# Example usage:
# generate_fare_trends_plotly_x1("SM024", "daily_fare_trends")
