In [None]:
# packages and dependencies import
import os
import sys
import json
import pandas as pd # used for data manipulation
import numpy as np # used for mathematical operations

# Used for Exploratory Data Analysis
import seaborn as sns 

# used for data visualization
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Used for data transfer and visualization 
import plotly.graph_objects as go
import plotly.io as pio

In [None]:
# Data Import
# The CSV location can be overridden with the TRANSACTIONS_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset (or empty) the original path is used.
data = pd.read_csv(
    os.environ.get("TRANSACTIONS_CSV")
    or "/home/tjselevani/Desktop/Apps/vscode/python/python analysis/data/last-3-months-transactions.csv"
)

# Convert 'created_at' column to datetime if not already
data['created_at'] = pd.to_datetime(data['created_at'])

# Label every row with the weekly period of 'created_at', stored as a string
data['week'] = data['created_at'].dt.to_period('W').astype(str)

In [None]:

def format_weeks_xticks(week_labels):
    """Build x-axis tick data for a sequence of week labels.

    Returns a 2-tuple: a ``range`` with one position per label, and the
    labels themselves (the same object that was passed in).
    """
    tick_positions = range(len(week_labels))
    return tick_positions, week_labels

In [None]:
# Keep only the CREDIT transactions; .copy() gives an independent frame
data = data.loc[data['transaction_type'].eq('CREDIT')].copy()

In [None]:
# Colour for each known vehicle, plus entries for the "Unknown" and
# "Failed" categories
vehicle_colors = dict(
    SM191='blue',
    SM192='green',
    SM944='yellow',
    SM055='purple',
    SM024='orange',
    Unknown='gray',
    Failed='red',
)

# Look up each row's colour from its vehicle; rows whose vehicle has no entry
# in the map (including missing vehicles) fall back to gray
data['color'] = data['vehicle_booked'].map(vehicle_colors).fillna('gray')

# Distinct, non-null vehicle identifiers found in the data
vehicles = data['vehicle_booked'].dropna().unique()

In [None]:
# Distinct week labels as a sorted list (the labels are strings, so this is
# a lexical sort)
all_weeks = sorted(set(data['week']))

In [None]:
# Total amount per (week, vehicle) pair, in long form
weekly_by_vehicle = (
    data
    .groupby(['week', 'vehicle_booked'])['amount']
    .sum()
    .reset_index()
)

# Wide form: one row per week, one column per vehicle, ordered by the week
# label (a string, so the ordering is lexical)
weekly_pivot = (
    weekly_by_vehicle
    .pivot(index='week', columns='vehicle_booked', values='amount')
    .reset_index()
    .sort_values('week')
)

In [None]:
# Example calls. Each plotting function is defined in a later cell and takes
# only the output file name, so run those cells before uncommenting any of these.
# plot_weekly_fares_line("weekly_fares_line")
# plot_weekly_fares_bar("weekly_fares_bar")
# plot_weekly_total_revenue_line("weekly_total_revenue_line")
# plot_weekly_total_revenue_bar("weekly_total_revenue_bar")
# plot_weekly_breakdown_by_vehicle("weekly_stacked_earnings")

In [None]:
def plot_weekly_fares_line(output_file):
    """Save a weekly fare line chart (one line per vehicle) as Plotly JSON.

    Reads the module-level ``data`` frame and ``vehicles`` array, sums
    ``amount`` per (week, vehicle) and draws one trace per vehicle.

    Parameters
    ----------
    output_file : str
        File name without extension. The figure is written to
        ``../json/all/week/<output_file>.json`` (relative to the current
        working directory); the directory is created if needed.

    Returns
    -------
    None
    """
    # Total fare per (week, vehicle), reshaped to a weeks x vehicles table
    weekly_vehicle_fares = data.groupby(['week', 'vehicle_booked'])['amount'].sum().reset_index()
    pivot_data = weekly_vehicle_fares.pivot(index='week', columns='vehicle_booked', values='amount').fillna(0)

    # Guarantee a column for every vehicle so the plotting loop below can
    # index pivot_data[vehicle] unconditionally
    for vehicle in vehicles:
        if vehicle not in pivot_data.columns:
            pivot_data[vehicle] = 0

    # Reindex on every week present in the data; weeks without any row in the
    # pivot become zero rows
    weeks = sorted(data['week'].unique())
    pivot_data = pivot_data.reindex(weeks).fillna(0)

    plot_data = go.Figure()

    for vehicle in vehicles:
        fares = pivot_data[vehicle]
        plot_data.add_trace(go.Scatter(
            x=weeks,
            y=fares,
            mode='lines+markers',
            name=vehicle,
            line=dict(width=2),
            marker=dict(symbol='circle', size=6),
            # mode has no 'text' flag, so Plotly shows these labels on hover
            # only and textposition has no visible effect
            text=fares.apply(lambda x: f"{int(x):,}"),
            textposition='top center'
        ))

    plot_data.update_layout(
        title="Weekly Fare Trends by Vehicle",
        xaxis_title="Week",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Create the output directory and write the figure as JSON
    json_dir = "../json/all/week"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's locale
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data.to_json())

    print(f"Saved JSON Plotly data: {json_path}")

    # plot_data.show()

# plot_weekly_fares_line("weekly_fares_line")


In [None]:
def plot_weekly_fares_line_x1(vehicle_id, output_file):
    """Save a weekly fare line chart for a single vehicle as Plotly JSON.

    Reads the module-level ``data`` frame, keeps the rows whose
    ``vehicle_booked`` equals ``vehicle_id`` and sums ``amount`` per week.

    Parameters
    ----------
    vehicle_id : str
        Vehicle identifier to filter on; also used in the output directory
        name and the chart title.
    output_file : str
        File name without extension. The figure is written to
        ``../json/<vehicle_id>/eda/week/<output_file>.json`` (relative to the
        current working directory); the directory is created if needed.

    Returns
    -------
    None
        When no rows match ``vehicle_id`` a message is printed and nothing
        is written.
    """
    vehicle_data = data[data['vehicle_booked'] == vehicle_id]

    if vehicle_data.empty:
        print(f"No data found for vehicle: {vehicle_id}")
        return

    # Total fare for this vehicle per week
    weekly_fares = vehicle_data.groupby('week')['amount'].sum().reset_index()

    # Use every week present in the full data set so weeks in which this
    # vehicle has no rows appear with a zero total
    weeks = sorted(data['week'].unique())
    weekly_fares = weekly_fares.set_index('week').reindex(weeks, fill_value=0).reset_index()

    plot_data = go.Figure()

    plot_data.add_trace(go.Scatter(
        x=weekly_fares['week'],
        y=weekly_fares['amount'],
        mode='lines+markers',
        name=vehicle_id,
        line=dict(width=2),
        marker=dict(symbol='circle', size=6),
        # mode has no 'text' flag, so Plotly shows these labels on hover only
        # and textposition has no visible effect
        text=weekly_fares['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center'
    ))

    plot_data.update_layout(
        title=f"Weekly Fare Trends for Vehicle {vehicle_id}",
        xaxis_title="Week",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Create the output directory and write the figure as JSON
    json_dir = f"../json/{vehicle_id}/eda/week"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's locale
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data.to_json())

    print(f"Saved JSON Plotly data: {json_path}")

    # plot_data.show()
# plot_weekly_fares_line_x1("SM191", "weekly_fares_line")

In [None]:
def plot_weekly_fares_bar(output_file):
    """Save a grouped weekly fare bar chart (one bar series per vehicle) as Plotly JSON.

    Reads the module-level ``data`` frame and ``vehicles`` array, sums
    ``amount`` per (week, vehicle) and draws one bar trace per vehicle.

    Parameters
    ----------
    output_file : str
        File name without extension. The figure is written to
        ``../json/all/week/<output_file>.json`` (relative to the current
        working directory); the directory is created if needed.

    Returns
    -------
    None
    """
    # Total fare per (week, vehicle), reshaped to a weeks x vehicles table
    weekly_vehicle_fares = data.groupby(['week', 'vehicle_booked'])['amount'].sum().reset_index()
    pivot_data = weekly_vehicle_fares.pivot(index='week', columns='vehicle_booked', values='amount').fillna(0)

    # Guarantee a column for every vehicle so the plotting loop below can
    # index pivot_data[vehicle] unconditionally
    for vehicle in vehicles:
        if vehicle not in pivot_data.columns:
            pivot_data[vehicle] = 0

    # Reindex on every week present in the data; weeks without any row in the
    # pivot become zero rows
    weeks = sorted(data['week'].unique())
    pivot_data = pivot_data.reindex(weeks).fillna(0)

    fig = go.Figure()

    # One bar trace per vehicle
    for vehicle in vehicles:
        fares = pivot_data[vehicle]
        fig.add_trace(go.Bar(
            x=weeks,
            y=fares,
            name=vehicle,
            text=fares.apply(lambda x: f"{int(x):,}"),
            textposition='auto'
        ))

    fig.update_layout(
        title="Weekly Fare Breakdown by Vehicle (Bar Chart)",
        xaxis_title="Week",
        yaxis_title="Total Fare Amount (KSH)",
        barmode='group',  # grouped bars (side by side)
        xaxis_tickangle=-45,
        template='plotly_white',
        legend_title="Vehicle"
    )

    # Create the output directory and write the figure as JSON
    json_dir = "../json/all/week"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's locale
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig.to_json())

    print(f"Saved JSON Plotly data: {json_path}")

    # fig.show()

# Example usage
# plot_weekly_fares_bar("weekly_fares_bar")

In [None]:
def plot_weekly_fares_bar_x1(vehicle_id, output_file):
    """Save a weekly fare bar chart for a single vehicle as Plotly JSON.

    Reads the module-level ``data`` frame, keeps the rows whose
    ``vehicle_booked`` equals ``vehicle_id`` and sums ``amount`` per week.

    Parameters
    ----------
    vehicle_id : str
        Vehicle identifier to filter on; also used in the output directory
        name and the chart title.
    output_file : str
        File name without extension. The figure is written to
        ``../json/<vehicle_id>/eda/week/<output_file>.json`` (relative to the
        current working directory); the directory is created if needed.

    Returns
    -------
    None
        When no rows match ``vehicle_id`` a message is printed and nothing
        is written.
    """
    vehicle_data = data[data['vehicle_booked'] == vehicle_id]

    if vehicle_data.empty:
        print(f"No data found for vehicle: {vehicle_id}")
        return

    # Total fare for this vehicle per week
    weekly_fares = vehicle_data.groupby('week')['amount'].sum().reset_index()

    # Use every week present in the full data set so weeks in which this
    # vehicle has no rows appear with a zero total
    weeks = sorted(data['week'].unique())
    weekly_fares = weekly_fares.set_index('week').reindex(weeks, fill_value=0).reset_index()

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=weekly_fares['week'],
        y=weekly_fares['amount'],
        name=vehicle_id,
        text=weekly_fares['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='auto',
        marker_color='indianred'
    ))

    fig.update_layout(
        title=f"Weekly Fare Totals for Vehicle {vehicle_id} (Bar Chart)",
        xaxis_title="Week",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white',
        legend_title="Vehicle"
    )

    # Create the output directory and write the figure as JSON
    json_dir = f"../json/{vehicle_id}/eda/week"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's locale
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig.to_json())

    print(f"Saved JSON Plotly data: {json_path}")

    # fig.show()

# Example usage
# plot_weekly_fares_bar_x1("SM055", "weekly_fares_bar")


In [None]:
def plot_weekly_total_revenue_line(output_file):
    """Save a line chart of total weekly earnings across all vehicles as Plotly JSON.

    Reads the module-level ``data`` frame and sums ``amount`` per week.

    Parameters
    ----------
    output_file : str
        File name without extension. The figure is written to
        ``../json/all/week/<output_file>.json`` (relative to the current
        working directory); the directory is created if needed.

    Returns
    -------
    None
    """
    # Total amount per week, ordered by the week label
    weekly_total = data.groupby('week')['amount'].sum().reset_index()
    weekly_total = weekly_total.sort_values('week')

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=weekly_total['week'],
        y=weekly_total['amount'],
        mode='lines+markers',
        line=dict(color='green', width=2),
        marker=dict(symbol='circle', size=6),
        # mode has no 'text' flag, so Plotly shows these labels on hover only
        # and textposition has no visible effect
        text=weekly_total['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center',
        name="Total Earnings"
    ))

    fig.update_layout(
        title="Weekly Total Earnings - All Vehicles (Line)",
        xaxis_title="Week",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Create the output directory and write the figure as JSON
    json_dir = "../json/all/week"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's locale
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig.to_json())

    print(f"Saved JSON Plotly data: {json_path}")
    # fig.show()
# plot_weekly_total_revenue_line("weekly_total_revenue_line")

In [None]:
def plot_weekly_total_revenue_bar(output_file):
    """Save a bar chart of total weekly earnings across all vehicles as Plotly JSON.

    Reads the module-level ``data`` frame and sums ``amount`` per week.

    Parameters
    ----------
    output_file : str
        File name without extension. The figure is written to
        ``../json/all/week/<output_file>.json`` (relative to the current
        working directory); the directory is created if needed.

    Returns
    -------
    None
    """
    # Total amount per week, ordered by the week label
    weekly_total = data.groupby('week')['amount'].sum().reset_index()
    weekly_total = weekly_total.sort_values('week')

    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=weekly_total['week'],
        y=weekly_total['amount'],
        marker=dict(color='skyblue'),
        text=weekly_total['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='auto',
        name="Total Earnings"
    ))

    fig.update_layout(
        title="Weekly Total Earnings - All Vehicles",
        xaxis_title="Week",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Create the output directory and write the figure as JSON
    json_dir = "../json/all/week"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's locale
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig.to_json())

    print(f"Saved JSON Plotly data: {json_path}")

    # fig.show()
# plot_weekly_total_revenue_bar("weekly_total_revenue_bar")


In [None]:
def plot_weekly_breakdown_by_vehicle(output_file):
    """Save a grouped bar chart of weekly earnings per vehicle as Plotly JSON.

    Reads the module-level ``data`` frame, sums ``amount`` per
    (week, vehicle) and draws one bar trace per vehicle column of the
    resulting pivot table.

    Parameters
    ----------
    output_file : str
        File name without extension. The figure is written to
        ``../json/all/week/<output_file>.json`` (relative to the current
        working directory); the directory is created if needed.

    Returns
    -------
    None
    """
    # Total earnings per (week, vehicle)
    weekly_by_vehicle = data.groupby(['week', 'vehicle_booked'])['amount'].sum().reset_index()

    # Weeks x vehicles table; missing combinations become 0, rows ordered by week label
    weekly_by_vehicle_pivot = weekly_by_vehicle.pivot(index='week', columns='vehicle_booked', values='amount')
    weekly_by_vehicle_pivot = weekly_by_vehicle_pivot.fillna(0).sort_index()

    fig = go.Figure()

    for vehicle in weekly_by_vehicle_pivot.columns:
        earnings = weekly_by_vehicle_pivot[vehicle]
        fig.add_trace(go.Bar(
            x=weekly_by_vehicle_pivot.index,
            y=earnings,
            name=vehicle,
            text=earnings.apply(lambda x: f"{int(x):,}"),
            textposition='auto'
        ))

    fig.update_layout(
        title="Weekly Earnings Breakdown by Vehicle",
        xaxis_title="Week",
        yaxis_title="Total Earnings (KSH)",
        barmode='group',  # side-by-side bars; 'stack' would stack them instead
        template='plotly_white',
        xaxis_tickangle=-45,
        legend_title="Vehicles"
    )

    # Create the output directory and write the figure as JSON
    json_dir = "../json/all/week"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's locale
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig.to_json())

    print(f"Saved JSON Plotly data: {json_path}")

    # fig.show()  # Optional: show in notebook or script
# plot_weekly_breakdown_by_vehicle("weekly_stacked_earnings")