In [None]:
# packages and dependencies import
import os
import sys
import json
import pandas as pd # used for data manipulation
import numpy as np # used for mathematical operations

# Used for Exploratory Data Analysis
import seaborn as sns 

# used for data visualization
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Used for data transfer and visualization 
import plotly.graph_objects as go
import plotly.io as pio
from plotly.utils import PlotlyJSONEncoder 

In [None]:
# Data import
# The CSV location can be overridden with the TRANSACTIONS_CSV environment
# variable so the notebook is runnable on other machines; when the variable is
# unset the original absolute path is used unchanged.
DEFAULT_TRANSACTIONS_CSV = "/home/tjselevani/Desktop/Apps/vscode/python/python analysis/data/last-3-months-transactions.csv"
data = pd.read_csv(os.environ.get("TRANSACTIONS_CSV", DEFAULT_TRANSACTIONS_CSV))

# Parse 'created_at' into datetimes (harmless if the column is already datetime)
data['created_at'] = pd.to_datetime(data['created_at'])

# Derive a string month label from 'created_at' (monthly period rendered as text)
data.loc[:, 'month'] = data['created_at'].dt.to_period('M').astype(str)

In [None]:
def format_months_xticks(month_labels):
    """Build x-tick positions (0 .. n-1) and hand the month labels back unchanged."""
    positions = range(len(month_labels))
    return positions, month_labels

In [None]:
# Keep only CREDIT transactions; .copy() makes the result an independent frame
data = data.loc[data['transaction_type'].eq('CREDIT')].copy()

In [None]:
# One fixed color per known vehicle, plus entries for "Unknown" and "Failed"
vehicle_colors = dict(
    SM191='blue',
    SM192='green',
    SM944='yellow',
    SM055='purple',
    SM024='orange',
    Unknown='gray',
    Failed='red',
)

booked = data['vehicle_booked']

# Look up each row's color; vehicles missing from the mapping (or NaN) get gray
data.loc[:, 'color'] = booked.map(vehicle_colors).fillna('gray')

# Distinct non-null vehicles present in the data
vehicles = booked.dropna().unique()

In [None]:
# Total amount per (month, vehicle) pair, in long format
monthly_by_vehicle = data.groupby(['month', 'vehicle_booked'], as_index=False)['amount'].sum()

# Wide format: one row per month, one column per vehicle, ordered by month
# (month labels are expected to be YYYY-MM strings, so text order is chronological)
monthly_pivot = (
    monthly_by_vehicle
    .pivot(index='month', columns='vehicle_booked', values='amount')
    .reset_index()
    .sort_values('month')
)

In [None]:
# Month-by-month plotting function (all vehicles, line chart)
def plot_monthly_fares_line(output_file):
    """
    Generate, save and display a month-by-month fare trend line plot per vehicle.

    Reads the notebook-level ``monthly_pivot`` (one row per month, one column
    per vehicle) and ``vehicles``. A line trace is added for every vehicle that
    has a column in the pivot. The figure is serialized to
    ``../json/all/month/<output_file>.json`` and then shown.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Months with no amount for a vehicle are plotted as 0
    pivot_data = monthly_pivot.fillna(0)

    plot_data = go.Figure()

    for vehicle in vehicles:
        if vehicle in pivot_data.columns:
            plot_data.add_trace(go.Scatter(
                x=pivot_data['month'],
                y=pivot_data[vehicle],
                mode='lines+markers',
                name=vehicle,
                line=dict(width=2),
                marker=dict(symbol='circle', size=6),
                # NOTE: mode has no 'text' flag, so these labels are hover-only
                text=pivot_data[vehicle].apply(lambda x: f"{int(x):,}"),
                textposition='top center'
            ))

    plot_data.update_layout(
        title="Monthly Fare Trends by Vehicle",
        xaxis_title="Month",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize the figure to Plotly JSON
    plot_data_json = plot_data.to_json()

    # Create the output directory if needed
    json_dir = "../json/all/month"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")

    plot_data.show()

# Call the function to generate the plot
plot_monthly_fares_line("monthly_fares_line")

In [None]:
def plot_monthly_fares_line_x1(vehicle_id, output_file):
    """
    Generate, save and display the monthly fare trend line plot for one vehicle.

    Filters the notebook-level ``data`` frame to rows whose ``vehicle_booked``
    equals ``vehicle_id``, sums ``amount`` per month, and fills months that
    exist in ``data`` but not for this vehicle with 0. The figure is written to
    ``../json/<vehicle_id>/eda/month/<output_file>.json`` before it is shown,
    so a display/renderer failure cannot prevent the file from being saved.

    Args:
        vehicle_id: Value of ``vehicle_booked`` to plot.
        output_file: Base name (without extension) of the JSON file to write.

    Returns:
        None. Prints a message and returns early when the vehicle has no rows.
    """
    # Filter data for the specific vehicle
    vehicle_data = data[data['vehicle_booked'] == vehicle_id]

    if vehicle_data.empty:
        print(f"No data found for vehicle: {vehicle_id}")
        return

    # Aggregate total fare for the vehicle by month
    monthly_fares = vehicle_data.groupby('month')['amount'].sum().reset_index()

    # Ensure all months are present (based on the unique months in the dataset)
    all_months = sorted(data['month'].unique())
    monthly_fares = monthly_fares.set_index('month').reindex(all_months, fill_value=0).reset_index()

    # Plotting
    plot_data = go.Figure()

    plot_data.add_trace(go.Scatter(
        x=monthly_fares['month'],
        y=monthly_fares['amount'],
        mode='lines+markers',
        name=vehicle_id,
        line=dict(width=2),
        marker=dict(symbol='circle', size=6),
        # NOTE: mode has no 'text' flag, so these labels are hover-only
        text=monthly_fares['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center'
    ))

    plot_data.update_layout(
        title=f"Monthly Fare Trends for Vehicle {vehicle_id}",
        xaxis_title="Month",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize the figure to Plotly JSON
    plot_data_json = plot_data.to_json()

    # Create the per-vehicle output directory if needed
    json_dir = f"../json/{vehicle_id}/eda/month"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")

    # Display last, matching the other plotting functions in this notebook
    plot_data.show()

# Example usage:
plot_monthly_fares_line_x1("SM191", "monthly_fares_line")

In [None]:
def plot_monthly_fares_bar(output_file):
    """
    Generate, save and display a grouped bar chart of monthly fares per vehicle.

    Sums ``amount`` per (month, vehicle) from the notebook-level ``data`` frame
    and builds a month x vehicle table covering every month in ``data`` and
    every entry of the notebook-level ``vehicles``; missing combinations are 0.
    The figure is written to ``../json/all/month/<output_file>.json`` and shown.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Aggregate total fare for each vehicle by month
    monthly_vehicle_fares = data.groupby(['month', 'vehicle_booked'])['amount'].sum().reset_index()

    # Sorted month labels give the chronological x-axis order
    all_months = sorted(data['month'].unique())

    # Pivot to month x vehicle, then align to all months and all vehicles in a
    # single reindex; absent rows/columns and empty cells become 0
    pivot_data = (
        monthly_vehicle_fares
        .pivot(index='month', columns='vehicle_booked', values='amount')
        .fillna(0)
        .reindex(index=all_months, columns=vehicles, fill_value=0)
    )

    # Initialize Plotly figure
    fig = go.Figure()

    # Add one bar trace per vehicle (every vehicle is a column after the reindex)
    for vehicle in vehicles:
        fig.add_trace(go.Bar(
            x=all_months,
            y=pivot_data[vehicle],
            name=vehicle,
            text=pivot_data[vehicle].apply(lambda x: f"{int(x):,}"),
            textposition='auto'
        ))

    # Update layout
    fig.update_layout(
        title="Monthly Fare Breakdown by Vehicle (Bar Chart)",
        xaxis_title="Month",
        yaxis_title="Total Fare Amount (KSH)",
        barmode='group',  # grouped bars (side by side)
        xaxis_tickangle=-45,
        template='plotly_white',
        legend_title="Vehicle"
    )

    # Serialize the figure to Plotly JSON
    plot_data_json = fig.to_json()

    # Create output directory
    json_dir = "../json/all/month"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")

    fig.show()

# Example usage
plot_monthly_fares_bar("monthly_fares_bar")

In [None]:
def plot_monthly_fares_bar_x1(vehicle_id, output_file):
    """
    Generate, save and display the monthly fare bar chart for one vehicle.

    Filters the notebook-level ``data`` frame to rows whose ``vehicle_booked``
    equals ``vehicle_id``, sums ``amount`` per month, and fills months that
    exist in ``data`` but not for this vehicle with 0. The figure is written to
    ``../json/<vehicle_id>/eda/month/<output_file>.json`` and then shown.

    Args:
        vehicle_id: Value of ``vehicle_booked`` to plot.
        output_file: Base name (without extension) of the JSON file to write.

    Returns:
        None. Prints a message and returns early when the vehicle has no rows.
    """
    # Filter data for the specific vehicle
    vehicle_data = data[data['vehicle_booked'] == vehicle_id]

    if vehicle_data.empty:
        print(f"No data found for vehicle: {vehicle_id}")
        return

    # Aggregate total fare for the vehicle by month
    monthly_fares = vehicle_data.groupby('month')['amount'].sum().reset_index()

    # Ensure all months are present
    all_months = sorted(data['month'].unique())
    monthly_fares = monthly_fares.set_index('month').reindex(all_months, fill_value=0).reset_index()

    # Plotting
    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=monthly_fares['month'],
        y=monthly_fares['amount'],
        name=vehicle_id,
        text=monthly_fares['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='auto',
        marker_color='indianred'
    ))

    fig.update_layout(
        title=f"Monthly Fare Totals for Vehicle {vehicle_id} (Bar Chart)",
        xaxis_title="Month",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white',
        legend_title="Vehicle"
    )

    # Serialize the figure to Plotly JSON
    plot_data_json = fig.to_json()

    # Create the per-vehicle output directory if needed
    json_dir = f"../json/{vehicle_id}/eda/month"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")

    fig.show()

# Example usage
plot_monthly_fares_bar_x1("SM055", "monthly_fares_bar")

In [None]:
def plot_monthly_total_revenue_line(output_file):
    """
    Generate, save and display a line plot of total monthly earnings.

    Sums ``amount`` per month over the notebook-level ``data`` frame (all
    vehicles together). The figure is written to
    ``../json/all/month/<output_file>.json`` before it is shown, so a
    display/renderer failure cannot prevent the file from being saved.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Calculate total revenue per month
    monthly_total = data.groupby('month')['amount'].sum().reset_index()
    monthly_total = monthly_total.sort_values('month')

    # Plot using Plotly
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=monthly_total['month'],
        y=monthly_total['amount'],
        mode='lines+markers',
        line=dict(color='green', width=2),
        marker=dict(symbol='circle', size=6),
        # NOTE: mode has no 'text' flag, so these labels are hover-only
        text=monthly_total['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center',
        name="Total Earnings"
    ))

    fig.update_layout(
        title="Monthly Total Earnings - All Vehicles (Line)",
        xaxis_title="Month",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize the figure to Plotly JSON
    fig_json = fig.to_json()

    # Create the output directory if needed
    json_dir = "../json/all/month"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig_json)

    print(f"Saved JSON Plotly data: {json_path}")

    # Display last, matching the other plotting functions in this notebook
    fig.show()

# Example usage
plot_monthly_total_revenue_line("monthly_total_revenue_line")


In [None]:
def plot_monthly_total_revenue_bar(output_file):
    """
    Generate, save and display a bar chart of total monthly earnings.

    Sums ``amount`` per month over the notebook-level ``data`` frame (all
    vehicles together). The figure is written to
    ``../json/all/month/<output_file>.json`` and then shown.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Calculate total revenue per month
    monthly_total = data.groupby('month')['amount'].sum().reset_index()
    monthly_total = monthly_total.sort_values('month')

    # Plot using Plotly for JSON output
    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=monthly_total['month'],
        y=monthly_total['amount'],
        marker=dict(color='skyblue'),
        text=monthly_total['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='auto',
        name="Total Earnings"
    ))

    # Update layout
    fig.update_layout(
        title="Monthly Total Earnings - All Vehicles",
        xaxis_title="Month",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize the figure to Plotly JSON
    fig_json = fig.to_json()

    # Create the output directory if needed
    json_dir = "../json/all/month"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig_json)

    print(f"Saved JSON Plotly data: {json_path}")

    # Show plot
    fig.show()

# Example usage
plot_monthly_total_revenue_bar("monthly_total_revenue_bar")

In [17]:
def plot_monthly_breakdown_by_vehicle(output_file):
    """
    Generate, save and display a stacked bar chart of monthly earnings by vehicle.

    Sums ``amount`` per (month, vehicle) from the notebook-level ``data`` frame,
    pivots to a month x vehicle table (missing combinations become 0) and adds
    one bar trace per vehicle column. The figure is written to
    ``../json/all/month/<output_file>.json`` and then shown.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Aggregate total earnings by month and vehicle
    monthly_by_vehicle = data.groupby(['month', 'vehicle_booked'])['amount'].sum().reset_index()

    # Pivot table for vehicle columns, rows ordered by month label
    monthly_by_vehicle_pivot = monthly_by_vehicle.pivot(index='month', columns='vehicle_booked', values='amount')
    monthly_by_vehicle_pivot = monthly_by_vehicle_pivot.fillna(0).sort_index()

    # Create a Plotly figure
    fig = go.Figure()

    for vehicle in monthly_by_vehicle_pivot.columns:
        fig.add_trace(go.Bar(
            x=monthly_by_vehicle_pivot.index,
            y=monthly_by_vehicle_pivot[vehicle],
            name=vehicle,
            text=monthly_by_vehicle_pivot[vehicle].apply(lambda x: f"{int(x):,}"),
            textposition='auto'
        ))

    # Update layout for stacked bars
    fig.update_layout(
        title="Monthly Earnings Breakdown by Vehicle",
        xaxis_title="Month",
        yaxis_title="Total Earnings (KSH)",
        barmode='stack',  # stacked bars
        template='plotly_white',
        xaxis_tickangle=-45,
        legend_title="Vehicles"
    )

    # Serialize the Plotly figure to JSON
    plot_json = fig.to_json()

    # Create output directory
    json_dir = "../json/all/month"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_json)

    print(f"Saved JSON Plotly data: {json_path}")

    fig.show()  # Optional: show in notebook or script

# Example usage
plot_monthly_breakdown_by_vehicle("monthly_stacked_earnings")


Saved JSON Plotly data: ../json/all/month/monthly_stacked_earnings.json
