In [3]:
# packages and dependencies import
import os
import sys
import json
import pandas as pd # used for data manipulation
import numpy as np # used for mathematical operations

# Used for data transfer and visualization 
import plotly.graph_objects as go
import plotly.io as pio
from plotly.utils import PlotlyJSONEncoder 

In [4]:
# Data Import
# The CSV location can be overridden with the TRANSACTIONS_CSV environment
# variable so the notebook is not tied to a single machine's directory layout.
# The original absolute path is kept as the default for backward compatibility.
DATA_PATH = os.environ.get(
    "TRANSACTIONS_CSV",
    "/home/tjselevani/Desktop/Apps/vscode/python/python analysis/data/last-3-months-transactions.csv",
)
data = pd.read_csv(DATA_PATH)

# Parse 'created_at' into datetimes (harmless if the column is already datetime)
data['created_at'] = pd.to_datetime(data['created_at'])

# Calendar year of each transaction; used as the grouping key by the cells below
data['year'] = data['created_at'].dt.year

In [5]:
# Keep only CREDIT transactions; .copy() gives an independent frame so the
# column assignments in later cells do not write into a slice of the raw data.
data = data.loc[data['transaction_type'].eq('CREDIT')].copy()

In [6]:
# Colour lookup keyed by vehicle id, with extra entries for the
# "Unknown" and "Failed" categories.
vehicle_colors = {
    'SM191': 'blue',
    'SM192': 'green',
    'SM944': 'yellow',
    'SM055': 'purple',
    'SM024': 'orange',
    'Unknown': 'gray',
    'Failed': 'red'
}

# Attach a colour to every row; vehicles missing from the lookup (including
# NaN) fall back to gray.
data.loc[:, 'color'] = (
    data['vehicle_booked']
    .map(vehicle_colors)
    .fillna('gray')
)

# Distinct vehicle ids present in the data, ignoring missing values
vehicles = (
    data['vehicle_booked']
    .dropna()
    .unique()
)

In [7]:
# Long format: total amount for every (year, vehicle) pair
yearly_by_vehicle = (
    data
    .groupby(['year', 'vehicle_booked'])['amount']
    .sum()
    .reset_index()
)

# Wide format: one row per year, one column per vehicle, ordered by year
yearly_pivot = (
    yearly_by_vehicle
    .pivot(index='year', columns='vehicle_booked', values='amount')
    .reset_index()
    .sort_values('year')
)

In [8]:
# Total amount per year across all vehicles (uses the 'year' column derived
# from 'created_at' in the data-import cell)
yearly_total = data.groupby('year', as_index=False)['amount'].sum()

In [None]:
# Plot yearly fares (line chart)
def plot_yearly_fares_line(output_file):
    """Save a line chart of the total fare per year across all vehicles.

    Sums ``amount`` per ``year`` over the notebook-level ``data`` frame and
    writes the Plotly figure as JSON to ``../json/all/year/<output_file>.json``
    (relative to the current working directory), creating the directory if
    it does not exist. The figure is not displayed.

    Args:
        output_file: Base name of the JSON file, without the extension.
    """
    # Total fare per year, summed over every vehicle
    yearly_fares = data.groupby('year')['amount'].sum().reset_index()

    # Plotting
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=yearly_fares['year'],
        y=yearly_fares['amount'],
        # 'text' has to be part of the mode for the amount labels to be drawn
        # at the points; without it they only appear on hover and
        # textposition has no effect.
        mode='lines+markers+text',
        line=dict(color='blue', width=2),
        marker=dict(symbol='circle', size=6),
        text=yearly_fares['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center',
        name="Total Earnings"
    ))

    fig.update_layout(
        title="Yearly Fare Trends - All Vehicles (Line)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize before touching the filesystem
    plot_data_json = fig.to_json()

    # Create output directories
    json_dir = "../json/all/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit encoding so the file does not depend on the platform default
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")
# Build the all-vehicles yearly line chart; the JSON lands in ../json/all/year/yearly_fares_line.json
plot_yearly_fares_line("yearly_fares_line")

Saved JSON Plotly data: ../json/all/year/yearly_fares_line.json


In [None]:
# Plot yearly fares for a specific vehicle (line chart)
def plot_yearly_fares_line_x1(vehicle_id, output_file):
    """Save and display a line chart of yearly fare totals for one vehicle.

    Filters the notebook-level ``data`` frame to rows whose
    ``vehicle_booked`` equals ``vehicle_id``, sums ``amount`` per ``year``,
    and writes the Plotly figure as JSON to
    ``../json/<vehicle_id>/eda/year/<output_file>_<vehicle_id>.json``
    (relative to the current working directory). If no rows match, a message
    is printed and nothing is written.

    Args:
        vehicle_id: Value of ``vehicle_booked`` to plot.
        output_file: Base name of the JSON file, without the extension.
    """
    # Filter data for the specific vehicle
    vehicle_data = data[data['vehicle_booked'] == vehicle_id]

    if vehicle_data.empty:
        print(f"No data found for vehicle: {vehicle_id}")
        return

    # Aggregate total fare for the vehicle by year
    yearly_fares = vehicle_data.groupby('year')['amount'].sum().reset_index()

    # Plotting
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=yearly_fares['year'],
        y=yearly_fares['amount'],
        # 'text' has to be part of the mode for the amount labels to be drawn
        # at the points; without it they only appear on hover and
        # textposition has no effect.
        mode='lines+markers+text',
        line=dict(width=2),
        marker=dict(symbol='circle', size=6),
        text=yearly_fares['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center',
        name=vehicle_id
    ))

    fig.update_layout(
        title=f"Yearly Fare Trends for Vehicle {vehicle_id} (Line)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize before touching the filesystem
    plot_data_json = fig.to_json()

    # Create output directories
    json_dir = f"../json/{vehicle_id}/eda/year"
    os.makedirs(json_dir, exist_ok=True)

    # NOTE(review): the file name repeats the vehicle id although the
    # directory already contains it — confirm downstream readers expect this.
    json_path = f"{json_dir}/{output_file}_{vehicle_id}.json"
    # Explicit encoding so the file does not depend on the platform default
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")
    fig.show()
# Yearly line chart for vehicle SM191; the JSON lands in ../json/SM191/eda/year/yearly_fares_line_SM191.json
plot_yearly_fares_line_x1("SM191", "yearly_fares_line")

In [None]:
# Plot yearly fares (bar chart)
def plot_yearly_fares_bar(output_file):
    """Save and display a bar chart of the total fare per year, all vehicles.

    Reads the notebook-level ``data`` frame, sums ``amount`` per ``year`` and
    writes the Plotly figure as JSON to ``../json/all/year/<output_file>.json``.

    Args:
        output_file: Base name of the JSON file, without the extension.
    """
    # One row per year with the summed amount over every vehicle
    totals = data.groupby('year')['amount'].sum().reset_index()
    # Thousands-separated integer labels shown on the bars
    labels = totals['amount'].apply(lambda value: f"{int(value):,}")

    bars = go.Bar(
        x=totals['year'],
        y=totals['amount'],
        marker=dict(color='skyblue'),
        text=labels,
        textposition='auto',
        name="Total Earnings"
    )

    fig = go.Figure()
    fig.add_trace(bars)
    fig.update_layout(
        title="Yearly Fare Breakdown - All Vehicles (Bar)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize first, then make sure the target directory exists
    serialized = fig.to_json()
    target_dir = "../json/all/year"
    os.makedirs(target_dir, exist_ok=True)

    json_path = f"{target_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")
    fig.show()

In [None]:
# Plot yearly fares for a specific vehicle (bar chart)
def plot_yearly_fares_bar_x1(vehicle_id, output_file):
    """Save and display a bar chart of yearly fare totals for one vehicle.

    Selects rows of the notebook-level ``data`` frame whose
    ``vehicle_booked`` equals ``vehicle_id``, sums ``amount`` per ``year`` and
    writes the Plotly figure as JSON to
    ``../json/<vehicle_id>/eda/year/<output_file>.json``. When no rows match,
    a message is printed and the function returns without writing anything.

    Args:
        vehicle_id: Value of ``vehicle_booked`` to plot.
        output_file: Base name of the JSON file, without the extension.
    """
    is_vehicle = data['vehicle_booked'] == vehicle_id
    vehicle_rows = data[is_vehicle]

    # Nothing to plot for an unknown vehicle
    if vehicle_rows.empty:
        print(f"No data found for vehicle: {vehicle_id}")
        return

    # One row per year with the vehicle's summed amount
    totals = vehicle_rows.groupby('year')['amount'].sum().reset_index()
    # Thousands-separated integer labels shown on the bars
    labels = totals['amount'].apply(lambda value: f"{int(value):,}")

    bars = go.Bar(
        x=totals['year'],
        y=totals['amount'],
        name=vehicle_id,
        text=labels,
        textposition='auto',
        marker_color='indianred'
    )

    fig = go.Figure()
    fig.add_trace(bars)
    fig.update_layout(
        title=f"Yearly Fare Totals for Vehicle {vehicle_id} (Bar)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize first, then make sure the target directory exists
    serialized = fig.to_json()
    target_dir = f"../json/{vehicle_id}/eda/year"
    os.makedirs(target_dir, exist_ok=True)

    json_path = f"{target_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")
    fig.show()

In [None]:
# Plot yearly total revenue (bar chart)
def plot_yearly_total_revenue_bar(output_file):
    """Save and display a bar chart of total earnings per year, all vehicles.

    Reads the notebook-level ``data`` frame, sums ``amount`` per ``year`` and
    writes the Plotly figure as JSON to ``../json/all/year/<output_file>.json``.

    Args:
        output_file: Base name of the JSON file, without the extension.
    """
    # Revenue per year; local to this function, independent of any
    # notebook-level variable with a similar name
    revenue = data.groupby('year')['amount'].sum().reset_index()
    # Thousands-separated integer labels shown on the bars
    labels = revenue['amount'].apply(lambda value: f"{int(value):,}")

    bars = go.Bar(
        x=revenue['year'],
        y=revenue['amount'],
        marker=dict(color='skyblue'),
        text=labels,
        textposition='auto',
        name="Total Earnings"
    )

    fig = go.Figure()
    fig.add_trace(bars)
    fig.update_layout(
        title="Yearly Total Earnings - All Vehicles",
        xaxis_title="Year",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize first, then make sure the target directory exists
    serialized = fig.to_json()
    target_dir = "../json/all/year"
    os.makedirs(target_dir, exist_ok=True)

    json_path = f"{target_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")
    fig.show()

In [None]:
# Plot yearly total revenue (line chart)
def plot_yearly_total_revenue_line(output_file):
    """Save and display a line chart of total earnings per year, all vehicles.

    Sums ``amount`` per ``year`` over the notebook-level ``data`` frame and
    writes the Plotly figure as JSON to ``../json/all/year/<output_file>.json``
    (relative to the current working directory), creating the directory if
    it does not exist.

    Args:
        output_file: Base name of the JSON file, without the extension.
    """
    # Calculate total revenue per year
    yearly_total = data.groupby('year')['amount'].sum().reset_index()

    # Plot using Plotly for JSON output
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=yearly_total['year'],
        y=yearly_total['amount'],
        # 'text' has to be part of the mode for the amount labels to be drawn
        # at the points; without it they only appear on hover and
        # textposition has no effect.
        mode='lines+markers+text',
        line=dict(color='green', width=2),
        marker=dict(symbol='circle', size=6),
        text=yearly_total['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center',
        name="Total Earnings"
    ))

    # Update layout
    fig.update_layout(
        title="Yearly Total Earnings - All Vehicles (Line)",
        xaxis_title="Year",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize before touching the filesystem
    fig_json = fig.to_json()

    # Create output directories
    json_dir = "../json/all/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit encoding so the file does not depend on the platform default
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig_json)

    print(f"Saved JSON Plotly data: {json_path}")
    fig.show()

In [None]:
# Plot yearly breakdown by vehicle (bar chart)
def plot_yearly_breakdown_by_vehicle(output_file):
    """Save and display grouped bars of yearly earnings, one series per vehicle.

    Sums ``amount`` per (``year``, ``vehicle_booked``) over the notebook-level
    ``data`` frame, pivots to one column per vehicle (missing combinations
    become 0) and writes the Plotly figure as JSON to
    ``../json/all/year/<output_file>.json``.

    Args:
        output_file: Base name of the JSON file, without the extension.
    """
    # Long format: one row per (year, vehicle) with the summed amount
    per_vehicle = (
        data
        .groupby(['year', 'vehicle_booked'])['amount']
        .sum()
        .reset_index()
    )

    # Wide format indexed by year; gaps filled with 0 so every bar has a value
    wide = (
        per_vehicle
        .pivot(index='year', columns='vehicle_booked', values='amount')
        .fillna(0)
        .sort_index()
    )

    fig = go.Figure()

    # One bar series per vehicle column
    for vehicle, amounts in wide.items():
        labels = amounts.apply(lambda value: f"{int(value):,}")
        fig.add_trace(go.Bar(
            x=wide.index,
            y=amounts,
            name=vehicle,
            text=labels,
            textposition='auto'
        ))

    fig.update_layout(
        title="Yearly Earnings Breakdown by Vehicle",
        xaxis_title="Year",
        yaxis_title="Total Earnings (KSH)",
        barmode='group',  # side-by-side bars (group / stack)
        template='plotly_white',
        xaxis_tickangle=-45,
        legend_title="Vehicles"
    )

    # Serialize first, then make sure the target directory exists
    serialized = fig.to_json()
    target_dir = "../json/all/year"
    os.makedirs(target_dir, exist_ok=True)

    json_path = f"{target_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")
    fig.show()