In [None]:
# packages and dependencies import
import os
import sys
import json
import pandas as pd # used for data manipulation
import numpy as np # used for mathematical operations

# Used for data transfer and visualization 
import plotly.graph_objects as go
import plotly.io as pio
from plotly.utils import PlotlyJSONEncoder 

In [None]:
# Data Import
# The CSV location can be overridden with the TRANSACTIONS_CSV environment
# variable so the notebook is not tied to a single machine; the original
# absolute path is kept as the default so existing setups keep working.
TRANSACTIONS_CSV_PATH = os.environ.get(
    "TRANSACTIONS_CSV",
    "/home/tjselevani/Desktop/Apps/vscode/python/python analysis/data/last-3-months-transactions.csv",
)
data = pd.read_csv(TRANSACTIONS_CSV_PATH)

# Ensure 'created_at' is a datetime column so the .dt accessor can be used
data['created_at'] = pd.to_datetime(data['created_at'])

# Derive the calendar year that the yearly aggregations group by
data.loc[:, 'year'] = data['created_at'].dt.year

In [None]:
# Keep only CREDIT transactions; .copy() gives an independent frame so later
# column assignments do not operate on a slice of the unfiltered data.
data = data.loc[data['transaction_type'].eq('CREDIT')].copy()

In [None]:
# Fixed color per known vehicle, plus entries for the 'Unknown' and 'Failed' labels
vehicle_colors = dict(
    SM191='blue',
    SM192='green',
    SM944='yellow',
    SM055='purple',
    SM024='orange',
    Unknown='gray',
    Failed='red',
)

# Unique, non-null vehicle identifiers present in the data
vehicles = data['vehicle_booked'].dropna().unique()

# Look up each row's color; vehicles absent from the mapping (or NaN) fall back to gray
data.loc[:, 'color'] = data['vehicle_booked'].map(vehicle_colors).fillna('gray')

In [None]:
# Total amount per (year, vehicle) pair
yearly_by_vehicle = (
    data
    .groupby(['year', 'vehicle_booked'])['amount']
    .sum()
    .reset_index()
)

# Wide layout: one row per year, one column per vehicle, ordered by year
yearly_pivot = (
    yearly_by_vehicle
    .pivot(index='year', columns='vehicle_booked', values='amount')
    .reset_index()
    .sort_values('year')
)

In [None]:
# Total amount per year across all vehicles
# (the 'year' column is derived from 'created_at' when the data is loaded)
yearly_total = (
    data
    .groupby('year')['amount']
    .sum()
    .reset_index()
)

In [None]:
# Plot yearly fares for each individual vehicle (line chart)
def plot_yearly_fares_line(output_file):
    """Export a line chart of yearly fare totals with one line per vehicle.

    Sums ``amount`` per (``year``, ``vehicle_booked``) pair from the
    module-level ``data`` frame, draws one ``go.Scatter`` trace per vehicle
    and writes the figure as Plotly JSON to
    ``../json/all/year/<output_file>.json`` (the directory is created if
    it does not exist).

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Aggregate total fare for each vehicle by year
    yearly_fares = data.groupby(['year', 'vehicle_booked'])['amount'].sum().reset_index()

    fig = go.Figure()

    # One line per vehicle
    for vehicle in yearly_fares['vehicle_booked'].unique():
        vehicle_data = yearly_fares[yearly_fares['vehicle_booked'] == vehicle]

        fig.add_trace(go.Scatter(
            x=vehicle_data['year'],
            y=vehicle_data['amount'],
            # 'text' must be part of the mode: without it Plotly only shows
            # `text` on hover and `textposition` has no effect.
            mode='lines+markers+text',
            line=dict(width=2),
            marker=dict(symbol='circle', size=6),
            # Thousands-separated integer labels, e.g. "12,345"
            text=vehicle_data['amount'].apply(lambda x: f"{int(x):,}"),
            textposition='top center',
            name=vehicle
        ))

    fig.update_layout(
        title="Yearly Fare Trends by Vehicle (Line)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white',
        legend_title="Vehicles"
    )

    # Serialize the figure before touching the filesystem
    plot_data_json = fig.to_json()

    # Create the output directory if needed
    json_dir = "../json/all/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")
    # fig.show()

# Call the function to plot yearly fares line chart
# plot_yearly_fares_line("yearly_fares_line")


In [None]:
# Plot yearly fares for a single vehicle (line chart)
def plot_yearly_fares_line_x1(vehicle_id, output_file):
    """Export a line chart of yearly fare totals for a single vehicle.

    Filters the module-level ``data`` frame to rows whose ``vehicle_booked``
    equals ``vehicle_id``, sums ``amount`` per ``year``, and writes the
    resulting figure as Plotly JSON to
    ``../json/<vehicle_id>/eda/year/<output_file>.json`` (the directory is
    created if it does not exist).

    Args:
        vehicle_id: Value of ``vehicle_booked`` to plot; also used as the
            trace name and as part of the output directory.
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Aggregate total fare for the specific vehicle by year
    yearly_fares = data[data['vehicle_booked'] == vehicle_id].groupby(['year'])['amount'].sum().reset_index()

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=yearly_fares['year'],
        y=yearly_fares['amount'],
        # 'text' must be part of the mode: without it Plotly only shows
        # `text` on hover and `textposition` has no effect.
        mode='lines+markers+text',
        line=dict(width=2),
        marker=dict(symbol='circle', size=6),
        # Thousands-separated integer labels, e.g. "12,345"
        text=yearly_fares['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center',
        name=vehicle_id
    ))

    fig.update_layout(
        title=f"Yearly Fare Trend for Vehicle {vehicle_id} (Line)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white',
        legend_title="Vehicle"
    )

    # Serialize the figure before touching the filesystem
    plot_data_json = fig.to_json()

    # Create the per-vehicle output directory if needed
    json_dir = f"../json/{vehicle_id}/eda/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(plot_data_json)

    print(f"Saved JSON Plotly data: {json_path}")
    # fig.show()

# Call the function to plot yearly fares line chart for a specific vehicle
# plot_yearly_fares_line_x1("SM055", "yearly_fares_line")

In [None]:
# Plot yearly fares for each individual vehicle (bar chart)
def plot_yearly_fares_bar(output_file):
    """Write a grouped bar chart of yearly fare totals per vehicle as Plotly JSON.

    Reads the module-level ``data`` frame, sums ``amount`` for every
    (``year``, ``vehicle_booked``) pair and adds one ``go.Bar`` trace per
    vehicle. The figure is saved to ``../json/all/year/<output_file>.json``.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    fare_totals = (
        data
        .groupby(['year', 'vehicle_booked'])['amount']
        .sum()
        .reset_index()
    )

    fig = go.Figure()

    # One bar series per vehicle, in order of first appearance
    for vehicle in fare_totals['vehicle_booked'].unique():
        rows = fare_totals.loc[fare_totals['vehicle_booked'] == vehicle]
        # Thousands-separated integer labels shown on the bars
        labels = rows['amount'].apply(lambda amount: format(int(amount), ','))

        bar = go.Bar(
            x=rows['year'],
            y=rows['amount'],
            name=vehicle,
            text=labels,
            textposition='auto',
        )
        fig.add_trace(bar)

    layout_options = dict(
        title="Yearly Fare Trends by Vehicle (Bar)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        barmode='group',  # vehicles side by side within each year
        template='plotly_white',
        legend_title="Vehicles",
    )
    fig.update_layout(**layout_options)

    # Serialize first, then make sure the target directory exists
    serialized = fig.to_json()
    json_dir = "../json/all/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")

# Call the function to plot yearly fares bar chart
# plot_yearly_fares_bar("yearly_fares_bar")


In [None]:
# Plot yearly fares for a single vehicle (bar chart)
def plot_yearly_fares_bar_x1(vehicle_id, output_file):
    """Write a bar chart of yearly fare totals for one vehicle as Plotly JSON.

    Selects the rows of the module-level ``data`` frame whose
    ``vehicle_booked`` equals ``vehicle_id``, sums ``amount`` per ``year`` and
    saves the figure to ``../json/<vehicle_id>/eda/year/<output_file>.json``.

    Args:
        vehicle_id: Value of ``vehicle_booked`` to plot; also used as the
            trace name and as part of the output directory.
        output_file: Base name (without extension) of the JSON file to write.
    """
    vehicle_rows = data.loc[data['vehicle_booked'] == vehicle_id]
    fares_by_year = vehicle_rows.groupby(['year'])['amount'].sum().reset_index()

    # Thousands-separated integer labels shown on the bars
    labels = fares_by_year['amount'].apply(lambda amount: format(int(amount), ','))

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=fares_by_year['year'],
            y=fares_by_year['amount'],
            name=vehicle_id,
            text=labels,
            textposition='auto',
        )
    )

    layout_options = dict(
        title=f"Yearly Fare Trend for Vehicle {vehicle_id} (Bar)",
        xaxis_title="Year",
        yaxis_title="Total Fare Amount (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white',
        legend_title="Vehicle",
    )
    fig.update_layout(**layout_options)

    # Serialize first, then make sure the per-vehicle directory exists
    serialized = fig.to_json()
    json_dir = f"../json/{vehicle_id}/eda/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")

# Call the function to plot yearly fares bar chart for a specific vehicle
# plot_yearly_fares_bar_x1("SM055", "yearly_fares_bar")

In [None]:
# Plot yearly total revenue (bar chart)
def plot_yearly_total_revenue_bar(output_file):
    """Write a bar chart of total yearly earnings (all vehicles) as Plotly JSON.

    Sums ``amount`` per ``year`` over the module-level ``data`` frame and
    saves the figure to ``../json/all/year/<output_file>.json``.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    revenue_by_year = data.groupby('year')['amount'].sum().reset_index()

    # Thousands-separated integer labels shown on the bars
    labels = revenue_by_year['amount'].apply(lambda amount: format(int(amount), ','))

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=revenue_by_year['year'],
            y=revenue_by_year['amount'],
            marker=dict(color='skyblue'),
            text=labels,
            textposition='auto',
            name="Total Earnings",
        )
    )

    layout_options = dict(
        title="Yearly Total Earnings - All Vehicles",
        xaxis_title="Year",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white',
    )
    fig.update_layout(**layout_options)

    # Serialize first, then make sure the target directory exists
    serialized = fig.to_json()
    json_dir = "../json/all/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")
# plot_yearly_total_revenue_bar("yearly_total_revenue_bar")

In [None]:
# Plot yearly total revenue (line chart)
def plot_yearly_total_revenue_line(output_file):
    """Export a line chart of total yearly earnings across all vehicles.

    Sums ``amount`` per ``year`` over the module-level ``data`` frame and
    writes the figure as Plotly JSON to
    ``../json/all/year/<output_file>.json`` (the directory is created if
    it does not exist).

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    # Calculate total revenue per year
    yearly_total = data.groupby('year')['amount'].sum().reset_index()

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=yearly_total['year'],
        y=yearly_total['amount'],
        # 'text' must be part of the mode: without it Plotly only shows
        # `text` on hover and `textposition` has no effect.
        mode='lines+markers+text',
        line=dict(color='green', width=2),
        marker=dict(symbol='circle', size=6),
        # Thousands-separated integer labels, e.g. "12,345"
        text=yearly_total['amount'].apply(lambda x: f"{int(x):,}"),
        textposition='top center',
        name="Total Earnings"
    ))

    fig.update_layout(
        title="Yearly Total Earnings - All Vehicles (Line)",
        xaxis_title="Year",
        yaxis_title="Total Earnings (KSH)",
        xaxis_tickangle=-45,
        template='plotly_white'
    )

    # Serialize the figure before touching the filesystem
    fig_json = fig.to_json()

    # Create the output directory if needed
    json_dir = "../json/all/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    # Explicit UTF-8 so the file does not depend on the platform's default encoding
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(fig_json)

    print(f"Saved JSON Plotly data: {json_path}")
    # fig.show()
# plot_yearly_total_revenue_line("yearly_total_revenue_line")

In [None]:
# Plot yearly breakdown by vehicle (bar chart)
def plot_yearly_breakdown_by_vehicle(output_file):
    """Write a stacked bar chart of yearly earnings split by vehicle as Plotly JSON.

    Sums ``amount`` per (``year``, ``vehicle_booked``) pair from the
    module-level ``data`` frame, pivots the result so each vehicle is a
    column (missing combinations become 0) and adds one ``go.Bar`` trace per
    vehicle. The figure is saved to ``../json/all/year/<output_file>.json``.

    Args:
        output_file: Base name (without extension) of the JSON file to write.
    """
    totals_long = (
        data
        .groupby(['year', 'vehicle_booked'])['amount']
        .sum()
        .reset_index()
    )

    # Years as rows, vehicles as columns; gaps filled with 0, rows ordered by year
    totals_wide = (
        totals_long
        .pivot(index='year', columns='vehicle_booked', values='amount')
        .fillna(0)
        .sort_index()
    )

    fig = go.Figure()

    # One bar series per vehicle column
    for vehicle, amounts in totals_wide.items():
        labels = amounts.apply(lambda amount: format(int(amount), ','))
        fig.add_trace(
            go.Bar(
                x=totals_wide.index,
                y=amounts,
                name=vehicle,
                text=labels,
                textposition='auto',
            )
        )

    layout_options = dict(
        title="Yearly Earnings Breakdown by Vehicle",
        xaxis_title="Year",
        yaxis_title="Total Earnings (KSH)",
        barmode='stack',  # vehicles stacked per year; 'group' would place them side by side
        template='plotly_white',
        xaxis_tickangle=-45,
        legend_title="Vehicles",
    )
    fig.update_layout(**layout_options)

    # Serialize first, then make sure the target directory exists
    serialized = fig.to_json()
    json_dir = "../json/all/year"
    os.makedirs(json_dir, exist_ok=True)

    json_path = f"{json_dir}/{output_file}.json"
    with open(json_path, 'w') as handle:
        handle.write(serialized)

    print(f"Saved JSON Plotly data: {json_path}")
# plot_yearly_breakdown_by_vehicle("yearly_breakdown")