In [2]:
# packages and dependencies import
import os
import sys
import json
import pandas as pd # used for data manipulation
import numpy as np # used for mathematical operations

# Used for Exploratory Data Analysis
import seaborn as sns 

# used for data visualization
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Used for data transfer and visualization 
import plotly.graph_objects as go
import plotly.io as pio

In [3]:
# Data Import
# Location of the transactions CSV. The default is the path this notebook was
# originally written against; set the TRANSACTIONS_CSV environment variable to
# run the notebook on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "TRANSACTIONS_CSV",
    "/home/tjselevani/Desktop/Apps/vscode/python/python analysis/data/last-3-months-transactions.csv",
)
data = pd.read_csv(DATA_PATH)

# Parse 'created_at' into datetimes (harmless if the column is already datetime)
data['created_at'] = pd.to_datetime(data['created_at'])

# Derive a string month key from 'created_at' (monthly period rendered as text);
# the monthly aggregations below group on this column
data['month'] = data['created_at'].dt.to_period('M').astype(str)

In [4]:
def format_months_xticks(month_labels):
    """Build the tick positions and tick labels for a month axis.

    Args:
        month_labels: Sized sequence of month labels, one per tick.

    Returns:
        A ``(positions, labels)`` tuple: ``positions`` is
        ``range(len(month_labels))`` and ``labels`` is ``month_labels`` itself
        (the same object, not a copy).
    """
    tick_positions = range(len(month_labels))
    return tick_positions, month_labels


In [5]:
# data.head()

In [6]:
# data.tail()

In [7]:
# data.shape

In [8]:
# data.columns

In [9]:
# data.describe()

In [10]:
# data.isnull().sum()

In [11]:
# Data Visualization

In [12]:
# Filter data for the specific vehicle
def get_vehicle_data(vehicle_id, transactions=None):
    """Split transactions into the frames used by the per-vehicle analysis.

    Args:
        vehicle_id: Value matched against the 'vehicle_booked' column.
        transactions: Optional DataFrame with 'vehicle_booked' and
            'transaction_type' columns. When omitted, the notebook-level
            ``data`` frame is used (looked up at call time).

    Returns:
        dict of three independent copies:
            "all":    every CREDIT transaction in the source frame, across
                      ALL vehicles -- it is NOT filtered by ``vehicle_id``.
            "credit": CREDIT transactions of ``vehicle_id`` only.
            "debit":  DEBIT transactions of ``vehicle_id`` only.
    """
    source = data if transactions is None else transactions

    is_vehicle = source['vehicle_booked'] == vehicle_id
    is_credit = source['transaction_type'] == 'CREDIT'
    is_debit = source['transaction_type'] == 'DEBIT'

    return {
        # Deliberately not restricted to vehicle_id: the notebook groups this
        # frame by 'vehicle_booked' to compare vehicles against each other.
        "all": source[is_credit].copy(),
        "credit": source[is_vehicle & is_credit].copy(),
        "debit": source[is_vehicle & is_debit].copy(),
    }

# Vehicle ids covered by this analysis
vehicles = ['SM191', 'SM192', 'SM055', 'SM024', 'SM944']

# Pre-compute the all/credit/debit frames of every vehicle, keyed by vehicle id
vehicle_data_dict = {
    vehicle_id: get_vehicle_data(vehicle_id) for vehicle_id in vehicles
}

# Vehicle whose frames the following cells work with
selected_vehicle = 'SM024'  # Change this to switch vehicles

# Pull the three frames of the selected vehicle out of the dictionary.
# NOTE(review): the last target rebinds `data` -- the notebook-level frame that
# get_vehicle_data reads -- to the CREDIT-only "all" frame. Re-running this cell
# without re-running the import cell therefore rebuilds every frame from that
# subset (so the "debit" frames come back empty).
vehicle_data, vehicle_revenue, data = (
    vehicle_data_dict[selected_vehicle][frame_key]
    for frame_key in ('credit', 'debit', 'all')
)


In [13]:
# One fixed colour per vehicle id, plus entries for the 'Unknown' and 'Failed' labels
vehicle_colors = dict(
    SM191='blue',
    SM192='green',
    SM944='yellow',
    SM055='purple',
    SM024='orange',
    Unknown='gray',
    Failed='red',
)

# Per-row colour looked up from 'vehicle_booked'; rows whose vehicle has no entry
# in vehicle_colors (including rows with a missing vehicle) fall back to gray
data['color'] = data['vehicle_booked'].map(vehicle_colors).fillna('gray')

# Distinct non-null vehicle ids actually present in the data
# (this replaces the hand-written `vehicles` list with an array)
vehicles = data['vehicle_booked'].dropna().unique()

In [14]:
# Total amount per (month, vehicle) pair, in long form:
# one row per pair with columns month / vehicle_booked / amount
monthly_by_vehicle = (
    data.groupby(['month', 'vehicle_booked'])['amount']
    .sum()
    .reset_index()
)

# Wide form of the same totals: one row per month, one column per vehicle,
# ordered by month (assuming format is YYYY-MM, so text order is date order)
monthly_pivot = (
    monthly_by_vehicle
    .pivot(index='month', columns='vehicle_booked', values='amount')
    .reset_index()
    .sort_values('month')
)

In [15]:
# VISUALIZATION 2: Bar chart for monthly earnings by vehicle
# monthly_by_vehicle is already in long form (month, vehicle_booked, amount),
# which is what a hue-grouped seaborn bar chart needs; plot from a copy of it.
monthly_melt = monthly_by_vehicle.copy()

# seaborn raises a ValueError when a dict palette has no entry for one of the
# hue levels, so build the palette from the vehicles actually present and give
# any vehicle missing from vehicle_colors a gray fallback.
bar_palette = {
    vehicle_id: vehicle_colors.get(vehicle_id, 'gray')
    for vehicle_id in monthly_melt['vehicle_booked'].unique()
}

# Draw on an explicit figure/axes so saving and closing target this plot only
fig, ax = plt.subplots(figsize=(14, 7))
sns.barplot(x='month', y='amount', hue='vehicle_booked', data=monthly_melt, palette=bar_palette, ax=ax)

ax.set_title("Monthly Earnings by Vehicle", fontsize=16)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Total Earnings (KSH)", fontsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.legend(title="Vehicles")
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()

output_file = "monthly_earnings_trend_bar"

# Output directory for the saved plot (created if it does not exist yet)
plot_dir = "../files/all/month"
os.makedirs(plot_dir, exist_ok=True)

# Save as PNG, then close the figure so it is not kept open by pyplot
plot_path = f"{plot_dir}/{output_file}.png"
fig.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print(f"Saved Matplotlib plot for {output_file}: {plot_path}")

Saved Matplotlib plot for monthly_earnings_trend_bar: ../files/all/month/monthly_earnings_trend_bar.png


In [16]:
# VISUALIZATION 1: Line chart for monthly earnings by vehicle
# One line per vehicle, read from the wide monthly_pivot frame. Each line uses
# the vehicle's colour from vehicle_colors so a vehicle looks the same here as
# in the bar chart; a vehicle without an entry gets color=None, i.e.
# matplotlib's default colour cycle.
fig, ax = plt.subplots(figsize=(14, 7))
for vehicle in vehicles:
    # Skip vehicles that have no column in the pivot (no aggregated amounts)
    if vehicle in monthly_pivot.columns:
        ax.plot(
            monthly_pivot['month'],
            monthly_pivot[vehicle],
            marker='o',
            linewidth=2,
            label=vehicle,
            color=vehicle_colors.get(vehicle),
        )

ax.set_title("Monthly Earnings by Vehicle", fontsize=16)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Total Earnings (KSH)", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.7)
ax.legend(title="Vehicles")
ax.tick_params(axis='x', rotation=45)
fig.tight_layout()

output_file = "monthly_earnings_trend_line"

# Output directory for the saved plot (created if it does not exist yet)
plot_dir = "../files/all/month"
os.makedirs(plot_dir, exist_ok=True)

# Save as PNG, then close the figure so it is not kept open by pyplot
plot_path = f"{plot_dir}/{output_file}.png"
fig.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print(f"Saved Matplotlib plot for {output_file}: {plot_path}")

Saved Matplotlib plot for monthly_earnings_trend_line: ../files/all/month/monthly_earnings_trend_line.png


In [17]:
# Total amount per month over every vehicle in `data`, ordered by month:
# one row per month with columns month / amount
monthly_total = (
    data.groupby('month')['amount']
    .sum()
    .reset_index()
    .sort_values('month')
)

In [18]:
# VISUALIZATION 4: Monthly trends for all vehicles (bar chart)
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x='month', y='amount', data=monthly_total, color='skyblue', ax=ax)

ax.set_title("Monthly Total Earnings - All Vehicles", fontsize=16)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Total Earnings (KSH)", fontsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.tick_params(axis='x', rotation=45)

# Add value annotations
# Bars sit at x = 0, 1, 2, ... in row order, so place each label by the row's
# position rather than by its DataFrame index label (the two only coincide
# while the index happens to be 0..n-1 in sorted order).
for position, amount in enumerate(monthly_total['amount']):
    ax.text(position, amount, f"{int(amount):,}", ha='center', va='bottom', fontsize=9)

fig.tight_layout()

output_file = "monthly_total_earnings_bar"

# Output directory for the saved plot (created if it does not exist yet)
plot_dir = "../files/all/month"
os.makedirs(plot_dir, exist_ok=True)

# Save as PNG, then close the figure so it is not kept open by pyplot
plot_path = f"{plot_dir}/{output_file}.png"
fig.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print(f"Saved Matplotlib plot for {output_file}: {plot_path}")

Saved Matplotlib plot for monthly_total_earnings_bar: ../files/all/month/monthly_total_earnings_bar.png


In [19]:
# VISUALIZATION 5: Monthly trends for all vehicles (line chart)
fig, ax = plt.subplots(figsize=(12, 6))
# `marker` (singular) is forwarded to matplotlib and draws a dot at every point.
# seaborn's own `markers` argument only applies per level of a `style`
# variable, so with no `style` it draws no markers at all.
sns.lineplot(x='month', y='amount', data=monthly_total, marker='o', linewidth=2, color='skyblue', ax=ax)

ax.set_title("Monthly Total Earnings - All Vehicles", fontsize=16)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Total Earnings (KSH)", fontsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.tick_params(axis='x', rotation=45)

# Add value annotations
# The month strings are plotted at x = 0, 1, 2, ... in row order, so place each
# label by the row's position rather than by its DataFrame index label.
for position, amount in enumerate(monthly_total['amount']):
    ax.text(position, amount, f"{int(amount):,}", ha='center', va='bottom', fontsize=9)

fig.tight_layout()

output_file = "monthly_total_earnings_line"

# Output directory for the saved plot (created if it does not exist yet)
plot_dir = "../files/all/month"
os.makedirs(plot_dir, exist_ok=True)

# Save as PNG, then close the figure so it is not kept open by pyplot
plot_path = f"{plot_dir}/{output_file}.png"
fig.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print(f"Saved Matplotlib plot for {output_file}: {plot_path}")

Saved Matplotlib plot for monthly_total_earnings_line: ../files/all/month/monthly_total_earnings_line.png


In [20]:
# VISUALIZATION 6: Combined view showing monthly breakdown by vehicle and total

# Wide table for the stacked bars: one row per month, one column per vehicle.
# monthly_by_vehicle already holds one total per (month, vehicle) pair, so the
# pivot's default aggregation leaves those values unchanged.
monthly_by_vehicle_pivot = monthly_by_vehicle.pivot_table(index='month', columns='vehicle_booked', values='amount')
monthly_by_vehicle_pivot = monthly_by_vehicle_pivot.fillna(0)  # Replace NaN with 0
monthly_by_vehicle_pivot = monthly_by_vehicle_pivot.sort_index()  # Sort by month

# Create a stacked bar chart on ONE explicit figure. Calling plt.figure() first
# and then DataFrame.plot(figsize=...) opens a second figure, leaving the first
# one empty and never closed (it shows up as "<Figure ... with 0 Axes>").
fig, ax = plt.subplots(figsize=(16, 8))
monthly_by_vehicle_pivot.plot(kind='bar', stacked=True, colormap='viridis', ax=ax)

ax.set_title("Monthly Earnings Breakdown by Vehicle", fontsize=16)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Total Earnings (KSH)", fontsize=12)
ax.grid(axis='y', linestyle='--', alpha=0.4)
ax.legend(title="Vehicles", bbox_to_anchor=(1.05, 1), loc='upper left')

# Add total value annotations
# Each label is placed 1% above the top of its stack (the row sum)
monthly_totals = monthly_by_vehicle_pivot.sum(axis=1)
for i, total in enumerate(monthly_totals):
    ax.text(i, total + (total*0.01), f"Total: {int(total):,}", ha='center', va='bottom', fontsize=9, fontweight='bold')

fig.tight_layout()

output_file = "monthly_stacked_earnings"

# Output directory for the saved plot (created if it does not exist yet)
plot_dir = "../files/all/month"
os.makedirs(plot_dir, exist_ok=True)

# Save as PNG, then close the figure so it is not kept open by pyplot
plot_path = f"{plot_dir}/{output_file}.png"
fig.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print(f"Saved Matplotlib plot for {output_file}: {plot_path}")

Saved Matplotlib plot for monthly_stacked_earnings: ../files/all/month/monthly_stacked_earnings.png


<Figure size 1600x800 with 0 Axes>