In [2]:
# packages and dependencies import
import os
import sys
import json
import pandas as pd
import seaborn as sns # Used for Exploratory Data Analysis
import numpy as np
import matplotlib
from pandas import DataFrame

# matplotlib.use('TkAgg')  # or 'QtAgg' if PyQt is installed {pip install PyQt6}
import matplotlib.pyplot as plt

In [3]:
# Data Import
data = pd.read_csv("/home/tjselevani/Desktop/Apps/vscode/python/python analysis/data/last-3-months-transactions.csv")

# Convert 'created_at' column to datetime if not already
data['created_at'] = pd.to_datetime(data['created_at'])

# Extract the year from 'created_at'
data.loc[:, 'year'] = data['created_at'].dt.year

In [4]:
def format_years_xticks(year_labels):
    """Returns tick positions and labels for years."""
    return range(len(year_labels)), year_labels

In [5]:
# data.head()

In [6]:
# data.tail()

In [7]:
# data.shape

In [8]:
# data.columns

In [9]:
# data.describe()

In [10]:
# data.isnull().sum()

In [11]:
#Data Visualization

In [12]:
# Filter data for the specific vehicle
def get_vehicle_data(vehicle_id):
    """Returns all, credit, and debit transaction data for a given vehicle."""
    vehicle_df = data[data['vehicle_booked'] == vehicle_id].copy()
    vehicle_dt = data[data['transaction_type'] == 'CREDIT'].copy()
    
    return {
        "all": vehicle_dt,
        "credit": vehicle_df[vehicle_df['transaction_type'] == 'CREDIT'].copy(),
        "debit": vehicle_df[vehicle_df['transaction_type'] == 'DEBIT'].copy(),
    }

# Define the vehicles
vehicles = ['SM191', 'SM192', 'SM055', 'SM024', 'SM944']

# Create a dictionary to store data for all vehicles
vehicle_data_dict = {v: get_vehicle_data(v) for v in vehicles}

# Select a specific vehicle dynamically
selected_vehicle = 'SM024'  # Change this to switch vehicles

vehicle_data = vehicle_data_dict[selected_vehicle]['credit']
vehicle_revenue = vehicle_data_dict[selected_vehicle]['debit']
data = vehicle_data_dict[selected_vehicle]['all']


In [13]:
# Define unique colors for each vehicle, plus "Unknown" and "Failed Transactions"
vehicle_colors = {
    'SM191': 'blue',
    'SM192': 'green',
    'SM944': 'yellow',
    'SM055': 'purple',
    'SM024': 'orange',
    'Unknown': 'gray',
    'Failed': 'red'
}

# Map colors to vehicle_booked, filling NaN values with a default color (e.g., gray)
data.loc[:,'color'] = data['vehicle_booked'].map(vehicle_colors).fillna('gray')

# Define vehicles list (unique vehicles from the data)
vehicles = data['vehicle_booked'].dropna().unique()

In [14]:
# Aggregate data by month for each vehicle
yearly_by_vehicle = data.groupby(['year', 'vehicle_booked'])['amount'].sum().reset_index()

# Pivot the data to have vehicles as columns
yearly_pivot = yearly_by_vehicle.pivot(index='year', columns='vehicle_booked', values='amount').reset_index()

# Sort by month (assuming format is YYYY-MM)
yearly_pivot = yearly_pivot.sort_values('year')

In [15]:
# Yearly totals (extract year from month)
# data['year'] = pd.to_datetime(data['created_at']).dt.year
yearly_total = data.groupby('year')['amount'].sum().reset_index()


In [16]:
# VISUALIZATION 5: Yearly comparison for all vehicles
plt.figure(figsize=(10, 6))
ax = sns.barplot(x='year', y='amount', data=yearly_total, palette='Blues_d')

plt.title("Yearly Total Earnings - All Vehicles", fontsize=16)
plt.xlabel("Year", fontsize=12)
plt.ylabel("Total Earnings (KSH)", fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Add value annotations
for i, row in yearly_total.iterrows():
    ax.text(i, row['amount'], f"{int(row['amount']):,}", ha='center', va='bottom', fontsize=10)

plt.tight_layout()

output_file = "yearly_stacked_earnings_bar"

# Create directories for saving plots
plot_dir = f"../files/all/year"
os.makedirs(plot_dir, exist_ok=True)

# Save as PNG
plot_path = f"{plot_dir}/{output_file}.png"
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"Saved Matplotlib plot for {output_file}: {plot_path}")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='year', y='amount', data=yearly_total, palette='Blues_d')


Saved Matplotlib plot for yearly_stacked_earnings_bar: ../files/all/year/yearly_stacked_earnings_bar.png


In [17]:
# VISUALIZATION 6: Combined view showing yearly breakdown by vehicle and total
plt.figure(figsize=(16, 8))

# Create a stacked bar chart
yearly_by_vehicle_pivot = yearly_by_vehicle.pivot_table(index='year', columns='vehicle_booked', values='amount')
yearly_by_vehicle_pivot = yearly_by_vehicle_pivot.fillna(0)  # Replace NaN with 0
yearly_by_vehicle_pivot = yearly_by_vehicle_pivot.sort_index()  # Sort by month

yearly_by_vehicle_pivot.plot(kind='bar', stacked=True, figsize=(16, 8), colormap='viridis')

plt.title("Yearly Earnings Breakdown by Vehicle", fontsize=16)
plt.xlabel("Month", fontsize=12)
plt.ylabel("Total Earnings (KSH)", fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.4)
plt.legend(title="Vehicles", bbox_to_anchor=(1.05, 1), loc='upper left')

# Add total value annotations
yearly_totals = yearly_by_vehicle_pivot.sum(axis=1)
for i, total in enumerate(yearly_totals):
    plt.text(i, total + (total*0.01), f"Total: {int(total):,}", ha='center', va='bottom', fontsize=9, fontweight='bold')

plt.tight_layout()

output_file = "yearly_stacked_earnings_stacked"

# Create directories for saving plots
plot_dir = f"../files/all/year"
os.makedirs(plot_dir, exist_ok=True)

# Save as PNG
plot_path = f"{plot_dir}/{output_file}.png"
plt.savefig(plot_path, dpi=300, bbox_inches='tight')
plt.close()

print(f"Saved Matplotlib plot for {output_file}: {plot_path}")

Saved Matplotlib plot for yearly_stacked_earnings_stacked: ../files/all/year/yearly_stacked_earnings_stacked.png


<Figure size 1600x800 with 0 Axes>