# Fuel Use Data Analysis
This notebook provides a detailed analysis of vehicle fuel usage, cost, and mileage data. It includes data loading and validation, metric computation, visualization, summary statistics, and outlier detection.

In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import os

## Load and Validate Data

In [None]:
def load_data(filepath):
    '''Read the fuel use CSV and keep only rows with usable numeric values.

    The file is read without a header row; its columns are named
    Mileage, Cost, Fuel_Litres and Date, in that order. Text following a
    '#' is treated as a comment by the CSV reader.

    Parameters:
        filepath: Path of the CSV file to read.

    Returns:
        A DataFrame, re-indexed from 0, in which Mileage, Cost and
        Fuel_Litres are numeric and non-missing. Date is left as read.

    Raises:
        FileNotFoundError: If nothing exists at filepath.
    '''
    numeric_columns = ['Mileage', 'Cost', 'Fuel_Litres']

    if not os.path.exists(filepath):
        raise FileNotFoundError(f'Data file not found: {filepath}')

    frame = pd.read_csv(filepath, comment='#', names=numeric_columns + ['Date'])

    # Rows that are already missing a required value are discarded first.
    frame = frame.dropna(subset=['Mileage', 'Fuel_Litres', 'Cost'])

    # Anything that cannot be parsed as a number becomes NaN ...
    for column in numeric_columns:
        frame[column] = pd.to_numeric(frame[column], errors='coerce')

    # ... and those rows are then discarded as well.
    return frame.dropna(subset=numeric_columns).reset_index(drop=True)

In [None]:
# Load the dataset
# The file name is a relative path, so the CSV must be reachable from the
# current working directory; load_data raises FileNotFoundError otherwise.
data_file = 'CE2NMP_ResitData_FuelUse.csv'
df = load_data(data_file)
# Number of rows that survived load_data's numeric validation.
print(f'Data loaded: {len(df)} records')

## Compute Metrics

In [None]:
def compute_metrics(df):
    '''Compute derived metrics: miles driven, fuel efficiency, cost per litre.

    Adds four columns to df in place and returns the same object:

    - Prev_Mileage: Mileage of the preceding row (NaN for the first row).
    - Miles_Driven: Mileage minus Prev_Mileage (NaN for the first row).
    - Miles_per_Litre: Miles_Driven divided by Fuel_Litres.
    - Cost_per_Litre: Cost divided by Fuel_Litres.

    Rows are used in their existing order; no sorting is performed.

    Parameters:
        df: DataFrame with numeric Mileage, Cost and Fuel_Litres columns.

    Returns:
        The same DataFrame, with the four columns above added.
    '''
    # A zero fuel quantity would turn both ratios into +/-inf, which in turn
    # makes the mean/std of the column inf/NaN and distorts quantile-based
    # checks. Treat such a quantity as missing so the ratios come out as NaN.
    # The Fuel_Litres column itself is left untouched.
    litres = df['Fuel_Litres'].where(df['Fuel_Litres'] != 0)

    df['Prev_Mileage'] = df['Mileage'].shift(1)
    df['Miles_Driven'] = df['Mileage'] - df['Prev_Mileage']
    df['Miles_per_Litre'] = df['Miles_Driven'] / litres
    df['Cost_per_Litre'] = df['Cost'] / litres
    return df

In [None]:
# Add the derived columns Prev_Mileage, Miles_Driven, Miles_per_Litre and
# Cost_per_Litre. The first record has no previous mileage, so its
# Miles_Driven and Miles_per_Litre are NaN.
df = compute_metrics(df)

## Visualize Fuel Efficiency and Cost per Litre

In [None]:
def plot_metric(x, y, xlabel, ylabel, title, color='blue'):
    '''Draw y against x as a single marked line on a new figure and show it.

    Parameters:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        xlabel: Label for the horizontal axis.
        ylabel: Label for the vertical axis.
        title: Title displayed above the plot.
        color: Line and marker colour (default 'blue').

    Returns:
        None. The figure is displayed via plt.show().
    '''
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(x, y, marker='o', color=color)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid(True)
    fig.tight_layout()
    plt.show()

In [None]:
# Fuel Efficiency Plot
# Plots Miles_per_Litre against Mileage for every record, using plot_metric's
# default colour. The first record's Miles_per_Litre is NaN (it has no
# previous mileage to difference against).
plot_metric(df['Mileage'], df['Miles_per_Litre'], 'Mileage', 'Miles per Litre', 'Fuel Efficiency (Miles per Litre)')

In [None]:
# Cost per Litre Plot
# Plots Cost_per_Litre against Mileage for every record, in orange.
# NOTE(review): the title says "Over Time" but the x-axis is Mileage, not the
# Date column - presumably mileage is used as a proxy for time; confirm.
plot_metric(df['Mileage'], df['Cost_per_Litre'], 'Mileage', 'Cost per Litre', 'Cost per Litre Over Time', color='orange')

## Summary Statistics

In [None]:
# Descriptive statistics (as produced by DataFrame.describe) for the two
# derived per-litre metrics only.
summary = df[['Miles_per_Litre', 'Cost_per_Litre']].describe()
print(summary)

## Outlier Detection

In [None]:
def detect_outliers(df, column, lower_quantile=0.05, upper_quantile=0.95):
    '''Return the rows of df whose value in column lies outside a quantile band.

    Parameters:
        df: DataFrame to inspect.
        column: Name of the column to test.
        lower_quantile: Quantile giving the lower cut-off (default 0.05).
        upper_quantile: Quantile giving the upper cut-off (default 0.95).

    Returns:
        The rows whose value is strictly below the lower cut-off or strictly
        above the upper cut-off. Rows where the value is NaN are never
        selected, because both comparisons are False for NaN.
    '''
    values = df[column]
    low_cut = values.quantile(lower_quantile)
    high_cut = values.quantile(upper_quantile)

    too_low = values.lt(low_cut)
    too_high = values.gt(high_cut)
    return df.loc[too_low | too_high]

In [None]:
# Select records whose Miles_per_Litre falls outside detect_outliers' default
# 0.05-0.95 quantile band, and print the raw inputs next to the metric.
outliers = detect_outliers(df, 'Miles_per_Litre')
print(outliers[['Mileage', 'Cost', 'Fuel_Litres', 'Miles_per_Litre']])