In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [26]:
def read_energy_data(path):
    """Load the energy-usage CSV at ``path`` into a cleaned DataFrame.

    The ``date`` column is parsed as datetimes and ``energy_usage`` is coerced
    to numeric. Rows whose usage is missing, or cannot be parsed as a number,
    are discarded; the surviving rows keep their original index labels.

    Any exception raised while loading or cleaning is reported with ``print``
    and an empty DataFrame is returned instead of propagating the error.
    """
    try:
        raw = pd.read_csv(path, parse_dates=['date'])
        # Drop rows with no usage value before attempting the conversion.
        present = raw.dropna(subset=['energy_usage'])
        # Unparseable values become NaN here and are removed just below.
        converted = present.assign(
            energy_usage=pd.to_numeric(present['energy_usage'], errors='coerce')
        )
        return converted.dropna(subset=['energy_usage'])
    except Exception as err:
        print(f'Error reading File: {err}')
        return pd.DataFrame()

def calculate_statistics(df):
    """Return ``(total, average)`` of the ``energy_usage`` column of ``df``."""
    usage = df['energy_usage']
    return usage.sum(), usage.mean()

def detect_outliers(df, threshold = 2):
    """Flag rows of ``df`` whose energy usage is an outlier by z-score.

    Two columns are added to ``df`` itself (the input frame is modified in
    place) and that same frame is returned:

    * ``z_score`` - ``(energy_usage - mean) / std``, with ``mean`` and ``std``
      taken from ``Series.mean()`` / ``Series.std()`` using their defaults.
    * ``is_outlier`` - True where the absolute z-score is strictly greater
      than ``threshold``.
    """
    usage = df['energy_usage']
    centered = usage - usage.mean()
    df['z_score'] = centered / usage.std()
    df['is_outlier'] = df['z_score'].abs().gt(threshold)
    return df

def summary_report(df, total, average, output_path = 'Summary_report.txt', format = 'txt'):
    """Write a summary of energy usage and its outliers to ``output_path``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a boolean ``is_outlier`` column. For the ``'txt'`` format
        the ``date``, ``energy_usage`` and ``z_score`` columns of the outlier
        rows are read as well.
    total, average : number
        Written in the text report header with two decimals. Not used for the
        ``'csv'`` format.
    output_path : str
        Destination file; overwritten if it already exists.
    format : {'txt', 'csv'}
        ``'txt'`` writes a header followed by one line per outlier;
        ``'csv'`` writes the outlier rows as CSV without the index.

    Raises
    ------
    ValueError
        If ``format`` is neither ``'txt'`` nor ``'csv'``. Raised before any
        file is touched, instead of silently producing no report.
    """
    if format not in ('txt', 'csv'):
        raise ValueError(f"Unsupported report format: {format!r} (expected 'txt' or 'csv')")

    outliers = df[df['is_outlier']]

    if format == 'csv':
        outliers.to_csv(output_path, index = False)
        return

    # Build the whole report first so a formatting error cannot leave a
    # half-written file behind.
    lines = [
        f'Total energy usage: {total:.2f}\n',
        f'Average daily usage: {average:.2f}\n',
        f'Outliers detected: {len(outliers)}\n\n',
    ]
    # Inner quotes differ from the outer ones: reusing the same quote inside
    # an f-string is a SyntaxError before Python 3.12.
    for day, usage, z_score in zip(outliers['date'], outliers['energy_usage'], outliers['z_score']):
        lines.append(f"{day.date()} - Usage: {usage:.2f} - Z-score: {z_score:.2f}\n")

    with open(output_path, 'w') as f:
        f.writelines(lines)

def energy_usage_plot(df, output_path = 'energy_usage_plot.png'):
    """Save a line chart of energy usage over time with outliers highlighted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date``, ``energy_usage`` and a boolean ``is_outlier``
        column; rows where ``is_outlier`` is True are overplotted in red.
    output_path : str
        File the figure is saved to.

    The figure is always closed before returning, including when plotting or
    saving raises, so a failure does not leave an open figure registered with
    pyplot.
    """
    fig, ax = plt.subplots(figsize = (12, 6))
    try:
        ax.plot(df['date'], df['energy_usage'], label = 'Energy Usage', marker = 'o', linestyle = '-')
        flagged = df[df['is_outlier']]
        # zorder lifts the outlier markers above the usage line.
        ax.scatter(flagged['date'], flagged['energy_usage'], color = 'red', label = 'Outliers', zorder = 5)
        ax.set_xlabel('Date')
        ax.set_ylabel('Energy Usage')
        ax.set_title('Daily usage with outliers highlighted')
        ax.legend()
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        plt.close(fig)
    

In [27]:
def main():
    """Run the full pipeline: load the CSV, compute statistics, flag outliers,
    then write the text report and the plot.

    Prints a message and stops early when loading yields an empty frame.
    """
    source_csv = 'daily_energy_usage_sample_100_days.csv'
    report_path = 'summary_report.txt'
    plot_path = 'energy_usage_plot.png'

    usage = read_energy_data(source_csv)
    if usage.empty:
        print("No valid data found")
        return

    # Statistics are taken before the outlier columns are added.
    total, average = calculate_statistics(usage)
    flagged = detect_outliers(usage)

    summary_report(flagged, total, average, output_path = report_path, format = 'txt')
    energy_usage_plot(flagged, output_path = plot_path)

    print(f'Report written to {report_path}')
    print(f'Plot saved as {plot_path}')


# Run the pipeline when this code is executed directly (as it is in a notebook cell).
if __name__ == '__main__':
    main()

Report written to summary_report.txt
Plot saved as energy_usage_plot.png
