### Import all necessary packages

In [3]:
import pandas as pd

data = pd.read_csv(r"data.csv")

for col in data.columns:
    print(f"'{col}'")
print(data.shape)

print(data.shape)
data

'sale'
'produce'
'beverages'
'timestamp'
(19, 4)
(19, 4)


Unnamed: 0,sale,produce,beverages,timestamp
0,1,apple,apple juice,10/1/2024
1,2,,apple juice,10/2/2024
2,3,apple,,10/3/2024
3,4,,water,10/4/2024
4,5,,lemonade,10/5/2024
5,6,apple,banana juice,10/6/2024
6,7,apple,,10/7/2024
7,8,banana,,10/8/2024
8,9,banana,orange juice,10/9/2024
9,10,banana,,10/10/2024


In [4]:
# generate a generic statistic

appleBased = data[(data['produce'] == 'apple') | (data['beverages'] == 'apple juice')]

print(appleBased.shape)
appleBased

(7, 4)


Unnamed: 0,sale,produce,beverages,timestamp
0,1,apple,apple juice,10/1/2024
1,2,,apple juice,10/2/2024
2,3,apple,,10/3/2024
5,6,apple,banana juice,10/6/2024
6,7,apple,,10/7/2024
13,14,apple,,10/14/2024
14,15,apple,,10/15/2024


In [5]:
fruit = data[~data['produce'].isna()]
print(fruit.shape)

(14, 4)


#### & Read in and explore data

In [6]:
common = len(fruit) + len(appleBased)
print(f'{common / len(data):.2f}')
total = len(data)

1.11


In [None]:
commonSales = data[(data['produce'] == 'apple') | (data['beverages'] == 'apple juice') | (data['produce'] != 'NaN')]
percentage = f'{len(commonSales) / len(data):.2%}'
print(percentage)
commonSales.shape


100.00%


(19, 4)

In [9]:
trifecta = data[(data['produce'] == 'apple') | (data['beverages'] == 'apple juice') | (data['produce'] != 'NaN')]
print(f'{len(trifecta) / len(data):.2f}')
trifecta.shape

1.00


(19, 4)

In [10]:
common - len(trifecta)

2

In [None]:
import getpass  
from docx import Document
from docx.shared import Inches
from datetime import datetime
import subprocess 
import matplotlib.pyplot as plt
import numpy as np

# filter down to top values to avoid huge charts
def count_top_unique_values(df, column, top_n=10):
    """Count the top N unique values in a specified DataFrame column."""
    return df[column].value_counts().nlargest(top_n)

 # mpl function to make bar charts
def create_bar_chart(data_series):
    """Create a bar chart from the given Series of counts."""
    plt.figure(figsize=(16, 9))
    plt.bar(data_series.index, data_series.values, color='skyblue')
    plt.title('Top 5 Unique Values')
    plt.xlabel('Values')
    plt.ylabel('Counts')
    
    chart_path = 'bar_chart.png'
    plt.savefig(chart_path)
    plt.close()
    return chart_path

# gpt function to check out git username
def get_git_user():
    """Get the git user name from the system, if available."""
    try:
        git_user = subprocess.check_output(["git", "config", "user.name"]).strip().decode('utf-8')
        return git_user
    except Exception:
        return None

# Set function to generate report
def generate_report(df, column, title, author=None):
    source = "unknown"  

    # Use system login name first
    if author is None:
        author = getpass.getuser()
        source = "System" 

        # Check for Git user as fallback
        git_user = get_git_user()
        if git_user:
            author = git_user
            source = "Git"
        else: None 

    print(f'Author: {author} (Source determined: {source})')
    
    data_series = count_top_unique_values(df, column)

    # run bar chart function
    chart_path = create_bar_chart(data_series)
    
    # open new doc
    doc = Document()

    # adding Title
    doc.add_heading('Sales Review', 0)

    # Add Date
    doc.add_paragraph(f'Created On: {datetime.now().strftime("%Y-%m-%d")}')

    # Add Author
    doc.add_paragraph(f'Created By: {author}')

    doc.add_heading('High Level Summary', level=1)
    doc.add_paragraph(
        "Summary of what is being covered in this document."
    )

    ### insert chart ###
    doc.add_picture(chart_path, width=Inches(5.5))
    doc.add_paragraph("Figure 1: Metrics Overview")
    ###

    doc.add_heading('Expanded Overview', level=1)
    doc.add_paragraph('Exact counts of the following items:')

    # example of how to iterate and add bullets
    for value, count in data_series.items():
        doc.add_paragraph(f'{value}: {count}', style='List Bullet')

    # semi dynamic paragraph
    doc.add_heading('Analysis', level=1)

    doc.add_paragraph(
        f"Of our total {total} sales, {percentage} were either apples or apple juice.\n"
        f"1. apple based products: {len(appleBased)} .\n"
        f"2. common sales: {len(commonSales)} .\n"
        f"3. all fruit: {len(fruit)} .\n"
    )

    # boiler plate
    doc.add_heading('Recommendations', level=1)
    doc.add_paragraph(
        "Based on the analysis of the metrics, we recommend the following actions:\n"
        "1. Increase advertisement for non apple based products\n"
        "2. consider discounting non apple based products."
    )

    # boiler plate
    doc.add_heading('Conclusion', level=1)
    doc.add_paragraph(
        "This concludes the analysis of the key metrics for this period. "
        "Please reach out for further details or clarification."
    )

    # Save doc to generic drive - can easily organize better
    doc_name = f'C:\\{title}_{datetime.now().strftime("%Y%m%d")}.docx'
    doc.save(doc_name)

    return doc_name


# run report func
title = 'Sales Review'
report_file = generate_report(commonSales, 'Model', title)

print(f'Report saved as: {report_file}')


In [None]:
import os
import sys
# import win32api
import win32com.client
import time
import pathlib
import datetime
from datetime import date, datetime, timedelta
start = time.time()
print("sending...")

outlook = win32com.client.Dispatch('outlook.application')
mail = outlook.CreateItem(0)
#DiscountTripleCheck
# mail.To = 'Last, First'
mail.Subject = title
mail.HTMLBody = "<p>Good morning,<br></br><br></br>Attached is the sales review for 2023<br></br><br></br> Best,<br></br> First Last<br></br></p>"

mail.Attachments.Add(report_file)
# mail.Attachments.Add(doc_name)

# mail.BCC = 'Last, First'

mail.send
end = time.time()
# print time is in seconds to show the amount of time elapsed while the script ran
print((str(end - start)[0:5]) + " Seconds Elapsed")


sending...
0.042 Seconds Elapsed


### Combine it all together and maybe add a map