In [10]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.util import Inches

# Report builder: for every Excel workbook in a folder, total one numeric
# column per category, save the result as a bar chart, and put each chart on
# its own slide of a single PowerPoint deck.

# Chart geometry in inches. The figure is rendered at the same size as the
# picture placed on the slide so the image is not stretched.
CHART_WIDTH_IN = 9
CHART_HEIGHT_IN = 6

# Step 1: User input for the folder containing Excel files
input_folder = input("Enter the path to the folder containing Excel files (default: './input'): ").strip() or 'input'

# Step 2: User input for the charts output folder
charts_folder = input("Enter the path for saving charts (default: './charts'): ").strip() or 'charts'

# Step 3: User input for the output PowerPoint file name
ppt_file = input("Enter the name of the PowerPoint file (default: 'financial_data.pptx'): ").strip() or 'financial_data.pptx'

# Fail early with a readable message instead of a traceback from os.listdir
if not os.path.isdir(input_folder):
    print(f"Error: input folder '{input_folder}' does not exist.")
    raise SystemExit(1)

# Ensure the charts folder exists or create it.
# SystemExit stops the cell; the original exit(1) asks a notebook kernel to shut down.
try:
    os.makedirs(charts_folder, exist_ok=True)
except OSError as e:
    print(f"Error creating directory {charts_folder}: {e}")
    raise SystemExit(1)

# Create a new PowerPoint presentation
prs = Presentation()

# Collect the workbooks in a stable order. Names starting with '~$' are the
# lock files Excel writes next to an open workbook and are not readable.
workbooks = sorted(
    name for name in os.listdir(input_folder)
    if name.lower().endswith('.xlsx') and not name.startswith('~$')
)

# Iterate through all Excel files in the input folder
for excel_file in workbooks:
    print(f"Processing file: {excel_file}")
    base_name = os.path.splitext(excel_file)[0]

    # Step 4: Prompt user for the sheet name or use the first one by default
    sheet_name = input(f"Enter sheet name for {excel_file} (default: first sheet): ").strip() or 0

    # Read the Excel file; any failure (unknown sheet, corrupt file, ...) is
    # reported and the file is skipped so the remaining files are still processed.
    file_path = os.path.join(input_folder, excel_file)
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
    except Exception as e:
        print(f"Error reading {excel_file}: {e}")
        continue

    # Strip any extra spaces in column names. Headers are converted to text
    # first because the .str accessor does not handle non-string headers.
    df.columns = [str(col).strip() for col in df.columns]

    # Step 5: User input for the column name they want to analyze (e.g., 'Sales')
    print(f"Columns available in {excel_file}: {list(df.columns)}")
    group_column = input("Enter the column name to group by (e.g., 'Product', is numerical): ").strip()
    sum_column = input("Enter the column name to sum (default: 'Sales' is numerical): ").strip() or 'Sales'

    # Ensure the specified columns exist in the DataFrame
    if group_column not in df.columns:
        print(f"Error: Column '{group_column}' not found in {excel_file}. Skipping this file.")
        continue
    if sum_column not in df.columns:
        print(f"Error: Column '{sum_column}' not found in {excel_file}. Skipping this file.")
        continue
    if group_column == sum_column:
        print(f"Error: The group-by column and the column to sum must be different. Skipping this file.")
        continue

    # Drop only rows missing the group key or the value being summed; blanks
    # in unrelated columns must not remove rows from the totals.
    df = df.dropna(subset=[group_column, sum_column])
    if df.empty:
        print(f"No rows with both '{group_column}' and '{sum_column}' present in {excel_file}. Skipping this file.")
        continue

    # Only a numeric (non-boolean) column can be summed
    value_is_numeric = (
        pd.api.types.is_numeric_dtype(df[sum_column])
        and not pd.api.types.is_bool_dtype(df[sum_column])
    )
    if not value_is_numeric:
        print(f"No valid numeric data found in the column '{sum_column}' for {excel_file}. Skipping this file.")
        continue

    # Aggregate just the requested column. Summing every numeric column fails
    # in reset_index() when the group-by column is itself numeric (e.g. 'Year').
    grouped = df.groupby(group_column)[sum_column].sum().reset_index()

    # Create a chart using the seaborn library on an explicit figure
    fig, ax = plt.subplots(figsize=(CHART_WIDTH_IN, CHART_HEIGHT_IN))
    try:
        sns.barplot(data=grouped, x=group_column, y=sum_column, ax=ax)
        ax.set_title(f"{excel_file} - {group_column} vs {sum_column}")
        ax.set_xlabel(group_column)
        ax.set_ylabel(sum_column)
        fig.tight_layout()

        # Save the chart to the charts folder
        chart_path = os.path.join(charts_folder, base_name + '.png')
        fig.savefig(chart_path)
        print(f"Chart saved at {chart_path}")
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

    # Add a slide to the PowerPoint presentation and insert the chart and title
    print(f"Adding slide for {excel_file}")
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = base_name

    if os.path.exists(chart_path):
        print(f"Adding picture {chart_path} to slide")
        slide.shapes.add_picture(
            chart_path,
            Inches(0.5),
            Inches(1),
            width=Inches(CHART_WIDTH_IN),
            height=Inches(CHART_HEIGHT_IN),
        )
    else:
        print(f"Chart not found at {chart_path}")

# Save the PowerPoint presentation in the current working directory
# (an absolute ppt_file path is used as given by os.path.join)
ppt_path = os.path.join(os.getcwd(), ppt_file)
prs.save(ppt_path)
print(f"PowerPoint presentation saved as {ppt_file}")


Processing file: Canada.xlsx
Columns available in Canada.xlsx: ['Segment', 'Country', 'Product', 'Discount Band', 'Units Sold', 'Manufacturing Price', 'Sale Price', 'Gross Sales', 'Discounts', 'Sales', 'COGS', 'Profit', 'Date', 'Month Number', 'Month Name', 'Year']
Chart saved at ./charts\Canada.png
Adding slide for Canada.xlsx
Adding picture ./charts\Canada.png to slide
Processing file: France.xlsx
Columns available in France.xlsx: ['Segment', 'Country', 'Product', 'Discount Band', 'Units Sold', 'Manufacturing Price', 'Sale Price', 'Gross Sales', 'Discounts', 'Sales', 'COGS', 'Profit', 'Date', 'Month Number', 'Month Name', 'Year']
Chart saved at ./charts\France.png
Adding slide for France.xlsx
Adding picture ./charts\France.png to slide
Processing file: Germany.xlsx
Columns available in Germany.xlsx: ['Segment', 'Country', 'Product', 'Discount Band', 'Units Sold', 'Manufacturing Price', 'Sale Price', 'Gross Sales', 'Discounts', 'Sales', 'COGS', 'Profit', 'Date', 'Month Number', 'Month

In [12]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pptx import Presentation
from pptx.util import Inches

# Report builder with per-file confirmation: for every Excel workbook the user
# accepts, total one numeric column per category from the first sheet, save the
# result as a bar chart, and put each chart on its own slide of a single
# PowerPoint deck.

# Chart geometry in inches. The figure is rendered at the same size as the
# picture placed on the slide so the image is not stretched.
CHART_WIDTH_IN = 9
CHART_HEIGHT_IN = 6

# Step 1: User input for the folder containing Excel files
input_folder = input("Enter the path to the folder containing Excel files (default: './input'): ").strip() or 'input'

# Step 2: User input for the charts output folder
charts_folder = input("Enter the path for saving charts (default: './charts'): ").strip() or 'charts'

# Step 3: User input for the output PowerPoint file name
ppt_file = input("Enter the name of the PowerPoint file (default: 'financial_data.pptx'): ").strip() or 'financial_data.pptx'

# Fail early with a readable message instead of a traceback from os.listdir
if not os.path.isdir(input_folder):
    print(f"Error: input folder '{input_folder}' does not exist.")
    raise SystemExit(1)

# Ensure the charts folder exists or create it.
# SystemExit stops the cell; the original exit(1) asks a notebook kernel to shut down.
try:
    os.makedirs(charts_folder, exist_ok=True)
except OSError as e:
    print(f"Error creating directory {charts_folder}: {e}")
    raise SystemExit(1)

# Create a new PowerPoint presentation
prs = Presentation()

# Collect the workbooks in a stable order. Names starting with '~$' are the
# lock files Excel writes next to an open workbook and are not readable.
workbooks = sorted(
    name for name in os.listdir(input_folder)
    if name.lower().endswith('.xlsx') and not name.startswith('~$')
)

# Iterate through all Excel files in the input folder
for excel_file in workbooks:
    # Ask the user if they want to include this file
    include_file = input(f"Do you want to include {excel_file} in the presentation? (y/n): ").strip().lower()
    if include_file != 'y':
        print(f"Skipping file: {excel_file}")
        continue

    print(f"Processing file: {excel_file}")
    base_name = os.path.splitext(excel_file)[0]

    # Step 4: Use the first sheet by default
    sheet_name = 0

    # Read the Excel file; any failure (corrupt file, unreadable format, ...)
    # is reported and the file is skipped so the remaining files are still processed.
    file_path = os.path.join(input_folder, excel_file)
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
    except Exception as e:
        print(f"Error reading {excel_file}: {e}")
        continue

    # Strip any extra spaces in column names. Headers are converted to text
    # first because the .str accessor does not handle non-string headers.
    df.columns = [str(col).strip() for col in df.columns]

    # Step 5: User input for the column name they want to analyze (e.g., 'Sales')
    print(f"Columns available in {excel_file}: {list(df.columns)}")
    group_column = input("Enter the column name to group by (e.g., 'Product'): ").strip()
    sum_column = input("Enter the column name to sum (default: 'Sales'): ").strip() or 'Sales'

    # Ensure the specified columns exist in the DataFrame
    if group_column not in df.columns:
        print(f"Error: Column '{group_column}' not found in {excel_file}. Skipping this file.")
        continue
    if sum_column not in df.columns:
        print(f"Error: Column '{sum_column}' not found in {excel_file}. Skipping this file.")
        continue
    if group_column == sum_column:
        print(f"Error: The group-by column and the column to sum must be different. Skipping this file.")
        continue

    # Drop only rows missing the group key or the value being summed; blanks
    # in unrelated columns must not remove rows from the totals.
    df = df.dropna(subset=[group_column, sum_column])
    if df.empty:
        print(f"No rows with both '{group_column}' and '{sum_column}' present in {excel_file}. Skipping this file.")
        continue

    # Only a numeric (non-boolean) column can be summed
    value_is_numeric = (
        pd.api.types.is_numeric_dtype(df[sum_column])
        and not pd.api.types.is_bool_dtype(df[sum_column])
    )
    if not value_is_numeric:
        print(f"No valid numeric data found in the column '{sum_column}' for {excel_file}. Skipping this file.")
        continue

    # Aggregate just the requested column. Summing every numeric column fails
    # in reset_index() when the group-by column is itself numeric (e.g. 'Year').
    grouped = df.groupby(group_column)[sum_column].sum().reset_index()

    # Create a chart using the seaborn library on an explicit figure
    fig, ax = plt.subplots(figsize=(CHART_WIDTH_IN, CHART_HEIGHT_IN))
    try:
        sns.barplot(data=grouped, x=group_column, y=sum_column, ax=ax)
        ax.set_title(f"{excel_file} - {group_column} vs {sum_column}")
        ax.set_xlabel(group_column)
        ax.set_ylabel(sum_column)
        fig.tight_layout()

        # Save the chart to the charts folder
        chart_path = os.path.join(charts_folder, base_name + '.png')
        fig.savefig(chart_path)
        print(f"Chart saved at {chart_path}")
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

    # Add a slide to the PowerPoint presentation and insert the chart and title
    print(f"Adding slide for {excel_file}")
    slide_layout = prs.slide_layouts[5]
    slide = prs.slides.add_slide(slide_layout)
    slide.shapes.title.text = base_name

    if os.path.exists(chart_path):
        print(f"Adding picture {chart_path} to slide")
        slide.shapes.add_picture(
            chart_path,
            Inches(0.5),
            Inches(1),
            width=Inches(CHART_WIDTH_IN),
            height=Inches(CHART_HEIGHT_IN),
        )
    else:
        print(f"Chart not found at {chart_path}")

# Save the PowerPoint presentation in the current working directory
# (an absolute ppt_file path is used as given by os.path.join)
ppt_path = os.path.join(os.getcwd(), ppt_file)
prs.save(ppt_path)
print(f"PowerPoint presentation saved as {ppt_file}")


Processing file: Canada.xlsx
Columns available in Canada.xlsx: ['Segment', 'Country', 'Product', 'Discount Band', 'Units Sold', 'Manufacturing Price', 'Sale Price', 'Gross Sales', 'Discounts', 'Sales', 'COGS', 'Profit', 'Date', 'Month Number', 'Month Name', 'Year']
Chart saved at ./charts\Canada.png
Adding slide for Canada.xlsx
Adding picture ./charts\Canada.png to slide
Processing file: France.xlsx
Columns available in France.xlsx: ['Segment', 'Country', 'Product', 'Discount Band', 'Units Sold', 'Manufacturing Price', 'Sale Price', 'Gross Sales', 'Discounts', 'Sales', 'COGS', 'Profit', 'Date', 'Month Number', 'Month Name', 'Year']
Chart saved at ./charts\France.png
Adding slide for France.xlsx
Adding picture ./charts\France.png to slide
Processing file: Germany.xlsx
Columns available in Germany.xlsx: ['Segment', 'Country', 'Product', 'Discount Band', 'Units Sold', 'Manufacturing Price', 'Sale Price', 'Gross Sales', 'Discounts', 'Sales', 'COGS', 'Profit', 'Date', 'Month Number', 'Month