In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import numpy as np
import os

In [12]:
# Load data
def load_data(filepath):
    df = pd.read_csv('data source.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df['Total Sale'] = df['Units Sold'] * df['Unit Price']
    df.dropna(inplace=True)
    return df

In [13]:
# Sales by Region
def plot_sales_by_region(df):
    region_sales = df.groupby('Region')['Total Sale'].sum().sort_values(ascending=False)
    plt.figure(figsize=(10,6))
    sns.barplot(x=region_sales.index, y=region_sales.values)
    plt.title("Sales by Region")
    plt.ylabel("Total Sales")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig("outputs/sales_by_region.png")
    plt.close()

In [14]:
# Sales by Product
def plot_sales_by_product(df):
    product_sales = df.groupby('Product')['Total Sale'].sum().sort_values(ascending=False).head(10)
    plt.figure(figsize=(10,6))
    sns.barplot(x=product_sales.index, y=product_sales.values)
    plt.title("Top 10 Product Sales")
    plt.ylabel("Total Sales")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig("outputs/sales_by_product.png")
    plt.close()


In [15]:
# Monthly Sales Trend
def plot_monthly_sales(df):
    monthly_sales = df.resample('M', on='Date').sum()['Total Sale']
    plt.figure(figsize=(12,6))
    monthly_sales.plot(marker='o')
    plt.title("Monthly Sales Trend")
    plt.xlabel("Date")
    plt.ylabel("Sales")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig("outputs/monthly_sales_trend.png")
    plt.close()
    return monthly_sales

In [16]:
# Forecast Next 6 Months
def forecast_sales(monthly_sales):
    monthly_sales = monthly_sales.reset_index()
    monthly_sales['Month_Num'] = np.arange(len(monthly_sales))
    X = monthly_sales[['Month_Num']]
    y = monthly_sales['Total Sale']
    model = LinearRegression()
    model.fit(X, y)
    future_months = np.array([[i] for i in range(len(monthly_sales), len(monthly_sales)+6)])
    predictions = model.predict(future_months)
    for i, sale in enumerate(predictions, 1):
        print(f"Forecast Month {i}: ${sale:,.2f}")


In [17]:
if __name__ == "__main__":
    os.makedirs("outputs", exist_ok=True)
    file_path = "data/sales_data.xlsx"
    df = load_data(file_path)
    print(f"Total Sales: ${df['Total Sale'].sum():,.2f}")
    plot_sales_by_region(df)
    plot_sales_by_product(df)
    monthly_sales = plot_monthly_sales(df)
    forecast_sales(monthly_sales)


Total Sales: $2,400.00


  monthly_sales = df.resample('M', on='Date').sum()['Total Sale']
  ax.set_xlim(left, right)


Forecast Month 1: $2,400.00
Forecast Month 2: $2,400.00
Forecast Month 3: $2,400.00
Forecast Month 4: $2,400.00
Forecast Month 5: $2,400.00
Forecast Month 6: $2,400.00


