In [1]:
!pip install pandas numpy matplotlib seaborn scikit-learn fpdf

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import pickle
from fpdf import FPDF

# 1. Create Simulated Data
def create_data(num_samples=1000, seed=42, csv_path='sales_data.csv'):
    """Simulate a price / promotion / sales dataset and optionally save it as CSV.

    Prices are drawn uniformly between 5 and 50, the promotion flag is drawn
    from {0, 1}, and sales volume is
    ``2000 - 30 * Price + 500 * Promotion`` plus Gaussian noise with a
    standard deviation of 200.

    Args:
        num_samples: Number of rows to simulate (default 1000).
        seed: Seed for the random generator (default 42), making the
            simulated data reproducible.
        csv_path: Where to write the CSV copy (default ``'sales_data.csv'``).
            Pass ``None`` to skip writing a file.

    Returns:
        pandas.DataFrame with the columns ``Price``, ``Promotion`` and
        ``Sales_Volume``.
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by the module-level functions, but it leaves NumPy's global
    # random state untouched, so calling this cannot disturb other random code.
    rng = np.random.RandomState(seed)

    # Draw order (prices, promotions, noise) is kept so the defaults reproduce
    # the dataset this function has always generated.
    prices = rng.uniform(5, 50, num_samples)
    promotions = rng.choice([0, 1], num_samples)
    noise = rng.normal(0, 200, num_samples)
    sales_volume = 2000 - 30 * prices + 500 * promotions + noise

    data = pd.DataFrame({'Price': prices, 'Promotion': promotions, 'Sales_Volume': sales_volume})
    if csv_path is not None:
        data.to_csv(csv_path, index=False)  # Save the data to CSV
    return data

# 2. Exploratory Analysis
def exploratory_analysis(data):
    """Save two exploratory figures for the sales dataset as PNG files.

    Writes ``sales_volume_distribution.png`` (histogram with a KDE curve of
    ``Sales_Volume``) and ``sales_vs_price.png`` (``Sales_Volume`` against
    ``Price``, coloured by ``Promotion``) to the working directory.

    Args:
        data: DataFrame with ``Price``, ``Promotion`` and ``Sales_Volume``
            columns.
    """
    # Figure 1: distribution of the sales volume
    hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
    sns.histplot(data['Sales_Volume'], bins=30, color='blue', kde=True, ax=hist_ax)
    hist_ax.set_title('Distribution of Sales Volume')
    hist_ax.set_xlabel('Sales Volume')
    hist_ax.set_ylabel('Frequency')
    hist_fig.savefig('sales_volume_distribution.png')
    plt.close(hist_fig)  # release the figure once it is on disk

    # Figure 2: sales volume against price, split by promotion flag
    scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(x='Price', y='Sales_Volume', hue='Promotion', data=data, ax=scatter_ax)
    scatter_ax.set_title('Sales Volume vs. Price with Promotion Effect')
    scatter_ax.set_xlabel('Price')
    scatter_ax.set_ylabel('Sales Volume')
    scatter_fig.savefig('sales_vs_price.png')
    plt.close(scatter_fig)

# 3. Model Training
def model_training(data):
    """Fit a linear regression of sales volume on price and promotion.

    The data is split 80/20 (fixed ``random_state=42``), the fitted model is
    pickled to ``sales_model.pkl`` and an actual-vs-predicted scatter plot of
    the held-out set is saved as ``actual_vs_predicted.png``.

    Args:
        data: DataFrame with ``Price``, ``Promotion`` and ``Sales_Volume``
            columns.

    Returns:
        Tuple ``(mse, r2)`` computed on the held-out test split.
    """
    features = data[['Price', 'Promotion']]
    target = data['Sales_Volume']

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # fit() returns the estimator itself, so creation and training chain
    regressor = LinearRegression().fit(X_train, y_train)

    # Persist the trained model
    with open('sales_model.pkl', 'wb') as model_file:
        pickle.dump(regressor, model_file)

    predictions = regressor.predict(X_test)

    # Actual vs. predicted sales volume on the held-out rows
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_test, predictions, alpha=0.7)
    ax.set_title('Actual vs. Predicted Sales Volume')
    ax.set_xlabel('Actual Sales Volume')
    ax.set_ylabel('Predicted Sales Volume')
    fig.savefig('actual_vs_predicted.png')
    plt.close(fig)

    return mean_squared_error(y_test, predictions), r2_score(y_test, predictions)

# 4. Generate PDF Report
class PDFReport(FPDF):
    """FPDF subclass providing the layout helpers for the summary report.

    Supplies a centred bold page title plus helpers that render a section
    title and a paragraph of body text, all in 12 pt Arial.
    """

    def header(self):
        """Render the centred, bold report title."""
        self.set_font('Arial', style='B', size=12)
        self.cell(0, 10, 'Price Optimization Analysis Report', border=0, ln=1, align='C')

    def chapter_title(self, title):
        """Render ``title`` as a bold, left-aligned heading followed by a 5-unit gap."""
        self.set_font('Arial', style='B', size=12)
        self.cell(0, 10, title, border=0, ln=1, align='L')
        self.ln(5)

    def chapter_body(self, body):
        """Render ``body`` as a wrapped regular-weight paragraph followed by a 10-unit gap."""
        self.set_font('Arial', style='', size=12)
        self.multi_cell(0, 10, body)
        self.ln(10)

def generate_pdf(mse, r2):
    """Build the one-page summary report and save it as ``sales_report.pdf``.

    The report has four sections (Introduction, Methodology, Analysis &
    Results, Conclusion). The results section quotes both metrics rounded to
    two decimals and is followed by ``actual_vs_predicted.png``, which must
    already exist in the working directory.

    Args:
        mse: Mean squared error of the model.
        r2: R-squared score of the model.
    """
    report = PDFReport()
    report.add_page()

    # Each entry: (section title, body text, optional figure placed after the body)
    sections = (
        (
            'Introduction',
            "This report presents a price optimization analysis for maximizing sales volume.",
            None,
        ),
        (
            'Methodology',
            "Data was generated using simulated sales data, including price, promotion, and sales volume.",
            None,
        ),
        (
            'Analysis & Results',
            f"The model's Mean Squared Error (MSE) is {mse:.2f}, and the R-squared (R2) score is {r2:.2f}.",
            'actual_vs_predicted.png',
        ),
        (
            'Conclusion',
            "The analysis shows a strong relationship between price, promotion, and sales volume.",
            None,
        ),
    )

    for title, body, figure in sections:
        report.chapter_title(title)
        report.chapter_body(body)
        if figure is not None:
            report.image(figure, x=10, w=190)

    # Save the report
    report.output('sales_report.pdf')

# 5. Create README.md
def create_readme(path='README.md'):
    """Write a short project README.

    Args:
        path: Destination file (default ``'README.md'`` in the working
            directory). An existing file is overwritten.
    """
    lines = [
        "# Price Optimization Analysis\n",
        "## Project Overview\n",
        "This project aims to optimize product pricing to maximize sales volume.\n",
        "## How to Run\n",
        # The installable package is `scikit-learn`; `sklearn` is only the
        # import name, so listing it here would send readers to the wrong package.
        "1. Install the required libraries: pandas, numpy, matplotlib, seaborn, scikit-learn, fpdf.\n",
        "2. Run the script: `python script.py`.\n",
        "## Results\n",
        "The model shows a significant relationship between price and sales volume.\n",
    ]
    # Explicit encoding keeps the output independent of the platform default.
    with open(path, 'w', encoding='utf-8') as file:
        file.writelines(lines)

# Execute the project
if __name__ == "__main__":
    # Steps run strictly in this order: later steps read files or values
    # produced by earlier ones.

    # 1. Create data (also written to sales_data.csv)
    data = create_data()

    # 2. Exploratory analysis (saves the distribution and scatter PNGs)
    exploratory_analysis(data)

    # 3. Model training (saves sales_model.pkl and actual_vs_predicted.png)
    mse, r2 = model_training(data)

    # 4. PDF Report (embeds actual_vs_predicted.png, so step 3 must run first)
    generate_pdf(mse, r2)

    # 5. Create README.md
    create_readme()

    print("Project completed successfully!")


Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... done
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=da21ed2bf938d9517adcd74e3e02586ed61c0b884858bf1896e681ab09e28829
  Stored in directory: /root/.cache/pip/wheels/f9/95/ba/f418094659025eb9611f17cbcaf2334236bf39a0c3453ea455
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2
Project completed successfully!


In [4]:
# Script to generate a detailed README.md

def create_readme():
    """Write the detailed project README to ``README.md`` in the working directory.

    The document consists of a title followed by seven sections (overview,
    objectives, methodology, run instructions, results, insights and
    requirements). An existing ``README.md`` is overwritten.
    """
    overview = (
        "This project aims to optimize product pricing in a supermarket chain to maximize sales volume. "
        "The analysis uses simulated sales data to evaluate the relationship between pricing strategies, "
        "promotions, and sales volume.\n"
    )
    objectives = (
        "- Understand the impact of price changes and promotions on sales volume.\n"
        "- Implement a linear regression model to predict sales volume based on pricing and promotions.\n"
        "- Generate actionable insights to improve pricing strategies and maximize revenue.\n"
    )
    methodology = (
        "1. **Data Generation:** Simulated data representing prices, promotions, and sales volume is created.\n"
        "2. **Exploratory Data Analysis (EDA):** Analyze the distribution of sales volume, the effect of price changes, "
        "and the impact of promotions.\n"
        "3. **Modeling:** Train a linear regression model to predict sales volume based on pricing and promotion data.\n"
        "4. **Evaluation:** Evaluate model performance using Mean Squared Error (MSE) and R-squared (R2) metrics.\n"
        "5. **Reporting:** Generate a PDF report summarizing the results, including visualizations and recommendations.\n"
    )
    how_to_run = (
        "1. **Install the required libraries:**\n"
        "   Run the following command in the terminal to install all necessary libraries:\n"
        "   ```bash\n"
        "   pip install pandas numpy matplotlib seaborn scikit-learn fpdf\n"
        "   ```\n"
        "2. **Run the script:**\n"
        "   Execute the script using the following command:\n"
        "   ```bash\n"
        "   python script.py\n"
        "   ```\n"
        "3. **Check the outputs:**\n"
        "   - `sales_data.csv`: Contains the simulated sales data.\n"
        "   - PNG files: Various visualizations generated during the analysis.\n"
        "   - `sales_model.pkl`: The trained linear regression model.\n"
        "   - `sales_report.pdf`: A detailed PDF report of the analysis.\n"
    )
    results = (
        "The analysis revealed a significant relationship between pricing, promotions, and sales volume. "
        "The trained linear regression model demonstrated effective prediction of sales volume with the following metrics:\n"
        "- **Mean Squared Error (MSE):** Indicates the average squared difference between predicted and actual values.\n"
        "- **R-squared (R2):** Represents the proportion of variance explained by the model.\n"
    )
    insights = (
        "- **Price Elasticity:** The analysis shows that changes in price significantly affect sales volume, with promotions having a positive impact.\n"
        "- **Optimal Pricing Strategy:** Adjusting prices while leveraging promotions can enhance sales performance and maximize revenue.\n"
        "- **Further Analysis:** Consider expanding the model to include other variables, such as seasonal effects or product categories, to improve predictions.\n"
    )
    requirements = (
        "This project requires the following software and libraries:\n"
        "- Python 3.x\n"
        "- pandas\n"
        "- numpy\n"
        "- matplotlib\n"
        "- seaborn\n"
        "- scikit-learn\n"
        "- fpdf\n"
    )

    # (heading line, section body) pairs in document order
    sections = [
        ("## Project Overview\n", overview),
        ("## Objectives\n", objectives),
        ("## Methodology\n", methodology),
        ("## How to Run the Project\n", how_to_run),
        ("## Results\n", results),
        ("## Key Insights and Recommendations\n", insights),
        ("## Requirements\n", requirements),
    ]

    with open('README.md', 'w') as readme:
        readme.write("# Price Optimization Analysis\n")
        for heading, body in sections:
            readme.write(heading)
            readme.write(body)

# Run the function to create README.md (overwrites any existing README.md)
create_readme()
print("README.md has been successfully generated!")


README.md has been successfully generated!


In [5]:
from fpdf import FPDF
import os

class PDFReport(FPDF):
    """FPDF subclass providing the layout helpers for the detailed project report.

    Supplies a centred page title, section-title and body-text helpers, and
    an image helper that ignores files that are not on disk.
    """

    def header(self):
        """Render the centred 16 pt bold report title followed by a 10-unit gap."""
        self.set_font('Arial', style='B', size=16)
        self.cell(0, 10, 'Data Science Project Report', border=0, ln=1, align='C')
        self.ln(10)

    def chapter_title(self, title):
        """Render ``title`` as a 14 pt bold, left-aligned heading followed by a 5-unit gap."""
        self.set_font('Arial', style='B', size=14)
        self.cell(0, 10, title, border=0, ln=1, align='L')
        self.ln(5)

    def chapter_body(self, body):
        """Render ``body`` as a wrapped 12 pt paragraph followed by a 10-unit gap."""
        self.set_font('Arial', style='', size=12)
        self.multi_cell(0, 10, body)
        self.ln(10)

    def add_image(self, image_path, x=10, y=None, w=180):
        """Place the image at ``image_path`` and add a 10-unit gap after it.

        Does nothing when ``image_path`` does not exist, so a missing figure
        is left out of the report instead of raising.
        """
        if not os.path.exists(image_path):
            return
        self.image(image_path, x=x, y=y, w=w)
        self.ln(10)

def generate_pdf():
    """Build the detailed project report and save it as ``detailed_project_report.pdf``.

    The report contains four sections (Introduction, Methodology, Analysis &
    Results, Conclusions & Recommendations). After the results text it embeds
    ``sales_volume_distribution.png``, ``sales_vs_price.png`` and
    ``actual_vs_predicted.png`` from the working directory; ``add_image``
    skips any of them that is missing. A confirmation line is printed once
    the file has been written.

    The body text is plain text on purpose: it is passed to ``multi_cell``,
    which prints characters as given, so Markdown emphasis markers such as
    ``**`` would show up as literal asterisks in the PDF.
    """
    pdf = PDFReport()

    # Add a page
    pdf.add_page()

    # 1. Introduction
    pdf.chapter_title('Introduction')
    intro_text = (
        "This project aims to optimize product pricing in a supermarket chain. The main goal is to maximize sales volume "
        "by analyzing the impact of pricing strategies and promotions. The analysis uses simulated sales data to evaluate the "
        "relationship between pricing, promotions, and sales volume.\n\n"
        "The importance of this analysis lies in helping businesses adjust pricing strategies to increase revenue and competitiveness."
    )
    pdf.chapter_body(intro_text)

    # 2. Methodology
    pdf.chapter_title('Methodology')
    methodology_text = (
        "The project follows a structured data science workflow, which includes the following steps:\n"
        "1. Data Generation: Simulated data representing prices, promotions, and sales volume.\n"
        "2. Data Cleaning: Handle missing values, outliers, and transform variables as needed.\n"
        "3. Exploratory Data Analysis (EDA): Analyze the distribution of sales volume and the impact of pricing strategies.\n"
        "4. Modeling: A linear regression model is trained to predict sales volume based on pricing and promotion data.\n"
        "5. Evaluation: The model's performance is evaluated using Mean Squared Error (MSE) and R-squared (R2) metrics.\n"
    )
    pdf.chapter_body(methodology_text)

    # 3. Analysis & Results
    pdf.chapter_title('Analysis & Results')
    analysis_text = (
        "The analysis reveals a strong relationship between pricing, promotions, and sales volume. The linear regression model shows "
        "that changes in pricing significantly affect sales, while promotions positively impact volume. The model achieved the following metrics:\n"
        "- Mean Squared Error (MSE): Measures the average squared difference between actual and predicted values.\n"
        "- R-squared (R2): Represents the proportion of variance explained by the model, indicating the model's effectiveness.\n"
        "Below are the visualizations generated during the analysis:\n"
    )
    pdf.chapter_body(analysis_text)

    # Figures produced earlier in the notebook, embedded in this order
    for figure in (
        'sales_volume_distribution.png',
        'sales_vs_price.png',
        'actual_vs_predicted.png',
    ):
        pdf.add_image(figure)

    # 4. Conclusions & Recommendations
    pdf.chapter_title('Conclusions & Recommendations')
    conclusion_text = (
        "The analysis suggests that adjusting prices while leveraging promotions can enhance sales performance. "
        "Key recommendations include:\n"
        "- Implement dynamic pricing strategies based on consumer demand and competitor pricing.\n"
        "- Use promotions selectively to boost sales volume without significantly reducing profitability.\n"
        "- Explore further analysis by including seasonal effects and other product features.\n\n"
        "These insights provide a roadmap for the business to optimize pricing strategies and achieve better sales results."
    )
    pdf.chapter_body(conclusion_text)

    # Save the PDF
    pdf_file = 'detailed_project_report.pdf'
    pdf.output(pdf_file)
    print(f"{pdf_file} has been successfully generated!")

# Run the PDF generation function; it writes detailed_project_report.pdf and
# embeds whichever of the three analysis PNGs exist in the working directory.
generate_pdf()


detailed_project_report.pdf has been successfully generated!


In [3]:
from google.colab import files

# List of files to download
# NOTE(review): sales_report.pdf (written by the first cell) is not listed —
# confirm whether leaving it out is intentional.
files_to_download = [
    'sales_data.csv',
    'sales_volume_distribution.png',
    'sales_vs_price.png',
    'actual_vs_predicted.png',
    'sales_model.pkl',
    'detailed_project_report.pdf',
    'README.md'
]

# Download each file individually; a missing file is reported and the loop
# continues with the remaining ones.
for file in files_to_download:
    try:
        files.download(file)
    except FileNotFoundError:
        print(f"{file} not found.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>