<a href="https://colab.research.google.com/github/JunaidRaza78/RI-Software-Predictive-models/blob/main/Copy_of_Pricing_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Pricing Analysis** (Analyze the impact of pricing strategies on sales and profitability. Determine the optimal pricing strategy for different products and customer segments.)

In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so the mock data is identical on every run.
# The random draws below are consumed in sequence, so their order must not change.
np.random.seed(42)

# Products: 100 sequential IDs, each with a matching name
products_data = dict(
    ProductID=np.arange(1, 101),
    ProductName=[f'Product_{i}' for i in range(1, 101)],
)
products_df = pd.DataFrame(products_data)

# Customers: 500 rows with random demographics and a purchase amount
customers_data = dict(
    CustomerID=np.arange(1, 501),
    CustomerName=[f'Customer_{i}' for i in range(1, 501)],
    Age=np.random.randint(18, 60, size=500),
    Gender=np.random.choice(['Male', 'Female'], size=500),
    CompanyType=np.random.choice(['Tech', 'Retail', 'Finance'], size=500),
    PurchaseAmount=np.random.uniform(50, 500, size=500),
)
customers_df = pd.DataFrame(customers_data)

# Sales: 1000 transactions, each pointing at a random product and customer
sales_data = dict(
    TransactionID=np.arange(1, 1001),
    ProductID=np.random.choice(np.arange(1, 101), size=1000),
    CustomerID=np.random.choice(np.arange(1, 501), size=1000),
    Quantity=np.random.randint(1, 10, size=1000),
    Revenue=np.random.uniform(10, 200, size=1000),
)
sales_df = pd.DataFrame(sales_data)

# Pricing: one base price and one discount rate per product
pricing_data = dict(
    ProductID=np.arange(1, 101),
    BasePrice=np.random.uniform(50, 200, size=100),
    DiscountRate=np.random.uniform(0.05, 0.3, size=100),
)
pricing_df = pd.DataFrame(pricing_data)

# Expenses: four expense figures per product
expenses_data = dict(
    ProductID=np.arange(1, 101),
    EmployeeExpenses=np.random.uniform(1, 5, size=100),
    OfficeExpenses=np.random.uniform(5, 10, size=100),
    MarketingExpenses=np.random.uniform(1, 3, size=100),
    MaintenanceExpenses=np.random.uniform(2, 6, size=100),
)
expenses_df = pd.DataFrame(expenses_data)

# Attach product, customer, pricing and expense attributes to every transaction
merged_df = (
    sales_df
    .merge(products_df, on='ProductID')
    .merge(customers_df, on='CustomerID')
    .merge(pricing_df, on='ProductID')
    .merge(expenses_df, on='ProductID')
)

# Preview the first rows of the merged dataset
merged_df.head()



Unnamed: 0,TransactionID,ProductID,CustomerID,Quantity,Revenue,ProductName,CustomerName,Age,Gender,CompanyType,PurchaseAmount,BasePrice,DiscountRate,EmployeeExpenses,OfficeExpenses,MarketingExpenses,MaintenanceExpenses
0,1,77,486,8,29.18584,Product_77,Customer_486,38,Male,Finance,468.926913,90.374236,0.295685,3.539627,9.554131,2.194625,2.15969
1,101,77,150,7,176.073637,Product_77,Customer_150,45,Male,Tech,108.281715,90.374236,0.295685,3.539627,9.554131,2.194625,2.15969
2,183,77,270,4,178.122472,Product_77,Customer_270,35,Male,Tech,133.486482,90.374236,0.295685,3.539627,9.554131,2.194625,2.15969
3,250,77,62,2,75.35196,Product_77,Customer_62,51,Male,Retail,318.321445,90.374236,0.295685,3.539627,9.554131,2.194625,2.15969
4,517,77,437,5,119.78334,Product_77,Customer_437,48,Female,Finance,429.876702,90.374236,0.295685,3.539627,9.554131,2.194625,2.15969


In [None]:
import pandas as pd

def analyze_pricing_impact(data):
    """
    Analyze the impact of pricing strategies on sales.

    Revenue per row is Quantity * BasePrice * (1 - DiscountRate); the rows are
    then summed per product name.

    Parameters:
    - data: DataFrame with 'ProductName', 'BasePrice', 'DiscountRate' and
      'Quantity' columns. It is not modified.

    Returns:
    - DataFrame with one row per 'ProductName' and its summed 'TotalRevenue'.
    """

    # Work on local Series so no helper columns leak into the caller's frame
    discounted_price = data['BasePrice'] * (1 - data['DiscountRate'])
    total_revenue = data['Quantity'] * discounted_price

    # Small scratch frame holding only what the aggregation needs
    per_row = data[['ProductName']].assign(TotalRevenue=total_revenue)

    # Group by product name and sum the revenue of each product
    product_analysis = per_row.groupby('ProductName')['TotalRevenue'].sum().reset_index()

    return product_analysis

# Example usage: revenue per product for the merged dataset, first five rows shown
pricing_impact_analysis = analyze_pricing_impact(data=merged_df)
pricing_impact_analysis.head(n=5)


Unnamed: 0,ProductName,TotalRevenue
0,Product_1,4556.263909
1,Product_10,3978.376375
2,Product_100,2848.00903
3,Product_11,1973.951939
4,Product_12,7443.956175


In [None]:
import pandas as pd

def analyze_profitability(data):
    """
    Analyze the impact of pricing strategies on profitability.

    Per row, revenue is Quantity * BasePrice * (1 - DiscountRate) and profit is
    that revenue minus the sum of the four expense columns; both are then
    summed per product name.

    Parameters:
    - data: DataFrame with 'ProductName', 'BasePrice', 'DiscountRate',
      'Quantity', 'EmployeeExpenses', 'OfficeExpenses', 'MarketingExpenses'
      and 'MaintenanceExpenses' columns. It is not modified.

    Returns:
    - DataFrame with one row per 'ProductName' and its summed 'TotalRevenue'
      and 'Profit'.
    """

    # Work on local Series so no helper columns leak into the caller's frame
    discounted_price = data['BasePrice'] * (1 - data['DiscountRate'])
    total_revenue = data['Quantity'] * discounted_price

    # The expense columns are subtracted once per row (i.e. per transaction)
    total_expenses = (
        data['EmployeeExpenses']
        + data['OfficeExpenses']
        + data['MarketingExpenses']
        + data['MaintenanceExpenses']
    )

    # Small scratch frame holding only what the aggregation needs
    per_row = data[['ProductName']].assign(
        TotalRevenue=total_revenue,
        Profit=total_revenue - total_expenses,
    )

    # Group by product name and sum revenue and profit of each product
    profitability_analysis = per_row.groupby('ProductName')[['TotalRevenue', 'Profit']].sum().reset_index()

    return profitability_analysis

# Example usage: revenue and profit per product, first five rows printed
profitability_analysis = analyze_profitability(data=merged_df)
print(profitability_analysis.head(n=5))


   ProductName  TotalRevenue       Profit
0    Product_1   4556.263909  4452.582979
1   Product_10   3978.376375  3708.112586
2  Product_100   2848.009030  2722.997035
3   Product_11   1973.951939  1856.448989
4   Product_12   7443.956175  7277.850805


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

def find_optimal_pricing_strategy(data, product_id):
    """
    Evaluate average profit for one product over a 10 x 10 price/discount grid.

    The grid spans the minimum to maximum 'BasePrice' and 'DiscountRate' found
    in the product's own rows. If the product has a single base price and a
    single discount rate, all 100 grid points are the same.

    Parameters:
    - data: DataFrame containing sales, pricing, and expense information.
    - product_id: ID of the product for analysis.

    Returns:
    - DataFrame with 100 rows and columns 'BasePrice', 'DiscountRate' and
      'AverageProfitability' (base price varies slowest).
    """

    # Keep only the rows of the requested product; the caller's frame is untouched
    rows = data[data['ProductID'] == product_id]

    # Ten evenly spaced candidates along each pricing axis
    price_axis = np.linspace(rows['BasePrice'].min(), rows['BasePrice'].max(), 10)
    discount_axis = np.linspace(rows['DiscountRate'].min(), rows['DiscountRate'].max(), 10)

    # Expenses do not depend on the candidate price, so compute them once
    units = rows['Quantity']
    expenses = (
        rows['EmployeeExpenses']
        + rows['OfficeExpenses']
        + rows['MarketingExpenses']
        + rows['MaintenanceExpenses']
    )

    # One record per (base price, discount rate) pair: mean of revenue minus expenses
    records = [
        {
            'BasePrice': price,
            'DiscountRate': rate,
            'AverageProfitability': (units * (price * (1 - rate)) - expenses).mean(),
        }
        for price in price_axis
        for rate in discount_axis
    ]

    return pd.DataFrame(records, columns=['BasePrice', 'DiscountRate', 'AverageProfitability'])

# Example usage: evaluate the pricing grid for a single product
product_id_to_analyze = 5  # Replace with the product ID you want to analyze
optimal_strategy_results = find_optimal_pricing_strategy(
    data=merged_df,
    product_id=product_id_to_analyze,
)
print(optimal_strategy_results)



    BasePrice  DiscountRate  AverageProfitability
0   186.95622      0.175145            718.627459
1   186.95622      0.175145            718.627459
2   186.95622      0.175145            718.627459
3   186.95622      0.175145            718.627459
4   186.95622      0.175145            718.627459
..        ...           ...                   ...
95  186.95622      0.175145            718.627459
96  186.95622      0.175145            718.627459
97  186.95622      0.175145            718.627459
98  186.95622      0.175145            718.627459
99  186.95622      0.175145            718.627459

[100 rows x 3 columns]


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

def find_optimal_pricing_strategy_by_segment(data, segment_feature):
    """
    Evaluate segment-averaged profit over a 10 x 10 price/discount grid.

    For every (base price, discount rate) pair, each row's profit is replaced
    by the mean profit of its segment and those values are averaged over all
    rows. The result is therefore a single segment-size-weighted figure per
    grid point; no per-segment breakdown is returned.

    Parameters:
    - data: DataFrame containing sales, pricing, expense and customer
      information. It is not modified.
    - segment_feature: Feature used for customer segmentation (e.g., 'Age', 'Gender').

    Returns:
    - DataFrame with 100 rows and columns 'BasePrice', 'DiscountRate' and
      'AverageProfitability' (discount rate varies slowest).
    """

    # Generate a grid of base prices and discount rates
    base_prices = np.linspace(data['BasePrice'].min(), data['BasePrice'].max(), 10)
    discount_rates = np.linspace(data['DiscountRate'].min(), data['DiscountRate'].max(), 10)

    # Coordinate grids: rows follow discount_rates, columns follow base_prices
    base_price_matrix, discount_rate_matrix = np.meshgrid(base_prices, discount_rates)

    # Float storage for the results; an integer grid would truncate every average
    avg_profit_matrix = np.zeros(base_price_matrix.shape, dtype=float)

    # Scratch copy so the helper columns never reach the caller's frame
    working = data.copy()

    # Expenses do not depend on the candidate price, so compute them once
    total_expenses = (
        working['EmployeeExpenses']
        + working['OfficeExpenses']
        + working['MarketingExpenses']
        + working['MaintenanceExpenses']
    )

    # Calculate average profitability for each combination of base price and discount rate
    for i, base_price in enumerate(base_prices):
        for j, discount_rate in enumerate(discount_rates):
            discounted_price = base_price * (1 - discount_rate)
            working['Profit'] = working['Quantity'] * discounted_price - total_expenses
            segment_mean_profit = working.groupby(segment_feature)['Profit'].transform('mean')
            avg_profit_matrix[j, i] = segment_mean_profit.mean()

    # Create a DataFrame with the results
    result_df = pd.DataFrame({
        'BasePrice': base_price_matrix.flatten(),
        'DiscountRate': discount_rate_matrix.flatten(),
        'AverageProfitability': avg_profit_matrix.flatten(),
    })

    return result_df
# Example usage: segment customers by age
segment_feature_to_analyze = 'Age'  # Replace with the feature you want to use for segmentation
optimal_strategy_results_by_segment = find_optimal_pricing_strategy_by_segment(
    data=merged_df,
    segment_feature=segment_feature_to_analyze,
)
print(optimal_strategy_results_by_segment)



     BasePrice  DiscountRate  AverageProfitability
0    52.079320      0.052679                   230
1    68.464108      0.052679                   307
2    84.848896      0.052679                   384
3   101.233684      0.052679                   462
4   117.618472      0.052679                   539
..         ...           ...                   ...
95  134.003261      0.298934                   452
96  150.388049      0.298934                   510
97  166.772837      0.298934                   567
98  183.157625      0.298934                   624
99  199.542413      0.298934                   682

[100 rows x 3 columns]


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

def find_optimal_pricing_strategy_by_segment(data, segment_feature):
    """
    Evaluate segment-averaged profit over a 10 x 10 price/discount grid.

    For every (base price, discount rate) pair, each row's profit is replaced
    by the mean profit of its segment and those values are averaged over all
    rows. The result is therefore a single segment-size-weighted figure per
    grid point; no per-segment breakdown is returned.

    Parameters:
    - data: DataFrame containing sales, pricing, expense and customer
      information. It is not modified.
    - segment_feature: Feature used for customer segmentation (e.g., 'Age', 'Gender').

    Returns:
    - DataFrame with 100 rows and columns 'BasePrice', 'DiscountRate' and
      'AverageProfitability' (discount rate varies slowest).
    """

    # Generate a grid of base prices and discount rates
    base_prices = np.linspace(data['BasePrice'].min(), data['BasePrice'].max(), 10)
    discount_rates = np.linspace(data['DiscountRate'].min(), data['DiscountRate'].max(), 10)

    # Coordinate grids: rows follow discount_rates, columns follow base_prices
    base_price_matrix, discount_rate_matrix = np.meshgrid(base_prices, discount_rates)

    # Float storage for the results; an integer grid would truncate every average
    avg_profit_matrix = np.zeros(base_price_matrix.shape, dtype=float)

    # Scratch copy so the helper columns never reach the caller's frame
    working = data.copy()

    # Expenses do not depend on the candidate price, so compute them once
    total_expenses = (
        working['EmployeeExpenses']
        + working['OfficeExpenses']
        + working['MarketingExpenses']
        + working['MaintenanceExpenses']
    )

    # Calculate average profitability for each combination of base price and discount rate
    for i, base_price in enumerate(base_prices):
        for j, discount_rate in enumerate(discount_rates):
            discounted_price = base_price * (1 - discount_rate)
            working['Profit'] = working['Quantity'] * discounted_price - total_expenses
            segment_mean_profit = working.groupby(segment_feature)['Profit'].transform('mean')
            avg_profit_matrix[j, i] = segment_mean_profit.mean()

    # Create a DataFrame with the results
    result_df = pd.DataFrame({
        'BasePrice': base_price_matrix.flatten(),
        'DiscountRate': discount_rate_matrix.flatten(),
        'AverageProfitability': avg_profit_matrix.flatten(),
    })

    return result_df
# Example usage: segment customers by gender
segment_feature_to_analyze = 'Gender'  # Replace with the feature you want to use for segmentation
optimal_strategy_results_by_segment = find_optimal_pricing_strategy_by_segment(
    data=merged_df,
    segment_feature=segment_feature_to_analyze,
)
print(optimal_strategy_results_by_segment)



     BasePrice  DiscountRate  AverageProfitability
0    52.079320      0.052679                   230
1    68.464108      0.052679                   307
2    84.848896      0.052679                   384
3   101.233684      0.052679                   462
4   117.618472      0.052679                   539
..         ...           ...                   ...
95  134.003261      0.298934                   452
96  150.388049      0.298934                   510
97  166.772837      0.298934                   567
98  183.157625      0.298934                   624
99  199.542413      0.298934                   682

[100 rows x 3 columns]


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

def find_optimal_pricing_strategy_by_segment(data, segment_feature):
    """
    Evaluate segment-averaged profit over a 10 x 10 price/discount grid.

    For every (base price, discount rate) pair, each row's profit is replaced
    by the mean profit of its segment and those values are averaged over all
    rows. The result is therefore a single segment-size-weighted figure per
    grid point; no per-segment breakdown is returned.

    Parameters:
    - data: DataFrame containing sales, pricing, expense and customer
      information. It is not modified.
    - segment_feature: Feature used for customer segmentation (e.g., 'Age', 'Gender').

    Returns:
    - DataFrame with 100 rows and columns 'BasePrice', 'DiscountRate' and
      'AverageProfitability' (discount rate varies slowest).
    """

    # Generate a grid of base prices and discount rates
    base_prices = np.linspace(data['BasePrice'].min(), data['BasePrice'].max(), 10)
    discount_rates = np.linspace(data['DiscountRate'].min(), data['DiscountRate'].max(), 10)

    # Coordinate grids: rows follow discount_rates, columns follow base_prices
    base_price_matrix, discount_rate_matrix = np.meshgrid(base_prices, discount_rates)

    # Float storage for the results; an integer grid would truncate every average
    avg_profit_matrix = np.zeros(base_price_matrix.shape, dtype=float)

    # Scratch copy so the helper columns never reach the caller's frame
    working = data.copy()

    # Expenses do not depend on the candidate price, so compute them once
    total_expenses = (
        working['EmployeeExpenses']
        + working['OfficeExpenses']
        + working['MarketingExpenses']
        + working['MaintenanceExpenses']
    )

    # Calculate average profitability for each combination of base price and discount rate
    for i, base_price in enumerate(base_prices):
        for j, discount_rate in enumerate(discount_rates):
            discounted_price = base_price * (1 - discount_rate)
            working['Profit'] = working['Quantity'] * discounted_price - total_expenses
            segment_mean_profit = working.groupby(segment_feature)['Profit'].transform('mean')
            avg_profit_matrix[j, i] = segment_mean_profit.mean()

    # Create a DataFrame with the results
    result_df = pd.DataFrame({
        'BasePrice': base_price_matrix.flatten(),
        'DiscountRate': discount_rate_matrix.flatten(),
        'AverageProfitability': avg_profit_matrix.flatten(),
    })

    return result_df
# Example usage: segment customers by purchase amount
segment_feature_to_analyze = 'PurchaseAmount'  # Replace with the feature you want to use for segmentation
optimal_strategy_results_by_segment = find_optimal_pricing_strategy_by_segment(
    data=merged_df,
    segment_feature=segment_feature_to_analyze,
)
print(optimal_strategy_results_by_segment)

     BasePrice  DiscountRate  AverageProfitability
0    52.079320      0.052679                   230
1    68.464108      0.052679                   307
2    84.848896      0.052679                   384
3   101.233684      0.052679                   462
4   117.618472      0.052679                   539
..         ...           ...                   ...
95  134.003261      0.298934                   452
96  150.388049      0.298934                   510
97  166.772837      0.298934                   567
98  183.157625      0.298934                   624
99  199.542413      0.298934                   682

[100 rows x 3 columns]


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

def find_optimal_pricing_strategy_by_segment(data, segment_feature):
    """
    Evaluate segment-averaged profit over a 10 x 10 price/discount grid.

    For every (base price, discount rate) pair, each row's profit is replaced
    by the mean profit of its segment and those values are averaged over all
    rows. The result is therefore a single segment-size-weighted figure per
    grid point; no per-segment breakdown is returned.

    Parameters:
    - data: DataFrame containing sales, pricing, expense and customer
      information. It is not modified.
    - segment_feature: Feature used for customer segmentation
      (e.g., 'Age', 'Gender', 'PurchaseAmount', 'CompanyType').

    Returns:
    - DataFrame with 100 rows and columns 'BasePrice', 'DiscountRate' and
      'AverageProfitability' (discount rate varies slowest).
    """

    # Generate a grid of base prices and discount rates
    base_prices = np.linspace(data['BasePrice'].min(), data['BasePrice'].max(), 10)
    discount_rates = np.linspace(data['DiscountRate'].min(), data['DiscountRate'].max(), 10)

    # Coordinate grids: rows follow discount_rates, columns follow base_prices
    base_price_matrix, discount_rate_matrix = np.meshgrid(base_prices, discount_rates)

    # Float storage for the results; an integer grid would truncate every average
    avg_profit_matrix = np.zeros(base_price_matrix.shape, dtype=float)

    # Scratch copy so the helper columns never reach the caller's frame
    working = data.copy()

    # Expenses do not depend on the candidate price, so compute them once
    total_expenses = (
        working['EmployeeExpenses']
        + working['OfficeExpenses']
        + working['MarketingExpenses']
        + working['MaintenanceExpenses']
    )

    # Calculate average profitability for each combination of base price and discount rate
    for i, base_price in enumerate(base_prices):
        for j, discount_rate in enumerate(discount_rates):
            discounted_price = base_price * (1 - discount_rate)
            working['Profit'] = working['Quantity'] * discounted_price - total_expenses
            segment_mean_profit = working.groupby(segment_feature)['Profit'].transform('mean')
            avg_profit_matrix[j, i] = segment_mean_profit.mean()

    # Create a DataFrame with the results
    result_df = pd.DataFrame({
        'BasePrice': base_price_matrix.flatten(),
        'DiscountRate': discount_rate_matrix.flatten(),
        'AverageProfitability': avg_profit_matrix.flatten(),
    })

    return result_df
# Example usage: segment customers by company type
segment_feature_to_analyze = 'CompanyType'  # Replace with the feature you want to use for segmentation
optimal_strategy_results_by_segment = find_optimal_pricing_strategy_by_segment(
    data=merged_df,
    segment_feature=segment_feature_to_analyze,
)
print(optimal_strategy_results_by_segment)

     BasePrice  DiscountRate  AverageProfitability
0    52.079320      0.052679                   230
1    68.464108      0.052679                   307
2    84.848896      0.052679                   384
3   101.233684      0.052679                   462
4   117.618472      0.052679                   539
..         ...           ...                   ...
95  134.003261      0.298934                   452
96  150.388049      0.298934                   510
97  166.772837      0.298934                   567
98  183.157625      0.298934                   624
99  199.542413      0.298934                   682

[100 rows x 3 columns]
