In [6]:
import pandas as pd
import numpy as np

# Define function to generate synthetic data
def generate_synthetic_data(start_year, end_year, initial_data):
    # Create a date range for the given years
    date_range = pd.date_range(start=f'1/1/{start_year}', end=f'12/31/{end_year}', freq='M')
    num_months = len(date_range)

    # Generate synthetic data for production, sales, and financials
    data = {
        'Date': date_range,
        'Shirting Production (Meters)': np.random.uniform(1000, 2000, num_months),
        'Suiting Production (Meters)': np.random.uniform(0, 1500, num_months),
        'Own Production (Meters)': np.random.uniform(1500, 3000, num_months),
        'Sales (Meters)': np.random.uniform(1000, 2000, num_months),
        'Customer Feedback Rating': np.random.randint(1, 6, num_months),
        'Revenue (Rs)': np.random.uniform(100000, 500000, num_months),
        'Cost (Rs)': np.random.uniform(50000, 250000, num_months)
    }

    # Create DataFrame with the synthetic data
    df = pd.DataFrame(data)

    # Calculate additional metrics
    df['Profit (Rs)'] = df['Revenue (Rs)'] - df['Cost (Rs)']
    df['Inventory (Shirting Meters)'] = df['Shirting Production (Meters)'].cumsum() - df['Sales (Meters)'].cumsum()
    df['Inventory (Suiting Meters)'] = df['Suiting Production (Meters)'].cumsum() - df['Sales (Meters)'].cumsum()
    df['Inventory (Own Production Meters)'] = df['Own Production (Meters)'].cumsum() - df['Sales (Meters)'].cumsum()

    # Integrate initial data provided for the years 2020-21, 2021-22, 2022-23
    for index, row in initial_data.iterrows():
        date = row['Date']
        if date in df['Date'].values:
            df.loc[df['Date'] == date, 'Shirting Production (Meters)'] = row['Shirting Production (Meters)']
            df.loc[df['Date'] == date, 'Suiting Production (Meters)'] = row['Suiting Production (Meters)']
            df.loc[df['Date'] == date, 'Own Production (Meters)'] = row['Own Production (Meters)']
            df.loc[df['Date'] == date, 'Sales (Meters)'] = row['Sales (Meters)']
            df.loc[df['Date'] == date, 'Revenue (Rs)'] = row['Revenue (Rs)']
            df.loc[df['Date'] == date, 'Cost (Rs)'] = row['Cost (Rs)']
            df.loc[df['Date'] == date, 'Profit (Rs)'] = row['Profit (Rs)']
            df.loc[df['Date'] == date, 'Inventory (Shirting Meters)'] = row['Inventory (Shirting Meters)']
            df.loc[df['Date'] == date, 'Inventory (Suiting Meters)'] = row['Inventory (Suiting Meters)']
            df.loc[df['Date'] == date, 'Inventory (Own Production Meters)'] = row['Inventory (Own Production Meters)']

    return df

# Initial data provided
initial_data = {
    'Date': pd.to_datetime(['2020-03-31', '2021-03-31', '2022-03-31']),
    'Shirting Production (Meters)': [13917.45, 15884.60, 14647.75],
    'Suiting Production (Meters)': [0, 8769.65, 17954.25],
    'Own Production (Meters)': [19534.00, 26725.00, 40518.00],
    'Sales (Meters)': [17111.00, 23052.00, 37199.00],
    'Customer Feedback Rating': [4, 4, 5],
    'Revenue (Rs)': [4277842.50, 5762932.68, 9299836.00],
    'Cost (Rs)': [2138921.25, 2881466.34, 4649918.00],
    'Profit (Rs)': [2138921.25, 2881466.34, 4649918.00],
    'Inventory (Shirting Meters)': [0, 13917.45, 29802.05],
    'Inventory (Suiting Meters)': [0, 0, 8769.65],
    'Inventory (Own Production Meters)': [19534.00, 46259.00, 86777.00]
}

# Convert initial data to DataFrame
initial_data_df = pd.DataFrame(initial_data)

# Ensure data types are consistent
initial_data_df['Shirting Production (Meters)'] = initial_data_df['Shirting Production (Meters)'].astype(float)
initial_data_df['Suiting Production (Meters)'] = initial_data_df['Suiting Production (Meters)'].astype(float)
initial_data_df['Own Production (Meters)'] = initial_data_df['Own Production (Meters)'].astype(float)
initial_data_df['Sales (Meters)'] = initial_data_df['Sales (Meters)'].astype(float)
initial_data_df['Revenue (Rs)'] = initial_data_df['Revenue (Rs)'].astype(float)
initial_data_df['Cost (Rs)'] = initial_data_df['Cost (Rs)'].astype(float)
initial_data_df['Profit (Rs)'] = initial_data_df['Profit (Rs)'].astype(float)
initial_data_df['Inventory (Shirting Meters)'] = initial_data_df['Inventory (Shirting Meters)'].astype(float)
initial_data_df['Inventory (Suiting Meters)'] = initial_data_df['Inventory (Suiting Meters)'].astype(float)
initial_data_df['Inventory (Own Production Meters)'] = initial_data_df['Inventory (Own Production Meters)'].astype(float)

# Generate data from 2018 to 2023
df_synthetic = generate_synthetic_data(2018, 2023, initial_data_df)

# Display first few rows
print(df_synthetic.head())

# Save to CSV
df_synthetic.to_csv('synthetic_handloom_data.csv', index=False)


        Date  Shirting Production (Meters)  Suiting Production (Meters)  \
0 2018-01-31                   1917.618108                  1263.805037   
1 2018-02-28                   1565.981131                   701.108792   
2 2018-03-31                   1650.670806                   906.415417   
3 2018-04-30                   1444.196343                   381.532362   
4 2018-05-31                   1620.956912                   221.750828   

   Own Production (Meters)  Sales (Meters)  Customer Feedback Rating  \
0              2294.782581     1459.299287                         4   
1              2420.139869     1960.975002                         5   
2              1900.658798     1849.699927                         2   
3              2346.829238     1524.540343                         4   
4              2277.548402     1586.800623                         4   

    Revenue (Rs)      Cost (Rs)    Profit (Rs)  Inventory (Shirting Meters)  \
0  207417.065611  216943.876432   -95

Test data

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime

# Generate month-end dates for 2024 up to July
dates = pd.date_range(start='2024-01-31', end='2024-07-31', freq='M').strftime('%d-%m-%Y').tolist()

# Create dummy data for these dates
np.random.seed(0)  # For reproducibility

data = {
    "Date": dates,
    "Shirting Production (Meters)": np.random.uniform(1000, 2000, size=len(dates)).round(2),
    "Suiting Production (Meters)": np.random.uniform(800, 1500, size=len(dates)).round(2),
    "Sales (Shirting)": np.random.uniform(5000, 10000, size=len(dates)).round(2),
    "Sales (Suiting)": np.random.uniform(4000, 8000, size=len(dates)).round(2),
    "Revenue (Shirting)": np.random.uniform(10000, 20000, size=len(dates)).round(2),
    "Revenue (Suiting)": np.random.uniform(8000, 16000, size=len(dates)).round(2),
    "Cost (Shirting)": np.random.uniform(2000, 5000, size=len(dates)).round(2),
    "Cost (Suiting)": np.random.uniform(1500, 4000, size=len(dates)).round(2),
    "Profit (Shirting)": np.random.uniform(1000, 4000, size=len(dates)).round(2),
    "Profit (Suiting)": np.random.uniform(800, 3000, size=len(dates)).round(2),
    "Inventory (Shirting Meters)": np.random.uniform(100, 500, size=len(dates)).round(2),
    "Inventory (Suiting Meters)": np.random.uniform(100, 500, size=len(dates)).round(2)
}

# Create DataFrame
df = pd.DataFrame(data)

# Save DataFrame to CSV
df.to_csv('test_data_2024.csv', index=False)


  dates = pd.date_range(start='2024-01-31', end='2024-07-31', freq='M').strftime('%d-%m-%Y').tolist()
