In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

import dtale

In [4]:
# Load the uploaded CSV file
file_path_l = 'data\old\LOI Dummy Data.csv'

# Load the uploaded CSV file
data = pd.read_csv(file_path_l)

In [10]:
data.columns = data.columns.str.strip()

In [54]:
# Create a date range for the next 30 days
# Get the current date
now = datetime.now()
start_date = now - relativedelta(months=1)
date_range = [start_date + timedelta(days=i) for i in range(30)]


# Initialize an empty list to store dataframes
dummy_data_list = []

# Generate dummy data
for date in date_range:
    daily_data = []
    for index, row in data.iterrows():
        # Create base values with some variance
        base_parametervalueN = row['ParameterName']
        if not pd.isna(base_parametervalueN):
            parametervalueN = max(1, int(np.random.normal(base_parametervalueN, base_parametervalueN * 0.1)))
        else:
            parametervalueN = 1  # Ensure at least one item for logical consistency
        
        base_parametervalue = row['ParameterValue']
        if not pd.isna(base_parametervalue):
            parametervalue = max(0, int(np.random.normal(base_parametervalue, base_parametervalue * 0.1)))
        else:
            parametervalue = 0
        
        # Calculate LOI %
        if parametervalueN > 0:
            loi_percent = (parametervalue / parametervalueN) * 100
        else:
            loi_percent = 0

        # Create a dictionary for the new row
        new_row = {
            'Project Id': row['Project Id'],
            'PDH Name': row['PDH Name'],
            'FileName': row['FileName'],
            'ElementID': row['ElementID'] + date_range.index(date),  # Ensure unique ElementID per day
            'UniqueCategory': row['UniqueCategory'],
            'Services': row['Services'],
            'ParameterName': parametervalueN,
            'ParameterValue': parametervalue,
            'LOI %': loi_percent,
            'Date': date.strftime('%Y-%m-%d'),
            'Day': date.day,
            'Month': date.month,
            'Year': date.year
        }
        
        # Append the new row to daily_data list
        daily_data.append(new_row)

    # Convert daily_data list to DataFrame and add to dummy_data_list
    daily_df = pd.DataFrame(daily_data)
    
    # Drop rows where 'Project Id' is NaN
    daily_df = daily_df.dropna(subset=['Project Id'])
    
    dummy_data_list.append(daily_df)

# Concatenate all daily dataframes into a single dataframe
dummy_data = pd.concat(dummy_data_list, ignore_index=True)

In [63]:
dummy_data['Date'] = pd.to_datetime(dummy_data['Date'])
dummy_data['Day'] = dummy_data['Date'].dt.day
dummy_data['Month'] = dummy_data['Date'].dt.month
dummy_data['Year'] = dummy_data['Date'].dt.year

In [55]:
dummy_data.columns

Index(['Project Id', 'PDH Name', 'FileName', 'ElementID', 'UniqueCategory',
       'Services', 'ParameterName', 'ParameterValue', 'LOI %', 'Date', 'Day',
       'Month', 'Year'],
      dtype='object')

In [64]:
dummy_data = dummy_data.dropna(subset=['ElementID'])
dummy_data = dummy_data.drop_duplicates(subset=['Project Id', 'ElementID', 'Date'])

In [57]:
# dummy_data[dummy_data['ElementID']==1439711]['Date'].value_counts()
dummy_data.tail(5)

Unnamed: 0,Project Id,PDH Name,FileName,ElementID,UniqueCategory,Services,ParameterName,ParameterValue,LOI %,Date,Day,Month,Year
4911377,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499382,Pipe,Fire Protection,35,48,137.142857,2024-06-19,19,6,2024
4911378,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499383,Pipe,Fire Protection,43,39,90.697674,2024-06-19,19,6,2024
4911379,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499384,Pipe,Fire Protection,42,41,97.619048,2024-06-19,19,6,2024
4911380,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499385,Pipe,Fire Protection,52,42,80.769231,2024-06-19,19,6,2024
4911381,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499386,Pipe,Fire Protection,41,41,100.0,2024-06-19,19,6,2024


In [65]:
dummy_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 950160 entries, 0 to 4911381
Data columns (total 13 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   Project Id      950160 non-null  int64         
 1   PDH Name        950160 non-null  object        
 2   FileName        950160 non-null  object        
 3   ElementID       950160 non-null  int64         
 4   UniqueCategory  950160 non-null  object        
 5   Services        950160 non-null  object        
 6   ParameterName   950160 non-null  int64         
 7   ParameterValue  950160 non-null  int64         
 8   LOI %           950160 non-null  float64       
 9   Date            950160 non-null  datetime64[ns]
 10  Day             950160 non-null  int32         
 11  Month           950160 non-null  int32         
 12  Year            950160 non-null  int32         
dtypes: datetime64[ns](1), float64(1), int32(3), int64(4), object(4)
memory usage: 90.6+ MB


In [67]:
# dummy_data.tail(25)
dummy_data[dummy_data['ElementID']==1499382]

Unnamed: 0,Project Id,PDH Name,FileName,ElementID,UniqueCategory,Services,ParameterName,ParameterValue,LOI %,Date,Day,Month,Year
4254613,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499382,Pipe,Fire Protection,35,40,114.285714,2024-06-15,15,6,2024
4418804,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499382,Pipe,Fire Protection,42,44,104.761905,2024-06-16,16,6,2024
4582995,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499382,Pipe,Fire Protection,53,33,62.264151,2024-06-17,17,6,2024
4747186,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499382,Pipe,Fire Protection,57,37,64.912281,2024-06-18,18,6,2024
4911377,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1499382,Pipe,Fire Protection,35,48,137.142857,2024-06-19,19,6,2024


In [19]:
# d = dtale.show(dummy_data)
# d.open_browser()

In [121]:
# # dummy_data.Date.value_counts()
# dummy_data = dummy_data.drop_duplicates(['Project ID', 'PDH Name', 'Revit File', 'Name', 'Date'])
# # dummy_data = dummy_data.drop_duplicates(['Project Id', 'PDH Name', ' FileName', ' ElementID', ' UniqueCategory',
# #        'Services', 'Date'])

In [61]:
# Save the generated dummy data to a CSV file
output_file_path = 'data\health\LOI_Data'

In [68]:
dummy_data.to_csv(output_file_path+'.csv', index=False)
dummy_data.to_parquet(output_file_path+'.parquet', index=False)

In [70]:
data = pd.read_parquet(output_file_path+'.parquet')

In [71]:
data.head()

Unnamed: 0,Project Id,PDH Name,FileName,ElementID,UniqueCategory,Services,ParameterName,ParameterValue,LOI %,Date,Day,Month,Year
0,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1439710,Conduit,Electrical,46,0,0.0,2024-05-21,21,5,2024
1,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1439711,Conduit,Electrical,52,0,0.0,2024-05-21,21,5,2024
2,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1439712,Conduit,Electrical,42,0,0.0,2024-05-21,21,5,2024
3,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1439713,Conduit,Electrical,45,0,0.0,2024-05-21,21,5,2024
4,76941,Lokesh Sharma,Snowdon Towers Sample HVAC.rvt,1439714,Conduit,Electrical,43,0,0.0,2024-05-21,21,5,2024


In [52]:
# import dtale
# d = dtale.show(data)
# d.open_browser()

In [78]:
data.shape

(600, 10)

In [79]:
data.columns

Index(['Project ID', 'PDH Name', 'Revit File', 'Name', 'Result', 'Count',
       'Day', 'Month', 'Year', 'Date'],
      dtype='object')

In [80]:
data = data.drop_duplicates(['Project ID', 'PDH Name', 'Revit File', 'Name', 'Date'])

In [81]:
data.shape

(600, 10)

In [73]:
data.head(40)

Unnamed: 0,Project ID,PDH Name,Revit File,Name,Result,Count,Day,Month,Year,Date
0,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Warnings,Report,14,20,5,2024,2024-05-20
1,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,File Size,Report,185,20,5,2024,2024-05-20
2,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Purgable Elements,Report,445,20,5,2024,2024-05-20
3,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Worksets,Report,6,20,5,2024,2024-05-20
4,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Design Options,Report,0,20,5,2024,2024-05-20
5,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Duplicate Instances,Report,47,20,5,2024,2024-05-20
6,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Linked Revit Files,Report,10,20,5,2024,2024-05-20
7,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Linked CAD Files,Report,1,20,5,2024,2024-05-20
8,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Imported CAD files,Report,0,20,5,2024,2024-05-20
9,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Imported SKP files,Report,0,20,5,2024,2024-05-20
