In [130]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

import dtale

In [193]:
# Load the source model-health CSV.
# Forward slashes keep the path portable across operating systems and avoid the
# invalid "\o" / "\m" escape sequences that the backslash spelling relied on.
file_path_h = 'data/old/modelhealthdata.csv'

data = pd.read_csv(file_path_h)

In [195]:
# Print the column dtypes and non-null counts of the freshly loaded source data.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Project ID  20 non-null     float64
 1   PDH Name    20 non-null     object 
 2   Revit File  20 non-null     object 
 3   Name        20 non-null     object 
 4   Result      20 non-null     object 
 5   Count       20 non-null     float64
 6   Day         20 non-null     float64
 7   Month       20 non-null     float64
 8   Year        20 non-null     float64
 9   Date        20 non-null     object 
dtypes: float64(5), object(5)
memory usage: 1.8+ KB


In [179]:
# Generate 30 daily snapshots of dummy data, starting one month BEFORE today
# (the range covers the past month, not an upcoming one).
RANDOM_SEED = 42        # fixed seed so a re-run regenerates the same dummy counts
COUNT_REL_STD = 0.1     # noise standard deviation as a fraction of the base count
N_DAYS = 30             # number of consecutive daily snapshots to generate

now = datetime.now()
start_date = now - relativedelta(months=1)
date_range = [start_date + timedelta(days=i) for i in range(N_DAYS)]

rng = np.random.default_rng(RANDOM_SEED)

# Rows without a Project ID would be discarded from the result anyway, so they
# are filtered out once here instead of being simulated for every day.
template_rows = data.dropna(subset=['Project ID'])
identity_columns = ['Project ID', 'PDH Name', 'Revit File', 'Name', 'Result']

# A missing base count yields a generated count of 0: with both the mean and
# the spread set to 0 the draw is exactly 0.
base_counts = template_rows['Count'].fillna(0).to_numpy(dtype=float)
# abs() keeps the spread non-negative so a negative base count cannot make the
# sampler raise; such rows end up clamped to 0 below.
noise_scale = np.abs(base_counts) * COUNT_REL_STD

# One DataFrame per simulated day
dummy_data_list = []
for date in date_range:
    # Draw every row's count for this day in a single vectorised call.
    sampled = rng.normal(base_counts, noise_scale)
    # Truncate toward zero (like int()) and clamp negatives to 0.
    counts = np.maximum(np.trunc(sampled), 0).astype('int64')

    daily_df = template_rows[identity_columns].assign(
        Count=counts,
        Date=date.strftime('%Y-%m-%d'),
        Day=date.day,
        Month=date.month,
        Year=date.year,
    )
    dummy_data_list.append(daily_df)

# Concatenate all daily dataframes into a single dataframe with a fresh index
dummy_data = pd.concat(dummy_data_list, ignore_index=True)

In [198]:
# Normalise column types: nullable integers for the ID and count columns,
# real timestamps for Date, and calendar parts re-derived from those timestamps.
dummy_data = dummy_data.astype({'Project ID': 'Int64', 'Count': 'Int64'})
parsed_dates = pd.to_datetime(dummy_data['Date'])
dummy_data = dummy_data.assign(
    Date=parsed_dates,
    Day=parsed_dates.dt.day,
    Month=parsed_dates.dt.month,
    Year=parsed_dates.dt.year,
)

In [199]:
# Show the column names (and their order) of the generated frame.
dummy_data.columns

Index(['Project ID', 'PDH Name', 'Revit File', 'Name', 'Result', 'Count',
       'Date', 'Day', 'Month', 'Year'],
      dtype='object')

In [200]:
# Preview the first 25 generated rows.
dummy_data.head(25)

Unnamed: 0,Project ID,PDH Name,Revit File,Name,Result,Count,Date,Day,Month,Year
0,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Warnings,Report,12,2024-05-21,21,5,2024
1,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,File Size,Report,169,2024-05-21,21,5,2024
2,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Purgable Elements,Report,412,2024-05-21,21,5,2024
3,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Worksets,Report,5,2024-05-21,21,5,2024
4,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Design Options,Report,0,2024-05-21,21,5,2024
5,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Duplicate Instances,Report,49,2024-05-21,21,5,2024
6,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Linked Revit Files,Report,10,2024-05-21,21,5,2024
7,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Linked CAD Files,Report,1,2024-05-21,21,5,2024
8,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Imported CAD files,Report,0,2024-05-21,21,5,2024
9,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Imported SKP files,Report,0,2024-05-21,21,5,2024


In [185]:
# Hand the generated frame to dtale and open the resulting view in a browser
# for interactive inspection.
d = dtale.show(dummy_data)
d.open_browser()

In [121]:
# # dummy_data.Date.value_counts()
# dummy_data = dummy_data.drop_duplicates(['Project ID', 'PDH Name', 'Revit File', 'Name', 'Date'])
# # dummy_data = dummy_data.drop_duplicates(['Project Id', 'PDH Name', ' FileName', ' ElementID', ' UniqueCategory',
# #        'Services', 'Date'])

In [201]:
# Base path (without extension) under which the generated dummy data is saved.
# Forward slashes keep the path portable across operating systems and avoid the
# invalid "\h" / "\M" escape sequences that the backslash spelling relied on.
output_file_path = 'data/health/Model_Health_Data'
# output_file_path = 'data/health/LOI_Data'

In [202]:
# Persist the generated data twice under the same base path: once as CSV and
# once as Parquet, both without the DataFrame index.
dummy_data.to_csv(f'{output_file_path}.csv', index=False)
dummy_data.to_parquet(f'{output_file_path}.parquet', index=False)

2024-06-21 11:47:16,320 - INFO     - Executing shutdown due to inactivity...
2024-06-21 11:47:20,409 - INFO     - Executing shutdown...
2024-06-21 11:47:20,422 - INFO     - Not running with the Werkzeug Server, exiting by searching gc for BaseWSGIServer


In [124]:
# Read the Parquet file stored under output_file_path back into memory.
# NOTE(review): this rebinds `data`, which earlier held the source CSV; re-running
# the generation cell after this point would use the generated rows as its input.
data = pd.read_parquet(output_file_path+'.parquet')

In [125]:
# Preview the first rows of the reloaded data.
data.head()

Unnamed: 0,Project ID,PDH Name,Revit File,Name,Result,Count,Day,Month,Year,Date
0,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Warnings,Report,14,21,5,2024,2024-05-21
1,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,File Size,Report,185,21,5,2024,2024-05-21
2,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Purgable Elements,Report,445,21,5,2024,2024-05-21
3,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Worksets,Report,6,21,5,2024,2024-05-21
4,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Design Options,Report,0,21,5,2024,2024-05-21


In [115]:
# Open the reloaded data in dtale for interactive inspection.
# NOTE(review): dtale is already imported in the notebook's first cell, so this
# import is redundant on a top-to-bottom run.
import dtale
d = dtale.show(data)
d.open_browser()


The behavior of 'isin' with dtype=datetime64[ns] and castable values (e.g. strings) is deprecated. In a future version, these will not be considered matching by isin. Explicitly cast to the appropriate dtype before calling isin instead.


The behavior of 'isin' with dtype=datetime64[ns] and castable values (e.g. strings) is deprecated. In a future version, these will not be considered matching by isin. Explicitly cast to the appropriate dtype before calling isin instead.


The behavior of 'isin' with dtype=datetime64[ns] and castable values (e.g. strings) is deprecated. In a future version, these will not be considered matching by isin. Explicitly cast to the appropriate dtype before calling isin instead.


The behavior of 'isin' with dtype=datetime64[ns] and castable values (e.g. strings) is deprecated. In a future version, these will not be considered matching by isin. Explicitly cast to the appropriate dtype before calling isin instead.


The behavior of 'isin' with dtype=datetime6

In [78]:
# (rows, columns) of the reloaded data, recorded before duplicates are dropped.
data.shape

(600, 10)

In [79]:
# Show the column names (and their order) of the reloaded data.
data.columns

Index(['Project ID', 'PDH Name', 'Revit File', 'Name', 'Result', 'Count',
       'Day', 'Month', 'Year', 'Date'],
      dtype='object')

In [80]:
# Drop rows that repeat the same combination of key columns; the first
# occurrence of each combination is the one that is kept.
dedup_keys = ['Project ID', 'PDH Name', 'Revit File', 'Name', 'Date']
data = data.drop_duplicates(subset=dedup_keys)

In [81]:
# (rows, columns) after dropping duplicates; compare with the earlier shape to
# see how many rows were removed.
data.shape

(600, 10)

In [73]:
# Preview the first 40 rows of the data.
data.head(40)

Unnamed: 0,Project ID,PDH Name,Revit File,Name,Result,Count,Day,Month,Year,Date
0,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Warnings,Report,14,20,5,2024,2024-05-20
1,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,File Size,Report,185,20,5,2024,2024-05-20
2,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Purgable Elements,Report,445,20,5,2024,2024-05-20
3,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Worksets,Report,6,20,5,2024,2024-05-20
4,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Design Options,Report,0,20,5,2024,2024-05-20
5,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Duplicate Instances,Report,47,20,5,2024,2024-05-20
6,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Linked Revit Files,Report,10,20,5,2024,2024-05-20
7,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Linked CAD Files,Report,1,20,5,2024,2024-05-20
8,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Imported CAD files,Report,0,20,5,2024,2024-05-20
9,76941,Lokesh Sharma,rac_advanced_sample_project.rvt,Imported SKP files,Report,0,20,5,2024,2024-05-20
