# Create a file in which every value is 50 Hz

In [None]:
from pathlib import Path

import pandas as pd

# Target path for the Feather file written further down in this section.
# Built with pathlib so the separator is right on every OS: the literal string
# '..\test\mock-week.feather' is a relative path only on Windows; elsewhere it
# is a single file name that happens to contain backslashes.
file_path_for_create = Path('..') / 'test' / 'mock-week.feather'

In [None]:
# Build a frame of 1-second samples between start_date and end_date (both
# endpoints included) in which every 'Value' is 50.0.
start_date = '2019-12-31 23:00:00'
end_date = '2020-01-01 00:00:00'

# Lowercase 's' is the current alias for "second"; the uppercase 'S' spelling
# emits a FutureWarning from pandas 2.2 onwards.
date_index = pd.date_range(start=start_date, end=end_date, freq='s')

# Create the column as float64 directly rather than as an all-NaN object
# column that is overwritten on the next line.
df = pd.DataFrame({'Value': 50.000}, index=date_index)
print(df)

In [None]:
# Write the generated frame to file_path_for_create in Feather format.
df.to_feather(file_path_for_create)

# For creating deviations

### Set deviation values at fixed times

In [None]:
from pathlib import Path

import pandas as pd

# Feather file to edit. Built with pathlib so the path separator is correct on
# every OS; a literal '..\test\...' string is only a relative path on Windows.
filepath = Path('..') / 'test' / 'mock-year-1.feather'
df = pd.read_feather(filepath)

# Print index range, column dtypes and memory usage of the loaded frame.
df.info()

In [None]:
# Inclusive time window whose samples are overwritten, parsed to Timestamps
# straight away.
start_date = pd.to_datetime('2019-12-31 23:00:00')
end_date = pd.to_datetime('2020-01-01 00:00:00')

# Value written to every sample inside the window.
new_value = 50.0
df.loc[start_date:end_date, 'Value'] = new_value

# Print the edited window together with two seconds of context on each side.
context = pd.Timedelta(seconds=2)
mask_changed = (df.index >= start_date - context) & (df.index <= end_date + context)

print(df.loc[df.index[mask_changed]])

In [None]:
# Show floats with one decimal instead of scientific notation.
pd.options.display.float_format = '{:.1f}'.format

# Divider printed after each report.
separator = '\n\n--------------------\n\n'

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would append a stray "None" line. Call it directly, then emit the
# divider on its own.
df.info()
print(end=separator)

# Frequency of each distinct row, followed by the descriptive statistics.
print(df.value_counts(), end=separator)
print(df.describe(), end=separator)

In [None]:
# Write the edited frame to `filepath`. This is the path the frame was read
# from in this section's load cell, so the input file is overwritten.
df.to_feather(filepath)

### Set a number of deviation values at random times in the dataset

In [None]:
import random
from pathlib import Path

import pandas as pd

# Feather file to edit. Built with pathlib so the path separator is correct on
# every OS; a literal '..\test\...' string is only a relative path on Windows.
filepath = Path('..') / 'test' / 'mock-year.feather'
df = pd.read_feather(filepath)

# Print index range, column dtypes and memory usage of the loaded frame.
df.info()



In [None]:
# Overwrite `num_deviations` short runs of samples, each starting at a random
# timestamp and lasting 1-10 seconds, with a value drawn from a small pool.
num_deviations = 200000
min_deviation_value = 49.0
max_deviation_value = 51.0

# Fixed seed so the generated mock data is reproducible from run to run.
# Set to None to get different deviations on every run.
random_seed = 42

# Private generator: seeding it leaves the global `random` state untouched.
rng = random.Random(random_seed)

# Pool of ten candidate values, rounded to one decimal.
deviation_values = [round(rng.uniform(min_deviation_value, max_deviation_value), 1) for _ in range(10)]

# The loop below turns label slices into positions with searchsorted, which
# matches df.loc[start:end] only when the index is sorted.
if not df.index.is_monotonic_increasing:
    raise ValueError('df index must be sorted in increasing order')

# Edit a plain array and write it back once; this avoids running the
# label-based df.loc assignment machinery once per deviation.
index = df.index
values = df['Value'].to_numpy(copy=True)

for _ in range(num_deviations):
    random_start = rng.choice(index)
    random_end = random_start + pd.Timedelta(seconds=rng.randint(1, 10))

    # side='left' / side='right' make both endpoints inclusive, exactly as
    # df.loc[random_start:random_end] does. Later runs overwrite earlier ones.
    first_pos = index.searchsorted(random_start, side='left')
    stop_pos = index.searchsorted(random_end, side='right')
    values[first_pos:stop_pos] = rng.choice(deviation_values)

df['Value'] = values

In [None]:
# Show floats with one decimal instead of scientific notation.
pd.options.display.float_format = '{:.1f}'.format

# Divider printed after each report.
separator = '\n\n--------------------\n\n'

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would append a stray "None" line. Call it directly, then emit the
# divider on its own.
df.info()
print(end=separator)

# Frequency of each distinct row, followed by the descriptive statistics.
print(df.value_counts(), end=separator)
print(df.describe(), end=separator)

In [None]:
# Write the edited frame to `filepath`. This is the path the frame was read
# from in this section's load cell, so the input file is overwritten.
df.to_feather(filepath)

# Add/remove data to compensate for time zone differences and convert the data to the Europe/Stockholm time zone

In [12]:
from pathlib import Path

import pandas as pd

# Feather file to edit. Built with pathlib so the path separator is correct on
# every OS; a literal '..\test\...' string is only a relative path on Windows.
filepath = Path('..') / 'test' / 'mock-1_39min.feather'
df = pd.read_feather(filepath)

# Print index range, column dtypes and memory usage of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 100 entries, 2022-01-04 00:00:00 to 2022-01-04 00:01:39
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Value   100 non-null    float64
dtypes: float64(1)
memory usage: 1.6 KB


In [13]:
# Attach the UTC zone to the index, then convert it to Europe/Stockholm.
# tz_localize only accepts a tz-naive index, so this cell cannot be re-run on
# an index that has already been converted.
df.index = (
    df.index
    .tz_localize('UTC', ambiguous='infer', nonexistent='shift_forward')
    .tz_convert('Europe/Stockholm')
)

In [14]:
# UTC offset, expressed in hours, of the earliest and the latest timestamp.
one_hour = pd.Timedelta(hours=1)
hours_off_start = df.index.min().utcoffset() / one_hour
hours_off_end = df.index.max().utcoffset() / one_hour

print(hours_off_start, '\n\n\n', hours_off_end, '\n\n\n', df)

1.0 


 1.0 


                            Value
2022-01-04 01:00:00+01:00   50.0
2022-01-04 01:00:01+01:00   50.0
2022-01-04 01:00:02+01:00   50.0
2022-01-04 01:00:03+01:00   50.0
2022-01-04 01:00:04+01:00   50.0
...                          ...
2022-01-04 01:01:35+01:00   49.7
2022-01-04 01:01:36+01:00   49.7
2022-01-04 01:01:37+01:00   49.7
2022-01-04 01:01:38+01:00   50.0
2022-01-04 01:01:39+01:00   50.0

[100 rows x 1 columns]


In [15]:
# Move the trailing hour(s) of the series to the front: the last
# `hours_off_end` clock hours are removed from the end of the frame and their
# values are re-inserted as 1-second samples starting `hours_off_start` hours
# before the current first timestamp.
#
# NOTE(review): when hours_off_start != hours_off_end the number of moved
# samples does not match the gap opened at the front, so the re-inserted rows
# may overlap existing rows or leave a hole. Confirm against data that spans a
# DST change.

# Only whole-hour offsets of 1 or 2 are handled. The original fell through
# with both window bounds set to None, which cannot select a meaningful
# window; fail loudly instead.
if hours_off_end not in (1, 2):
    raise ValueError(f'Unsupported UTC offset at end of data: {hours_off_end} h (expected 1 or 2)')

# First timestamp of the block that is added at the front. The value is
# already a Timestamp, so no pd.to_datetime() round-trip is needed.
first_hour = df.index.min() - pd.Timedelta(hours=hours_off_start)
start_time_add = first_hour

# Deleting the last hour(s) and moving the values to the front.
last_hour = df.index.max()
last_hour_start = last_hour.replace(minute=0, second=0, microsecond=0)

# The window opens at the top of the last clock hour, one hour earlier when
# the offset is 2.
start_time_delete = last_hour_start - pd.Timedelta(hours=hours_off_end - 1)
# It closes at the very end of the last clock hour. This uses the last
# timestamp's own hour (the 2-hour branch used to hard-code hour=1) and the
# final microsecond 999999 (was 59).
end_time_delete = last_hour.replace(minute=59, second=59, microsecond=999999)

# One mask serves both for reading the values and for dropping the rows.
in_deleted_window = (df.index >= start_time_delete) & (df.index <= end_time_delete)
last_hour_values = df.loc[in_deleted_window, 'Value'].values
df = df.loc[~in_deleted_window]

# Adding the first hour and assigning values. Lowercase 's' is the current
# alias for "second"; 'S' is deprecated since pandas 2.2.
temp_df = pd.DataFrame(
    {'Value': last_hour_values},
    index=pd.date_range(start_time_add, periods=len(last_hour_values), freq='s'),
)
df = pd.concat([temp_df, df])

print(df.head(2),'\n\n\n', df.tail(2))

                           Value
2022-01-04 00:00:00+01:00   50.0
2022-01-04 00:00:01+01:00   50.0 


                            Value
2022-01-04 00:01:38+01:00   50.0
2022-01-04 00:01:39+01:00   50.0


In [16]:
# Write the shifted frame to `filepath`. This is the path the frame was read
# from in this section's load cell, so the input file is overwritten.
df.to_feather(filepath)

In [6]:
# df[(df.index >= end_time_delete - pd.Timedelta(days=31,hours=1,seconds=5)) & (df.index <= end_time_delete - pd.Timedelta(days=28))]

Unnamed: 0,Value
2022-09-01 00:00:00+02:00,50.0
2022-09-01 00:00:01+02:00,50.0
2022-09-01 00:00:02+02:00,50.0
2022-09-01 00:00:03+02:00,50.0
2022-09-01 00:00:04+02:00,50.0
...,...
2022-09-03 01:59:55+02:00,49.2
2022-09-03 01:59:56+02:00,50.0
2022-09-03 01:59:57+02:00,50.0
2022-09-03 01:59:58+02:00,50.0
