In [5]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import scienceplots
plt.style.use(['science'])

In [7]:
directory = "C:/Users/Robbe/SolNet-2/CostaRica"
total_df = pd.DataFrame()
for filename in ["2019","2020","2021"]:
    f = os.path.join(directory, filename)
    for nested_filename in os.listdir(f):
        f_nested = os.path.join(f, nested_filename)
        for csv_file in os.listdir(f_nested):
            f_nn = os.path.join(f_nested, csv_file)
            new_df = pd.read_csv(f_nn, sep=',')
            total_df = pd.concat([total_df, new_df])
total_df['Time'] = pd.to_datetime(total_df['Time'], format='mixed')
total_df = total_df.set_index('Time')
total_df = total_df.sort_index()
total_df = total_df.drop(total_df[~total_df['Energy (Wh)'].isna()].index) #There are aggregated values but we no interested


start = total_df.index.min()
end = total_df.index.max()
print(start, end)
expected_timestamps = pd.date_range(start=start, end=end, freq='15min')  # Assuming data is recorded every 15 minutes

missing_timestamps = expected_timestamps[~expected_timestamps.isin(total_df.index)]

if not missing_timestamps.empty:
    print(f" Missing Timestamps: {missing_timestamps}")
else:
    print(f"No missing chronological data.")

total_df = total_df.resample('h').mean()
total_df.head(50)


2019-10-04 00:00:00 2021-11-11 23:45:00
 Missing Timestamps: DatetimeIndex(['2019-11-01', '2019-12-01', '2020-01-01', '2020-02-01',
               '2020-03-01', '2020-04-01', '2020-05-01', '2020-06-01',
               '2020-07-01', '2020-08-01', '2020-09-01', '2020-10-01',
               '2020-11-01', '2020-12-01', '2021-01-01', '2021-02-01',
               '2021-03-01', '2021-04-01', '2021-05-01', '2021-06-01',
               '2021-07-01', '2021-08-01', '2021-09-01', '2021-10-01',
               '2021-11-01'],
              dtype='datetime64[ns]', freq=None)


Unnamed: 0_level_0,Battery Charge Level (%),Consumption (W),Export (W),Import (W),Solar Production (W),StoragePower.Power (W),System Production (W),Energy (Wh)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-10-04 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 03:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 04:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 05:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 06:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 07:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 08:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2019-10-04 09:00:00,0.0,2070.0547,192.53145,805.836125,1456.750025,0.0,1456.750025,


In [8]:
total_df.tail()

Unnamed: 0_level_0,Battery Charge Level (%),Consumption (W),Export (W),Import (W),Solar Production (W),StoragePower.Power (W),System Production (W),Energy (Wh)
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-11-11 19:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2021-11-11 20:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2021-11-11 21:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2021-11-11 22:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2021-11-11 23:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [11]:
solar_generation = total_df[['Solar Production (W)']]
##04/10/2019 - 11/11/2021
solar_generation = solar_generation.tz_localize('America/Costa_Rica')
solar_generation = solar_generation.tz_convert('UTC')
solar_generation.head(50)

Unnamed: 0_level_0,Solar Production (W)
Time,Unnamed: 1_level_1
2019-10-04 06:00:00+00:00,0.0
2019-10-04 07:00:00+00:00,0.0
2019-10-04 08:00:00+00:00,0.0
2019-10-04 09:00:00+00:00,0.0
2019-10-04 10:00:00+00:00,0.0
2019-10-04 11:00:00+00:00,0.0
2019-10-04 12:00:00+00:00,0.0
2019-10-04 13:00:00+00:00,0.0
2019-10-04 14:00:00+00:00,0.0
2019-10-04 15:00:00+00:00,1456.750025


In [12]:
print(solar_generation.isna().sum())
print((solar_generation < 0).sum())
solar_generation.to_pickle('Data/CostaRica.pkl')

Solar Production (W)    0
dtype: int64
Solar Production (W)    0
dtype: int64
