## Timestamp converter

In [11]:
import pandas as pd
from datetime import datetime, timedelta

# Location of the raw CSV; the '.csv' extension is appended when reading.
file_path = './'
file_name = 'mycsv2 (1)'

# Load the file and discard the 'Unnamed: 0' column
# (presumably an index written by an earlier to_csv call — confirm).
time_df = pd.read_csv(f'{file_path}{file_name}.csv').drop(columns=['Unnamed: 0'])
time_df.head()

In [12]:
# Column names, dtypes and non-null counts of the freshly loaded frame.
time_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86083 entries, 0 to 86082
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   DateTime    86083 non-null  object
 1   FolderPath  86083 non-null  object
 2   FileName    86083 non-null  object
dtypes: object(3)
memory usage: 2.0+ MB


In [13]:
def dateTimeObj_converter(date):
    """Parse a timestamp string into a datetime whose seconds are zeroed.

    Arguments:
        date (str): timestamp text in the format '%Y-%m-%d %H:%M:%S'.

    Returns:
        datetime: the parsed value with its seconds field set to 0
        (seconds are discarded, not rounded).

    Raises:
        ValueError: if `date` does not match the expected format.
    """
    parsed = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
    return parsed.replace(second=0)

In [14]:
# Parse each timestamp string (dateTimeObj_converter zeroes the seconds),
# then snap the result to the 5-minute grid.
# NOTE(review): because seconds are dropped before rounding, a value such as
# 06:32:45 ends up at 06:30 rather than 06:35 — confirm this is intended.
snapped = time_df['DateTime'].apply(dateTimeObj_converter).dt.round('5min')

# Build a new frame so the raw `time_df` stays untouched, and keep only the
# first row seen for every 5-minute slot.
time_df_tmp = (
    time_df
    .assign(DateTime=snapped)
    .drop_duplicates(subset=['DateTime'])
)

# The index is written as well, so the output file gains a leading unnamed column.
time_df_tmp.to_csv(f'{file_path}{file_name}_edited.csv', index=True)

In [15]:
# Check the DateTime dtype after conversion and how many rows remain after deduplication.
time_df_tmp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 85389 entries, 0 to 86082
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   DateTime    85389 non-null  datetime64[ns]
 1   FolderPath  85389 non-null  object        
 2   FileName    85389 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 2.6+ MB


## Missing date collector

In [26]:
# Reload the rounded/deduplicated CSV and drop the index column that was saved with it.
df = pd.read_csv('mycsv2 (1)_edited.csv')
df = df.drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,DateTime,FolderPath,FileName
0,2020-06-18 06:30:00,/content/bkk_radar_images_1/,1592461954.png
1,2020-06-18 06:35:00,/content/bkk_radar_images_1/,1592462136.png
2,2020-06-18 06:45:00,/content/bkk_radar_images_1/,1592462768.png
3,2020-06-18 06:50:00,/content/bkk_radar_images_1/,1592463068.png
4,2020-06-18 06:55:00,/content/bkk_radar_images_1/,1592463277.png


In [55]:
# DateTime values as a plain Python list, in row order.
# read_csv was not asked to parse dates, so the items are still strings.
lt_date = df.DateTime.tolist()
lt_date[:5]

['2020-06-18 06:30:00',
 '2020-06-18 06:35:00',
 '2020-06-18 06:45:00',
 '2020-06-18 06:50:00',
 '2020-06-18 06:55:00']

In [56]:
# Scan bounds are taken from the first and last rows of the list
# (assumes the rows are in chronological order — TODO confirm).
ts_format = '%Y-%m-%d %H:%M:%S'
start_date, end_date = (datetime.strptime(lt_date[pos], ts_format) for pos in (0, -1))

# Step between expected timestamps.
# NOTE(review): this rebinds `time_df`, which earlier in the notebook held the
# raw DataFrame; re-running the earlier cells after this one will fail.
time_df = timedelta(minutes=5)

display(start_date, end_date)

datetime.datetime(2020, 6, 18, 6, 30)

datetime.datetime(2021, 6, 28, 21, 20)

In [57]:
# Walk the expected timestamp grid from start_date (inclusive) to end_date
# (exclusive) in steps of `time_df` and collect every slot that has no entry
# in lt_date. Each missing slot is stored as a '%Y-%m-%d %H:%M:%S' string.
# NOTE: `time_df` here is the timedelta step defined in the previous cell,
# not the DataFrame loaded at the top of the notebook.

# Set membership is a hash lookup; testing against the list would rescan
# every recorded timestamp on each step of the loop.
present_dates = set(lt_date)

missing_date = []

# Iterate on a separate cursor so start_date is left intact and re-running
# this cell produces the same result instead of an empty list.
cursor = start_date

# `<` instead of `!=`: the loop still terminates when end_date does not fall
# exactly on the grid, or when it precedes start_date (e.g. unsorted rows).
while cursor < end_date:
    slot = cursor.strftime("%Y-%m-%d %H:%M:%S")
    if slot not in present_dates:
        missing_date.append(slot)
    cursor += time_df

len(missing_date)

22790

In [58]:
# Peek at the first few missing timestamps.
missing_date[:5]

['2020-06-18 06:40:00',
 '2020-06-18 07:15:00',
 '2020-06-18 07:35:00',
 '2020-06-18 07:50:00',
 '2020-06-18 08:30:00']

In [60]:
# Wrap the missing timestamps in a one-column frame so they can be compared and exported.
missing_df = pd.DataFrame(data=missing_date, columns=['DateTime'])
missing_df.head()

Unnamed: 0,DateTime
0,2020-06-18 06:40:00
1,2020-06-18 07:15:00
2,2020-06-18 07:35:00
3,2020-06-18 07:50:00
4,2020-06-18 08:30:00


In [68]:
# Sanity check: flag each missing timestamp with 1 if it also appears in the
# original frame, else 0. Every flag should be 0, i.e. no overlap.

overlap_flags = missing_df['DateTime'].isin(df['DateTime']).astype(int)
overlap_flags.value_counts()

0    22790
Name: DateTime, dtype: int64

In [61]:
# Persist the missing timestamps; index=True also writes the row index as the first column.
missing_df.to_csv('missing.csv', index=True)