# Resample Dates step by step

In [5]:
import pandas as pd

df = pd.read_csv("data/power_consumption_g3_feat.csv")
df

Unnamed: 0,Date,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows
0,2017-03-01 16:40:00,21.33,55.91,0.080,387.400,427.300
1,2017-07-27 06:30:00,23.10,48.58,4.908,10.450,8.630
2,2017-10-11 19:00:00,23.10,59.82,0.084,0.446,0.322
3,2017-02-10 06:50:00,12.25,80.80,4.916,0.051,0.111
4,2017-03-06 16:00:00,15.62,59.38,0.075,533.400,579.900
...,...,...,...,...,...,...
52411,2017-05-14 02:20:00,23.58,43.10,0.075,0.110,0.122
52412,2017-11-17 19:20:00,17.30,76.50,0.075,0.040,0.148
52413,2017-03-21 12:10:00,17.90,50.28,0.081,837.000,296.700
52414,2017-07-28 05:10:00,25.23,61.32,4.907,0.091,0.119


In [6]:
df.isnull().sum()

Date                     0
Temperature            559
Humidity               499
WindSpeed              529
GeneralDiffuseFlows    524
DiffuseFlows           505
dtype: int64

### 1. Convert the 'Stamp' Column to DateTime  Format

In [8]:
df['Date'] = pd.to_datetime(df['Date'])
df

Unnamed: 0,Date,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows
0,2017-03-01 16:40:00,21.33,55.91,0.080,387.400,427.300
1,2017-07-27 06:30:00,23.10,48.58,4.908,10.450,8.630
2,2017-10-11 19:00:00,23.10,59.82,0.084,0.446,0.322
3,2017-02-10 06:50:00,12.25,80.80,4.916,0.051,0.111
4,2017-03-06 16:00:00,15.62,59.38,0.075,533.400,579.900
...,...,...,...,...,...,...
52411,2017-05-14 02:20:00,23.58,43.10,0.075,0.110,0.122
52412,2017-11-17 19:20:00,17.30,76.50,0.075,0.040,0.148
52413,2017-03-21 12:10:00,17.90,50.28,0.081,837.000,296.700
52414,2017-07-28 05:10:00,25.23,61.32,4.907,0.091,0.119


In [9]:
df.isnull().sum()

Date                     0
Temperature            559
Humidity               499
WindSpeed              529
GeneralDiffuseFlows    524
DiffuseFlows           505
dtype: int64

### 2. Round the Timestamps to the Nearest 2-hour Interval

In [11]:
df['Date'] = df['Date'].dt.floor('2h')
df

Unnamed: 0,Date,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows
0,2017-03-01 16:00:00,21.33,55.91,0.080,387.400,427.300
1,2017-07-27 06:00:00,23.10,48.58,4.908,10.450,8.630
2,2017-10-11 18:00:00,23.10,59.82,0.084,0.446,0.322
3,2017-02-10 06:00:00,12.25,80.80,4.916,0.051,0.111
4,2017-03-06 16:00:00,15.62,59.38,0.075,533.400,579.900
...,...,...,...,...,...,...
52411,2017-05-14 02:00:00,23.58,43.10,0.075,0.110,0.122
52412,2017-11-17 18:00:00,17.30,76.50,0.075,0.040,0.148
52413,2017-03-21 12:00:00,17.90,50.28,0.081,837.000,296.700
52414,2017-07-28 04:00:00,25.23,61.32,4.907,0.091,0.119


In [12]:
df.isnull().sum()

Date                     0
Temperature            559
Humidity               499
WindSpeed              529
GeneralDiffuseFlows    524
DiffuseFlows           505
dtype: int64

### 3. Set the Date Column as the Index of the DataFrame

In [14]:
df.set_index("Date", inplace=True)
df

Unnamed: 0_level_0,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-03-01 16:00:00,21.33,55.91,0.080,387.400,427.300
2017-07-27 06:00:00,23.10,48.58,4.908,10.450,8.630
2017-10-11 18:00:00,23.10,59.82,0.084,0.446,0.322
2017-02-10 06:00:00,12.25,80.80,4.916,0.051,0.111
2017-03-06 16:00:00,15.62,59.38,0.075,533.400,579.900
...,...,...,...,...,...
2017-05-14 02:00:00,23.58,43.10,0.075,0.110,0.122
2017-11-17 18:00:00,17.30,76.50,0.075,0.040,0.148
2017-03-21 12:00:00,17.90,50.28,0.081,837.000,296.700
2017-07-28 04:00:00,25.23,61.32,4.907,0.091,0.119


In [15]:
df.isnull().sum()

Temperature            559
Humidity               499
WindSpeed              529
GeneralDiffuseFlows    524
DiffuseFlows           505
dtype: int64

### 4. Resample the Table to Regular 15-Minute Intervals

In [17]:
df = df.resample('2h').first()
df

Unnamed: 0_level_0,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-01 00:00:00,5.496,75.00,0.085,0.055,0.093
2017-01-01 02:00:00,4.897,79.10,0.083,0.070,0.096
2017-01-01 04:00:00,4.382,76.90,0.081,0.073,0.148
2017-01-01 06:00:00,4.489,74.30,0.082,0.081,0.119
2017-01-01 08:00:00,4.807,73.10,0.082,0.955,0.949
...,...,...,...,...,...
2017-12-30 14:00:00,14.530,41.99,0.077,333.900,50.030
2017-12-30 16:00:00,14.150,41.52,0.076,140.000,175.000
2017-12-30 18:00:00,9.500,62.22,0.078,0.059,0.111
2017-12-30 20:00:00,7.950,70.20,0.082,0.062,0.085


In [18]:
df.isnull().sum()

Temperature            2
Humidity               0
WindSpeed              2
GeneralDiffuseFlows    0
DiffuseFlows           0
dtype: int64

### 5. Interpolate Missing Data Using Time-Based Interpolation

In [20]:
df.interpolate(method='time', inplace=True)
df

Unnamed: 0_level_0,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-01 00:00:00,5.496,75.00,0.085,0.055,0.093
2017-01-01 02:00:00,4.897,79.10,0.083,0.070,0.096
2017-01-01 04:00:00,4.382,76.90,0.081,0.073,0.148
2017-01-01 06:00:00,4.489,74.30,0.082,0.081,0.119
2017-01-01 08:00:00,4.807,73.10,0.082,0.955,0.949
...,...,...,...,...,...
2017-12-30 14:00:00,14.530,41.99,0.077,333.900,50.030
2017-12-30 16:00:00,14.150,41.52,0.076,140.000,175.000
2017-12-30 18:00:00,9.500,62.22,0.078,0.059,0.111
2017-12-30 20:00:00,7.950,70.20,0.082,0.062,0.085


In [21]:
df.isnull().sum()

Temperature            0
Humidity               0
WindSpeed              0
GeneralDiffuseFlows    0
DiffuseFlows           0
dtype: int64

In [22]:
df = pd.DataFrame({'WeekNr': range(101)}) 

In [6]:
import pandas as pd

new_col = pd.TimeStamp("2024-01-01")

df['Date'] = start_date + pd.to_timedelta(df[""])


startdate = df.TimeStamp('2024-01-01')

df['Date'] = start_date + pd.to_timedelta(df['WeekNr']*7, unit="D")

NameError: name 'df' is not defined