# Time series operations in pandas

This document is not required for the class activity submission.


We'll utilize `pd` and `datetime` to explore time series operations in python.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

In [2]:
# Creating a single Timestamp
timestamp = pd.Timestamp('2023-10-03')
print("Single Timestamp:", timestamp)

# Creating a range of dates
date_range = pd.date_range(start='2023-01-01', end='2023-01-07')
date_range


Single Timestamp: 2023-10-03 00:00:00


DatetimeIndex(['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04',
               '2023-01-05', '2023-01-06', '2023-01-07'],
              dtype='datetime64[ns]', freq='D')

Often, date and time data come in string formats. You can convert these strings to datetime objects using `pd.to_datetime()`

In [3]:
date_strings = ['2023-01-01', '2023-02-01', '2023-03-01']
dates = pd.to_datetime(date_strings)
print("Converted Dates:")
print(dates)

Converted Dates:
DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'], dtype='datetime64[ns]', freq=None)


If your date strings have a specific format, you can specify it using the `format` parameter.

In [4]:
date_strings = ['01/01/2023', '01/02/2023', '01/03/2023']
dates = pd.to_datetime(date_strings, format='%d/%m/%Y')
dates

DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'], dtype='datetime64[ns]', freq=None)

Format of time: [https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior)

In [5]:
# Create a DataFrame with random data
dates = pd.date_range('2023-01-01', periods=7)
data = np.random.randn(7)
df = pd.DataFrame(data, index=dates, columns=['Value'])
df

Unnamed: 0,Value
2023-01-01,0.093141
2023-01-02,-0.130254
2023-01-03,1.247183
2023-01-04,0.48015
2023-01-05,0.853044
2023-01-06,1.104431
2023-01-07,-1.114943


In [6]:
# Resample to a higher frequency (daily to hourly) and forward-fill missing values
df_hourly = df.resample('h').ffill()
print("Resampled Data (Hourly):")
df_hourly.head(10)

Resampled Data (Hourly):


Unnamed: 0,Value
2023-01-01 00:00:00,0.093141
2023-01-01 01:00:00,0.093141
2023-01-01 02:00:00,0.093141
2023-01-01 03:00:00,0.093141
2023-01-01 04:00:00,0.093141
2023-01-01 05:00:00,0.093141
2023-01-01 06:00:00,0.093141
2023-01-01 07:00:00,0.093141
2023-01-01 08:00:00,0.093141
2023-01-01 09:00:00,0.093141


In [7]:
# Shift data forward by one period
shifted_forward = df.shift(1)
print("\nData Shifted Forward by One Day:")
print(shifted_forward)

# Shift data backward by one period
shifted_backward = df.shift(-1)
print("\nData Shifted Backward by One Day:")
print(shifted_backward)


Data Shifted Forward by One Day:
               Value
2023-01-01       NaN
2023-01-02  0.093141
2023-01-03 -0.130254
2023-01-04  1.247183
2023-01-05  0.480150
2023-01-06  0.853044
2023-01-07  1.104431

Data Shifted Backward by One Day:
               Value
2023-01-01 -0.130254
2023-01-02  1.247183
2023-01-03  0.480150
2023-01-04  0.853044
2023-01-05  1.104431
2023-01-06 -1.114943
2023-01-07       NaN


In [8]:
# Generate a range of business days
business_days = pd.date_range('2023-01-01', periods=5, freq='B')
print("Business Days:")
print(business_days)

# Generate a range of monthly start dates
month_starts = pd.date_range('2023-01-01', periods=3, freq='MS')
print("\nMonth Starts:")
print(month_starts)


Business Days:
DatetimeIndex(['2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05',
               '2023-01-06'],
              dtype='datetime64[ns]', freq='B')

Month Starts:
DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01'], dtype='datetime64[ns]', freq='MS')


Count days and seconds use `datetime`

In [9]:
from datetime import datetime

start_date = datetime(2024, 1, 1)


given_date = datetime(2024, 10, 3, hour=17, minute=15, second=11, microsecond=0)

# Calculate the time difference
time_difference = given_date - start_date

# Calculate the difference in days, seconds, and weeks
days_difference = time_difference.days
days_difference

276

In [10]:
seconds_difference = time_difference.total_seconds()
seconds_difference

23908511.0

In [11]:
time_difference

datetime.timedelta(days=276, seconds=62111)

In [12]:
datetime.today()

datetime.datetime(2024, 10, 6, 20, 57, 6, 692049)

In [13]:
# check if 2000 days from today is a Sunday
from datetime import datetime, timedelta

# Get today's date
today = datetime.today()

# Calculate the date 2000 days from today
future_date = today + timedelta(days=2000)

# Check if the day is Sunday (Sunday is 6 in weekday())
future_date.weekday()

4

### Example:  Check the date and time 100,000 seconds from now


In [14]:
# check if 2000 days from today is a Sunday
from datetime import datetime, timedelta

# Get today's date
today = datetime.today()

# Calculate the date 100,000 seconds from today
future_date = today + timedelta(seconds=100000)

future_date

datetime.datetime(2024, 10, 8, 0, 43, 46, 759728)

# Exercise:

In [15]:
import seaborn as sns
flights = sns.load_dataset('flights')
print("First Five Rows of Flights Dataset:")
flights.head()

First Five Rows of Flights Dataset:


Unnamed: 0,year,month,passengers
0,1949,Jan,112
1,1949,Feb,118
2,1949,Mar,132
3,1949,Apr,129
4,1949,May,121


In [16]:
# Combine 'year' and 'month' into a single 'date' column with specified format
flights['date'] = pd.to_datetime(flights['year'].astype(str) + '-' + flights['month'].astype(str), format='%Y-%b')

print("First Five Rows with Date Column:")
flights.head()

First Five Rows with Date Column:


Unnamed: 0,year,month,passengers,date
0,1949,Jan,112,1949-01-01
1,1949,Feb,118,1949-02-01
2,1949,Mar,132,1949-03-01
3,1949,Apr,129,1949-04-01
4,1949,May,121,1949-05-01


In [17]:
# Set 'date' as the index
flights.set_index('date', inplace=True)
print("DataFrame with DateTime Index:")
flights.head()

DataFrame with DateTime Index:


Unnamed: 0_level_0,year,month,passengers
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1949-01-01,1949,Jan,112
1949-02-01,1949,Feb,118
1949-03-01,1949,Mar,132
1949-04-01,1949,Apr,129
1949-05-01,1949,May,121


In [18]:
# Resample the data to quarterly frequency and calculate the sum of passengers
flights_quarterly = flights.resample('QE')['passengers'].sum()
print("Quarterly Passenger Totals:")
flights_quarterly.head()

ValueError: Invalid frequency: QE

In [19]:
# Resample the data to yearly frequency and calculate the mean of passengers
flights_yearly = flights.resample('YE')['passengers'].mean()
print("\nYearly Passenger Averages:")
flights_yearly.head()

ValueError: Invalid frequency: YE

In [None]:
# Calculate the 12-month rolling mean of passengers
flights['rolling_mean'] = flights['passengers'].rolling(window=12).mean()
print("\nData with 12-Month Rolling Mean of Passengers:")
flights[['passengers', 'rolling_mean']].head(15)

In [None]:
# Plot the monthly passenger data
plt.figure(figsize=(12, 6))
plt.plot(flights.index, flights['passengers'], label='Passengers')
plt.title('Monthly Number of Airline Passengers (1949-1960)')
plt.xlabel('Date')
plt.ylabel('Number of Passengers')
plt.legend()
plt.grid(True)

In [None]:
# Plot the rolling mean along with the original data
plt.figure(figsize=(12, 6))
plt.plot(flights.index, flights['passengers'], label='Original')
plt.plot(flights.index, flights['rolling_mean'], label='12-Month Rolling Mean', color='red')
plt.title('Monthly Passengers with 12-Month Rolling Mean')
plt.xlabel('Date')
plt.ylabel('Number of Passengers')
plt.legend()
plt.grid(True)