# Handle time series using pandas

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Read the NO2 air-quality CSV and rename the "date.utc" column to
# "datetime" so later cells can refer to it by a simpler name.
column_renames = {"date.utc": "datetime"}
air = pd.read_csv("../Data/air_quality_no2_long.csv").rename(columns=column_renames)
air.head()

Unnamed: 0,city,country,datetime,location,parameter,value,unit
0,Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³
1,Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³
2,Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³
3,Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³
4,Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³


In [5]:
# Distinct values found in the "city" column.
air["city"].unique()

array(['Paris', 'Antwerpen', 'London'], dtype=object)

### How to handle time series data with ease

In [6]:
# Display the "datetime" column as loaded, so its dtype can be inspected.
air.loc[:, "datetime"]

0       2019-06-21 00:00:00+00:00
1       2019-06-20 23:00:00+00:00
2       2019-06-20 22:00:00+00:00
3       2019-06-20 21:00:00+00:00
4       2019-06-20 20:00:00+00:00
                  ...            
2063    2019-05-07 06:00:00+00:00
2064    2019-05-07 04:00:00+00:00
2065    2019-05-07 03:00:00+00:00
2066    2019-05-07 02:00:00+00:00
2067    2019-05-07 01:00:00+00:00
Name: datetime, Length: 2068, dtype: object

In [8]:
# Parse the "datetime" column with pd.to_datetime and store the result back
# in the same column, then display it to show the new dtype.
air["datetime"] = air["datetime"].pipe(pd.to_datetime)
air["datetime"]

0      2019-06-21 00:00:00+00:00
1      2019-06-20 23:00:00+00:00
2      2019-06-20 22:00:00+00:00
3      2019-06-20 21:00:00+00:00
4      2019-06-20 20:00:00+00:00
                  ...           
2063   2019-05-07 06:00:00+00:00
2064   2019-05-07 04:00:00+00:00
2065   2019-05-07 03:00:00+00:00
2066   2019-05-07 02:00:00+00:00
2067   2019-05-07 01:00:00+00:00
Name: datetime, Length: 2068, dtype: datetime64[ns, UTC]

### Before conversion (top):

- `dtype: object` means the datetime column is stored as strings (text), not as actual datetime objects.
- pandas treats these values as generic text, so you cannot perform datetime-specific operations on them efficiently.

### After conversion (bottom):

- `dtype: datetime64[ns, UTC]` means the column is now stored as actual datetime objects with nanosecond precision in the UTC timezone.
- You can now do datetime math, filtering, resampling, etc. much faster and more easily.

---

### What are the start and end dates of the time series data set we are working with?

---

In [9]:
# Earliest and latest timestamps in the data set, as a (min, max) tuple.
datetime_col = air["datetime"]
(datetime_col.min(), datetime_col.max())

(Timestamp('2019-05-07 01:00:00+0000', tz='UTC'),
 Timestamp('2019-06-21 00:00:00+0000', tz='UTC'))