**<center><font size="5">Expanding Time</font></center>**
**<center><font size="4">Get more from a low-dimensional dataframe through the Time Column</font></center>**
***
<center><font size="2">Prepared by: Neha Nooka</font></center>

## Load in Libraries

In [1]:
# Data Handling
import numpy as np
import pandas as pd

# Data visualization Libraries
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
# Make 'notebook' the default Plotly renderer for every figure shown below:
pio.renderers.default='notebook'

# Misc
import warnings
# Silence all warnings for the rest of the session.
# NOTE(review): this blanket filter hides every warning category, including
# any raised by pandas — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

## Load Data

In [2]:
# Read the weather observations from the CSV file and preview the first rows:
csv_path = 'weatherData.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,station_key,datetime,AirTempCelsius
0,Whitefish N,2019-01-01,-8.936042
1,Whitefish N,2019-01-02,-5.57125
2,Whitefish N,2019-01-03,-2.245417
3,Whitefish N,2019-01-04,2.318125
4,Whitefish N,2019-01-05,-1.549167


# Initial Data Exploration

## Observed Temperatures over time by Station

In [5]:
# Temperature pattern over time at a single weather station (Whitefish N):
plot = px.scatter(df[df['station_key'] == 'Whitefish N'],
                  x='datetime', y='AirTempCelsius')
plot.update_layout(
    # Adjacent string literals are concatenated by Python, so the title no
    # longer contains the source indentation that a backslash continuation
    # inside the quotes would embed.
    title={'text': "Temperature Recordings over Time"
                   "<br><sup>Whitefish N, MT Weather Station</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='',
    yaxis_title='Temperature in Celsius',
    # Same legend title spelling as the other station plots in this notebook.
    legend_title_text='Weather Station:')
plot.show()

In [6]:
# Temperature pattern over time at a single weather station (Harding Cutoff):
plot = px.scatter(df[df['station_key'] == 'Harding Cutoff'],
                  x='datetime', y='AirTempCelsius')
plot.update_layout(
    # Adjacent string literals are concatenated by Python, so the title no
    # longer contains the source indentation that a backslash continuation
    # inside the quotes would embed.
    title={'text': "Temperature Recordings over Time"
                   "<br><sup>Harding Cutoff, SD Weather Station</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='',
    yaxis_title='Temperature in Celsius',
    legend_title_text='Weather Station:')
plot.show()

In [7]:
# Temperature patterns over time for every station, one colour per station:
plot = px.scatter(df, x='datetime', y='AirTempCelsius', color='station_key')
plot.update_layout(
    # The subtitle is built from adjacent string literals; a backslash
    # continuation inside the quotes would insert the next line's indentation
    # between "Harding Cutoff," and "SD".
    title={'text': "Temperature Recordings over Time"
                   "<br><sup>Whitefish N, MT and Harding Cutoff, "
                   "SD Weather Stations</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='',
    yaxis_title='Temperature in Celsius',
    legend_title_text='Weather Station:')
plot.show()

## Frequency of Temperature Observations by Station

In [8]:
# Overlaid histograms of recorded temperatures, one colour per station:
plot = px.histogram(df, x='AirTempCelsius', color='station_key',
                    barmode='overlay')
plot.update_layout(
    # The subtitle is built from adjacent string literals; a backslash
    # continuation inside the quotes would insert the next line's indentation
    # between "SD" and "Weather Stations".
    title={'text': "Number of Occurrences of Each Temperature (Celsius)"
                   "<br><sup>Whitefish N, MT and Harding Cutoff, SD "
                   "Weather Stations</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='Temperature in Celsius',
    yaxis_title='Count',
    legend_title_text='Weather Station:')
plot.show()


# Unpack 'datetime' Column

## Examples of Possibilities

In [11]:
# Parse the 'datetime' column into pandas datetime values so that the
# .dt accessor can be used on it in the cells below:
parsed_datetimes = pd.to_datetime(df['datetime'])
df['datetime'] = parsed_datetimes

## Data Preparation

In [13]:
# Derive calendar features from the 'datetime' column and store each one as a
# new column of the dataframe.
when = df['datetime'].dt

# Weekday name (example: Sunday) and day of the month:
df['DayName'] = when.day_name()
df['Day'] = when.day

# Month name (example: February) and month number:
df['MonthName'] = when.month_name()
df['Month'] = when.month

# Calendar year:
df['Year'] = when.year

# Julian date, i.e. the '%j' day-of-year field formatted as a string:
df['JulianDate'] = when.strftime('%j')

# Season lookup keyed by month name; any value that is not one of the twelve
# month names falls back to the literal string 'NaN'.
season_by_month = {
    'December': 'Winter', 'January': 'Winter', 'February': 'Winter',
    'March': 'Spring', 'April': 'Spring', 'May': 'Spring',
    'June': 'Summer', 'July': 'Summer', 'August': 'Summer',
    'September': 'Fall', 'October': 'Fall', 'November': 'Fall',
}
df['Season'] = [season_by_month.get(month_name, 'NaN')
                for month_name in df['MonthName']]

# View updated dataframe:
df.head()

Unnamed: 0,station_key,datetime,AirTempCelsius,DayName,Day,MonthName,Month,Year,JulianDate,Season
0,Whitefish N,2019-01-01,-8.936042,Tuesday,1,January,1,2019,1,Winter
1,Whitefish N,2019-01-02,-5.57125,Wednesday,2,January,1,2019,2,Winter
2,Whitefish N,2019-01-03,-2.245417,Thursday,3,January,1,2019,3,Winter
3,Whitefish N,2019-01-04,2.318125,Friday,4,January,1,2019,4,Winter
4,Whitefish N,2019-01-05,-1.549167,Saturday,5,January,1,2019,5,Winter


# Revisit Temperature Over Time

In [14]:
# Show one station with each observation coloured by its season.
# Colours are pinned to season names so they do not depend on the order in
# which the seasons first appear in the filtered rows.
season_colors = {'Winter': "#3366cc", 'Spring': "#109618",
                 'Summer': "#d62728", 'Fall': "#ff9900"}
plot = px.scatter(df[df['station_key'] == 'Whitefish N'],
                  x='datetime', y='AirTempCelsius', color='Season',
                  color_discrete_map=season_colors)
plot.update_layout(
    # The <sup> element is closed with </sup> (it was closed with </br>), and
    # adjacent string literals keep source indentation out of the title.
    title={'text': "Temperature Patterns by Season"
                   "<br><sup>Data from Whitefish N, MT Weather Station</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='',
    yaxis_title='Temperature in Celsius',
    legend_title_text='Season:')
plot.show()

## Compare Seasonal Temperature Recordings

In [15]:
# Overlaid temperature histograms for both stations, one facet row per season:
plot = px.histogram(df, x='AirTempCelsius', color='station_key',
                    barmode='overlay', facet_row='Season')
plot.update_layout(
    # Adjacent string literals keep the subtitle free of the indentation that
    # a backslash continuation would insert between "Harding" and "Cutoff".
    title={'text': "Temperature Recordings, 2019 to 2022"
                   "<br><sup>Whitefish N, MT and Harding Cutoff, SD "
                   "Weather Stations</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='Recorded Temperature',
    # Colour encodes the station, so the legend is titled accordingly (the
    # earlier 'Month' title was overwritten straight away and never shown).
    legend_title_text='Weather Station:')
# Blank the y-axis titles:
plot.update_yaxes(title="")
# Keep only the text after the last "=" in each annotation
# (e.g. "Season=Winter" becomes "Winter"):
plot.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
plot.show()

## Compare Year over Year with Julian date

In [17]:
# Prep data: keep the 2021 and 2022 observations, ordered by Julian date.
# sort_values returns a new frame here; sorting a filtered slice with
# inplace=True can trigger pandas' SettingWithCopyWarning.
df1 = df[df['Year'].isin([2021, 2022])].sort_values(by=['JulianDate'])

In [18]:
# Preview the first five rows of the sorted 2021/2022 subset:
df1.head(n=5)

Unnamed: 0,station_key,datetime,AirTempCelsius,DayName,Day,MonthName,Month,Year,JulianDate,Season
731,Whitefish N,2021-01-01,-0.218229,Friday,1,January,1,2021,1,Winter
2187,Harding Cutoff,2021-01-01,-0.434688,Friday,1,January,1,2021,1,Winter
2552,Harding Cutoff,2022-01-01,-21.971458,Saturday,1,January,1,2022,1,Winter
1096,Whitefish N,2022-01-01,-20.180729,Saturday,1,January,1,2022,1,Winter
732,Whitefish N,2021-01-02,0.695625,Saturday,2,January,1,2021,2,Winter


In [19]:
# Generate plot: one line per year for the Whitefish N station, with the
# Julian date on the x-axis so the years overlay each other.
plot = px.line(df1[df1['station_key'] == 'Whitefish N'],
               y="AirTempCelsius", x="JulianDate", color="Year",
               color_discrete_sequence=["#3366cc", "#d62728"])
plot.update_layout(
    # The plotted rows are not filtered by season, so the title does not say
    # "Summer"; the subtitle names the single station that is shown.
    title={'text': "Temperature Recordings, 2021 and 2022"
                   "<br><sup>Whitefish N, MT Weather Station</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='Julian Date',
    yaxis_title='Temperature in Degrees Celsius',
    # Colour encodes the year, so the legend is titled accordingly.
    legend_title_text='Year:')
plot.show()

## Compare Summer 2020 Between Two Stations

In [20]:
# Prep data: 2020 observations ordered by Julian date.
# sort_values returns a new frame here; sorting a filtered slice with
# inplace=True can trigger pandas' SettingWithCopyWarning.
df1 = df[df['Year'] == 2020].sort_values(by=['JulianDate'])

# Generate plot: summer rows only, one line per weather station.
plot = px.line(df1[df1['Season'] == 'Summer'],
               y="AirTempCelsius", x="JulianDate", color="station_key",
               color_discrete_sequence=["#3366cc", "#d62728"])
plot.update_layout(
    # Adjacent string literals keep the subtitle free of the indentation that
    # a backslash continuation would insert between "Harding" and "Cutoff".
    title={'text': "Summer Temperature Recordings, 2020"
                   "<br><sup>Whitefish N, MT and Harding Cutoff, SD "
                   "Weather Stations</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='Julian Date',
    yaxis_title='Temperature in Degrees Celsius',
    # Colour encodes the station (the earlier 'Month' title was overwritten
    # straight away and never shown).
    legend_title_text='Weather Station:')
plot.show()

## Compare Yearly Temperature Distributions using 'datetime' Column

In [21]:
# Generate plot: temperature distribution per year, read straight from the
# 'datetime' column, with one box colour per weather station.
plot = px.box(df, x=df.datetime.dt.year, color='station_key',
              y='AirTempCelsius',
              color_discrete_sequence=px.colors.cyclical.IceFire)
plot.update_layout(
    # Adjacent string literals keep source indentation out of the title text.
    title={'text': "Yearly Temperature Distributions"
                   "<br><sup>Whitefish N, MT and Harding Cutoff, SD "
                   "Weather Stations</sup>",
           'xanchor': 'left',
           'yanchor': 'top',
           'x': 0.1},
    xaxis_title='',
    yaxis_title='Temperature in Degrees Celsius',
    # Colour encodes station_key (the years are on the x-axis), so the legend
    # is titled after the stations rather than 'Year'.
    legend_title_text='Weather Station:')
plot.show()