## **Trying Out**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the page-view data. The first CSV column is parsed as dates and
# becomes the index; the 'value' column is read as integers.
df = pd.read_csv(
    "fcc-forum-pageviews.csv",
    index_col=[0],
    parse_dates=[0],
    dtype={"value": "int"},
)
df

In [None]:
# Data cleaning: keep only the rows whose 'value' lies between the 2.5th and
# 97.5th percentiles (both ends inclusive). Both percentiles are evaluated on
# the unfiltered frame before `df` is rebound.
df = df.loc[
    df['value'].between(
        df['value'].quantile(0.025),
        df['value'].quantile(0.975),
    )
]
df.size

In [None]:
# Line graph: 'value' plotted against the frame's index on a wide figure.
fig1, ax1 = plt.subplots(figsize=(15, 5))
df_line = df.copy()  # work on a copy so the cleaned frame stays untouched
ax1.plot(df_line.index, df_line['value'])
ax1.set(
    title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019',
    ylabel='Page Views',
)
ax1.set_xlabel('Date')

In [None]:
# Bar graph data: mean 'value' per (year, month), reshaped so that each row is
# a year and each column is a month number.
# `assign` returns a new frame, so `df` itself is left unmodified.
df_bar = df.assign(Year=df.index.year, Month=df.index.month)
bar_graph = (
    df_bar
    .groupby(['Year', 'Month'])['value']
    .mean()
    .unstack(level='Month')  # move the month level into the columns
)
bar_graph

In [None]:
# Grouped bar chart of `bar_graph`: one cluster per row (year), one bar per
# column (month number).
fig2, ax2 = plt.subplots(figsize=(10, 6))
bar_graph.plot(kind='bar', ax=ax2)
ax2.set_ylabel('Average Page Views')
ax2.set_xlabel('Years')
# Legend labels are matched to the plotted columns purely by position, so
# derive them from the month numbers actually present in `bar_graph.columns`
# rather than assuming all twelve months exist. A fixed 12-name list would
# silently mislabel the bars if any month were absent from the data.
month_names = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
ax2.legend(
    title='Months',
    labels=[month_names[month - 1] for month in bar_graph.columns],
)

In [None]:
# Box plot data: move the date index into a regular 'date' column and add
# 'year' and 'month' columns to group by.
# `reset_index()` returns a new frame, so `df` is left unmodified.
df_box = df.reset_index()
# Use the vectorized .dt accessor instead of looping over every row in Python.
df_box['year'] = df_box['date'].dt.year
# '%b' yields the abbreviated month name (e.g. 'Jan').
df_box['month'] = df_box['date'].dt.strftime('%b')
df_box

In [None]:
def _draw_box_panel(axis, column, title, xlabel, category_order=None):
    """Draw a box plot of ``value`` grouped by *column* of ``df_box`` on *axis*.

    ``category_order`` is forwarded to seaborn's ``order`` argument; ``None``
    leaves the category order up to seaborn.
    """
    sns.boxplot(data=df_box, x=column, y='value', order=category_order, ax=axis)
    axis.set_title(title)
    axis.set_xlabel(xlabel)
    axis.set_ylabel('Page Views')


# Two side-by-side panels: per-year on the left, per-month on the right.
fig, ax = plt.subplots(1, 2, figsize=(15, 6))

# Explicit order for the month categories on the x axis.
order = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

_draw_box_panel(ax[0], 'year', 'Year-wise Box Plot (Trend)', 'Year')
_draw_box_panel(
    ax[1],
    'month',
    'Month-wise Box Plot (Seasonality)',
    'Month',
    category_order=order,
)