## Prerequisites
---


In [None]:
!pip install ydata-profiling
!pip install plotly

In [None]:
!wget https://raw.githubusercontent.com/Lolimipsu/ML_Tutorials/refs/heads/main/freecodecamp/Page%20View%20Time%20Series%20Visualizer/fcc-forum-pageviews.csv

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

import plotly.express as px
from ydata_profiling import ProfileReport

In [None]:
from IPython.display import display
# Show every column when a DataFrame is displayed (None removes the limit).
pd.set_option('display.max_columns', None)

## Task
---

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
# Register pandas' datetime formatters/converters with matplotlib before plotting.
register_matplotlib_converters()

from datetime import datetime

Use Pandas to import the data from fcc-forum-pageviews.csv. Set the index to the date column.

In [None]:
def parse_date(x):
    """Parse a date string, trying each supported layout in turn.

    Supported layouts, in order of precedence:
    ``MM-DD-YYYY``, ``DD MM YYYY`` and ``YYYY-MM-DD``.

    Args:
        x: The date text to parse.

    Returns:
        The parsed ``datetime.datetime``.

    Raises:
        ValueError: If ``x`` matches none of the supported layouts.
    """
    # The first layout previously read '%m-%d-Y' (no '%' before 'Y'), which
    # only matched a literal "Y" and therefore never parsed a real date.
    for fmt in ('%m-%d-%Y', '%d %m %Y', '%Y-%m-%d'):
        try:
            return datetime.strptime(x, fmt)
        except ValueError:
            continue
    raise ValueError(f'No valid date format found for {x}')


# Load the CSV with `date` as plain text, parse it explicitly with parse_date,
# then promote it to the index. The `date_parser=` argument of read_csv is
# deprecated since pandas 2.0, and the install cell above does not pin pandas,
# so the explicit route keeps this cell working on newer versions.
df = (
    pd.read_csv('fcc-forum-pageviews.csv')
    .assign(date=lambda raw: raw['date'].map(parse_date))
    .set_index('date')
)

Clean the data by filtering out days when the page views were in the top 2.5% of the dataset or bottom 2.5% of the dataset.

In [None]:
# Keep only the days whose page views lie between the 2.5th and 97.5th
# percentiles (both bounds inclusive), dropping the extreme 2.5% at each end.
df = df.loc[
    df['value'].between(
        df['value'].quantile(0.025),
        df['value'].quantile(0.975),
    )
]

Create a draw_line_plot function that uses Matplotlib to draw a line chart similar to examples/Figure_1.png.

In [None]:
def draw_line_plot():
    """Draw the daily page-view line chart and save it as ``line_plot.png``.

    Reads the module-level ``df`` (indexed by ``date``, with a ``value``
    column).

    Returns:
        The matplotlib ``Figure`` holding the chart.
    """
    # Draw line plot
    fig, ax = plt.subplots(figsize=(18, 6))

    # Pass ax explicitly so seaborn draws on this figure's axes rather than on
    # whatever pyplot currently considers the "current" axes.
    sns.lineplot(data=df, x='date', y='value', color='green', ax=ax)
    ax.set(
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019',
        xlabel='Date',
        # The project asks for "Page Views" (capital V), matching the bar plot.
        ylabel='Page Views',
    )

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig

In [None]:
# Trailing ';' suppresses the returned Figure as cell output; the inline
# backend already renders it, so without it the chart is shown twice.
draw_line_plot();

Create a `draw_bar_plot` function that draws a bar chart similar to `examples/Figure_2.png`.

In [None]:
def draw_bar_plot():
    """Draw average monthly page views grouped by year; save ``bar_plot.png``.

    Reads the module-level ``df`` (datetime-indexed, with a ``value`` column).

    Returns:
        The matplotlib ``Figure`` holding the chart.
    """
    month_names = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    ]

    # Copy and modify data for monthly bar plot
    df_bar = df.copy()
    df_bar['year'] = df_bar.index.year
    df_bar['month'] = df_bar.index.month

    # Rows are years, columns are month numbers, cells are mean page views.
    df_bar = df_bar.groupby(['year', 'month'])['value'].mean().unstack()
    # Name each column from the month number it actually holds. A fixed list of
    # twelve legend labels is matched by position only, so it would mislabel
    # the bars if any month were absent from the data.
    df_bar.columns = [month_names[month - 1] for month in df_bar.columns]

    # Draw bar plot
    fig, ax = plt.subplots(figsize=(15, 10))
    df_bar.plot(kind='bar', ax=ax)

    ax.set_xlabel('Years')
    ax.set_ylabel('Average Page Views')
    ax.legend(title='Months')

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig


In [None]:
# Trailing ';' suppresses the returned Figure as cell output; the inline
# backend already renders it, so without it the chart is shown twice.
draw_bar_plot();

Create a `draw_box_plot` function that uses Seaborn to draw two adjacent box plots similar to `examples/Figure_3.png`.

## Code Submission
---

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
# Register pandas' datetime formatters/converters with matplotlib before plotting.
register_matplotlib_converters()

# Import data (Make sure to parse dates. Consider setting index column to 'date'.)
# The template's `df = None` placeholders would overwrite any loaded frame with
# None, so this cell loads and cleans the data itself.
df = pd.read_csv(
    'fcc-forum-pageviews.csv',
    index_col='date',
    parse_dates=['date'],
)

# Clean data: keep days between the 2.5th and 97.5th percentiles (inclusive).
df = df.loc[
    (df['value'] >= df['value'].quantile(0.025)) &
    (df['value'] <= df['value'].quantile(0.975))
]


def draw_line_plot():
    """Draw the daily page-view line chart and save it as ``line_plot.png``.

    Reads the module-level ``df`` (indexed by ``date``, with a ``value``
    column).

    Returns:
        The matplotlib ``Figure`` holding the chart.
    """
    # Draw line plot
    # The template body used `fig` without ever creating it (NameError on call).
    fig, ax = plt.subplots(figsize=(18, 6))
    sns.lineplot(data=df, x='date', y='value', color='green', ax=ax)
    ax.set(
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019',
        xlabel='Date',
        ylabel='Page Views',
    )

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig

def draw_bar_plot():
    """Draw average monthly page views grouped by year; save ``bar_plot.png``.

    Reads the module-level ``df`` (datetime-indexed, with a ``value`` column).

    Returns:
        The matplotlib ``Figure`` holding the chart.
    """
    month_names = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    ]

    # Copy and modify data for monthly bar plot
    df_bar = df.copy()
    df_bar['year'] = df_bar.index.year
    df_bar['month'] = df_bar.index.month
    # Rows are years, columns are month numbers, cells are mean page views.
    df_bar = df_bar.groupby(['year', 'month'])['value'].mean().unstack()
    # Name each column from the month number it holds so legend entries
    # cannot drift out of step with the bars.
    df_bar.columns = [month_names[month - 1] for month in df_bar.columns]

    # Draw bar plot
    # The template body used `fig` without ever creating it (NameError on call).
    fig, ax = plt.subplots(figsize=(15, 10))
    df_bar.plot(kind='bar', ax=ax)
    ax.set_xlabel('Years')
    ax.set_ylabel('Average Page Views')
    ax.legend(title='Months')

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig

def draw_box_plot():
    """Draw year-wise and month-wise box plots side by side; save ``box_plot.png``.

    Reads the module-level ``df`` (datetime-indexed by ``date``, with a
    ``value`` column).

    Returns:
        The matplotlib ``Figure`` holding both box plots.
    """
    # Prepare data for box plots (this part is done!)
    df_box = df.copy()
    df_box.reset_index(inplace=True)
    df_box['year'] = [d.year for d in df_box.date]
    df_box['month'] = [d.strftime('%b') for d in df_box.date]

    # Draw box plots (using Seaborn)
    # The template body used `fig` without ever creating it (NameError on call).
    # '%b' gives abbreviated month names; this calendar order assumes the
    # default English/C locale.
    month_order = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
    ]
    fig, (ax_year, ax_month) = plt.subplots(1, 2, figsize=(20, 8))

    sns.boxplot(data=df_box, x='year', y='value', ax=ax_year)
    ax_year.set(
        title='Year-wise Box Plot (Trend)',
        xlabel='Year',
        ylabel='Page Views',
    )

    # Without an explicit order the months would appear in first-seen order.
    sns.boxplot(data=df_box, x='month', y='value', order=month_order, ax=ax_month)
    ax_month.set(
        title='Month-wise Box Plot (Seasonality)',
        xlabel='Month',
        ylabel='Page Views',
    )

    # Save image and return fig (don't change this part)
    fig.savefig('box_plot.png')
    return fig


## Dataset analysis
---

In [None]:
# Re-read the CSV with default parsing (no date index, no outlier filtering)
# into a separate frame used only for profiling.
profiling = pd.read_csv('fcc-forum-pageviews.csv')

In [None]:
# Print the frame's column names, dtypes and non-null counts.
profiling.info()

In [None]:
# Build the ydata-profiling report for the raw CSV frame.
profile = ProfileReport(profiling, title="Pandas Profiling Report")
# Bare last expression so the notebook renders the report as the cell output.
profile