# Level 3 
## Task 1: Time Series Forecasting

Welcome to the notebook version of **Task 1: Time Series Forecasting**.  
In this task, we’ll apply classic time series techniques (seasonal decomposition, Holt-Winters exponential smoothing, and ARIMA) to forecast a chosen metric from a cleaned dataset.


In [16]:

import os
import sys
import ipywidgets as widgets

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

from IPython.display import display, Markdown

In [17]:
# The notebook's working directory; its parent is treated as the project root
notebook_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))  # adjust as needed

# Cleaned datasets are expected under <root>/data/cleaned
cleaned_dir = os.path.join(root_dir, "data", "cleaned")

# Make project-local modules importable (appended only once across re-runs)
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Echo the resolved locations for debugging
print("📁 Notebook Directory:", notebook_dir)
print("📂 Root Directory:", root_dir)
print("🗂️ Cleaned Data Path:", cleaned_dir)

# -------------------- 📁 Detect Available CSV Files --------------------
# A missing directory and an empty directory are reported the same way:
# the message is printed and the FileNotFoundError is re-raised to stop the cell.
try:
    csv_files = sorted(
        entry for entry in os.listdir(cleaned_dir) if entry.endswith(".csv")
    )
    if len(csv_files) == 0:
        raise FileNotFoundError("No CSV files found in 'data/cleaned/'.")
except FileNotFoundError as missing:
    print(f"❌ {missing}")
    raise

📁 Notebook Directory: e:\CODveda\codveda-internship\notebooks
📂 Root Directory: e:\CODveda\codveda-internship
🗂️ Cleaned Data Path: e:\CODveda\codveda-internship\data\cleaned


In [18]:
# -------------------- Dataset Selection Dropdown --------------------

# Pre-select stock_prices_cleaned.csv when it was discovered; otherwise fall
# back to the first CSV in the sorted listing.
default_file = "stock_prices_cleaned.csv"
if default_file in csv_files:
    default_value = default_file
else:
    default_value = csv_files[0]

# Dropdown listing every discovered CSV file
dataset_selector = widgets.Dropdown(
    options=csv_files,
    value=default_value,
    description="Dataset:",
    layout=widgets.Layout(width="50%"),
)


## Load Cleaned Time Series Dataset

Use the dropdown below to choose a cleaned CSV dataset (with a date column) for time series analysis.

In [19]:
# -------------------- Load Dataset Helper --------------------
def load_dataset(file_name):
    """Load a cleaned CSV and index it chronologically by its date column.

    Parameters
    ----------
    file_name : str
        Name of a CSV file located inside ``cleaned_dir``.

    Returns
    -------
    pandas.DataFrame
        The file's contents with the detected date column parsed by
        ``pd.to_datetime``, used as the index, and sorted by that index.

    Raises
    ------
    ValueError
        If no column name contains "date" (case-insensitive).
    """
    path = os.path.join(cleaned_dir, file_name)
    df = pd.read_csv(path)

    # Prefer a column named exactly "date". A plain substring test alone would
    # pick the first column that merely contains those letters (for example
    # "candidate_id" or "validated") even when a real date column follows it.
    date_col = next(
        (col for col in df.columns if col.strip().lower() == "date"), None
    )
    if date_col is None:
        # Fall back to the looser match, e.g. "order_date" or "Date_Time"
        date_col = next((col for col in df.columns if "date" in col.lower()), None)

    if date_col is None:
        raise ValueError("No 'date' column found in the dataset.!")

    df[date_col] = pd.to_datetime(df[date_col])
    return df.set_index(date_col).sort_index()

# -------------------- Output Preview Area --------------------
# Widget that holds the rendered dataset preview
output_area = widgets.Output()

def on_dataset_change(change):
    """Re-render the preview in ``output_area`` for the newly selected file.

    ``change`` is the change dictionary delivered by the dropdown observer;
    its ``'new'`` entry is the selected file name. The loaded frame is local
    to this handler: only the preview is refreshed, and ``df_initial`` is not
    rebound here.
    """
    with output_area:
        output_area.clear_output()
        try:
            selected_file = change['new']
            preview_df = load_dataset(selected_file)
            display(Markdown(f"### Preview of `{selected_file}`"))
            display(preview_df.head())
        except Exception as e:
            # Report the failure inside the preview area instead of raising
            print(f"Error loading dataset: {e}.!!!")

# Fire the handler whenever the dropdown's selected value changes
dataset_selector.observe(on_dataset_change, names='value')

# -------------------- Initial Dataset Preview --------------------
# Load OUTSIDE the Output context. An Output context captures exceptions raised
# inside it and renders them into the widget, so a failed load there would
# leave `df_initial` undefined and only surface in a later cell as a NameError.
# Loading here makes the failure stop this cell with the real error.
df_initial = load_dataset(dataset_selector.value)

# Only the rendering is routed into the preview widget
with output_area:
    display(Markdown(f"### Preview of `{dataset_selector.value}`"))
    display(df_initial.head())

# -------------------- Display Dataset Selector --------------------
display(Markdown("## Load Cleaned Time Series Dataset"))
display(dataset_selector)
display(output_area)


## Load Cleaned Time Series Dataset

Dropdown(description='Dataset:', index=4, layout=Layout(width='50%'), options=('churn-bigml-20_cleaned.csv', '…

Output()

In [20]:
# -------------------- Time Series Column Selection --------------------

# Candidate targets: every numeric column of the loaded frame
numeric_columns = list(df_initial.select_dtypes(include=[np.number]).columns)

# Stop here (raise) when the dataset has nothing numeric to forecast
if len(numeric_columns) == 0:
    raise ValueError("No numeric columns found for time series forecasting.!!!")

# Dropdown for choosing the target column; the first numeric column is pre-selected
target_selector = widgets.Dropdown(
    options=numeric_columns,
    value=numeric_columns[0],
    description="Target:",
    layout=widgets.Layout(width="50%"),
)

# Widget that holds the rendered plot
plot_output = widgets.Output()

# Plot function
def plot_time_series(series, label=None):
    """Draw a line chart of one time series and show it.

    Parameters
    ----------
    series : pandas.Series
        Values to plot; callers in this notebook pass a DataFrame column, so
        its index supplies the x-axis.
    label : str, optional
        Name used for the line and the title. When omitted, the series' own
        ``name`` is used for both.
    """
    # Resolve the display name once so the line label and the title agree.
    # Previously the title interpolated `label` directly and read
    # "Time Series Plot – None" whenever no label was passed.
    display_name = label or getattr(series, "name", None)

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(series, label=display_name)
    ax.set_xlabel("Date")
    ax.set_ylabel("Value")

    # Omit the suffix entirely for an unlabeled, unnamed series
    if display_name is None:
        ax.set_title("Time Series Plot")
    else:
        ax.set_title(f"Time Series Plot – {display_name}")

    ax.grid(True)
    # As before, the legend is drawn only when the caller passed a label
    if label:
        ax.legend()
    fig.tight_layout()
    plt.show()

def on_target_change(change):
    """Redraw the plot in ``plot_output`` for the newly selected target column.

    ``change`` is the change dictionary delivered by the dropdown observer;
    its ``"new"`` entry is the selected column name. The column is read from
    ``df_initial`` and missing values are dropped before plotting.
    """
    with plot_output:
        plot_output.clear_output()
        target_name = change["new"]
        clean_series = df_initial[target_name].dropna()
        display(Markdown(f"### Plotting `{target_name}` Time Series"))
        plot_time_series(clean_series, label=target_name)

# Fire the handler whenever the dropdown's selected value changes
target_selector.observe(on_target_change, names="value")

# Initial plot
with plot_output:
    initial_target = target_selector.value
    display(Markdown(f"### Plotting `{initial_target}` Time Series"))
    # Drop missing values exactly as on_target_change does, so the first
    # render and every later redraw treat gaps in the data the same way.
    plot_time_series(df_initial[initial_target].dropna(), label=initial_target)

# Display UI
display(Markdown("## Select Time Series Target"))
display(target_selector)
display(plot_output)


## Select Time Series Target

Dropdown(description='Target:', layout=Layout(width='50%'), options=('open', 'high', 'low', 'close', 'volume')…

Output()