In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [26]:
import pandas as pd

def clean_and_fill_missing_dates(input_path: str) -> pd.DataFrame:
    """
    Loads a daily price CSV and fills every missing calendar day with the
    most recent available row (forward fill).

    The first two lines of the file are skipped, so the third line is used as
    the header and must contain a "Date" column. All columns are relabelled
    positionally to: Date, Adj Close, Close, High, Low, Open, Volume.

    Parameters:
    input_path (str): Path of input CSV file.

    Returns:
    pd.DataFrame: One row per calendar day between the earliest and latest
    date in the file, in chronological order, with a fresh 0..n-1 index.

    Raises:
    KeyError: If the header row has no "Date" column.
    ValueError: If the file has no data rows, or does not have exactly one
    "Date" column plus six value columns.
    """
    expected_columns = ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

    raw = pd.read_csv(input_path, skiprows=2)

    # Ensure the "Date" column is properly parsed as datetime
    parsed_dates = pd.to_datetime(raw['Date'])

    if raw.empty:
        # Without rows there is no min/max date to build a calendar from.
        raise ValueError(f"No data rows found in {input_path!r}")

    prices = (
        raw.assign(Date=parsed_dates)
        # Reindexing requires unique dates. Duplicates are resolved before
        # sorting so that "last" reliably means the last row in the file.
        .drop_duplicates(subset='Date', keep='last')
        .sort_values('Date')
        .set_index('Date')
    )

    if prices.shape[1] != len(expected_columns) - 1:
        # The relabelling below is positional; fail loudly on another layout.
        raise ValueError(
            f"Expected {len(expected_columns) - 1} value columns besides 'Date', "
            f"found {prices.shape[1]}: {list(prices.columns)}"
        )

    # Create a complete date range from the earliest to the latest date
    full_date_range = pd.date_range(
        start=prices.index.min(), end=prices.index.max(), name='Date'
    )

    # Reindex to include all dates and fill missing rows with the last available data
    filled = prices.reindex(full_date_range, method='ffill').reset_index()
    filled.columns = expected_columns

    return filled

In [None]:
# Load the ETF price history and forward-fill the calendar days missing from the file.
data1 = clean_and_fill_missing_dates('zero_coupon_etf.csv')

In [28]:
# Check that the cleaned frame carries the column labels assigned by the loader.
data1.columns

Index(['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [29]:
# Preview the first ten rows; 2014-11-08 and 2014-11-09 repeat the 2014-11-07
# values, showing the forward fill of days absent from the source file.
data1.head(10)

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2014-11-03,84.266884,108.839996,109.279999,107.940002,109.279999,7200
1,2014-11-04,84.770119,109.489998,110.160004,109.110001,109.790001,13100
2,2014-11-05,84.561111,109.220001,109.410004,108.650002,108.769997,2500
3,2014-11-06,83.655266,108.050003,108.529999,108.010002,108.519997,8400
4,2014-11-07,84.69268,109.389999,109.589996,108.57,108.57,58700
5,2014-11-08,84.69268,109.389999,109.589996,108.57,108.57,58700
6,2014-11-09,84.69268,109.389999,109.589996,108.57,108.57,58700
7,2014-11-10,83.686195,108.089996,109.300003,108.089996,109.300003,3800
8,2014-11-11,83.802353,108.239998,108.5,107.510002,108.5,8300
9,2014-11-12,83.616539,108.0,109.5,108.0,109.349998,9300
