In [5]:
import pandas as pd
import os

# Build one Prophet-ready CSV per stock from the transformed EDA data.
#
# Steps: load the combined CSV, parse dates, de-duplicate, then for each
# stock reindex to business-day frequency, fill the gaps this introduces,
# and write a two-column ('ds', 'y') file that Prophet can consume.

# Load the transformed EDA data
df = pd.read_csv("../Data/EDA_transformed.csv")

# Ensure 'Date' is datetime
df['Date'] = pd.to_datetime(df['Date'])

# Remove exact duplicate rows and sort so each stock's rows are contiguous
# and chronologically ordered.
df = df.drop_duplicates().sort_values(['Stocks', 'Date'])

# Create the output folder for Prophet-ready data.  The directory that is
# created must be the same one the CSVs are written into below; previously
# a folder was created relative to the working directory while the files
# were written under ../Data/, which fails when that folder does not exist.
output_dir = os.path.join("..", "Data", "Prophet_Preprocessed")
os.makedirs(output_dir, exist_ok=True)

# Prophet expects columns: 'ds' (date), 'y' (target)
# groupby yields each stock's rows in one pass (rows whose 'Stocks' value is
# missing are skipped by pandas' default dropna=True).
for stock, group in df.groupby('Stocks'):
    # asfreq needs a unique DatetimeIndex, so keep only the first row per
    # date.  The frame is already sorted by Date within each stock.
    stock_df = group.drop_duplicates(subset='Date')
    stock_df = stock_df.set_index('Date').asfreq('B')  # business day frequency

    # Reindexing inserts all-NaN rows for business days absent from the
    # data; fill numeric columns by linear interpolation, then forward-fill
    # whatever interpolation left untouched.
    numeric_cols = stock_df.select_dtypes(include='number').columns
    stock_df[numeric_cols] = stock_df[numeric_cols].interpolate(method='linear')
    stock_df[numeric_cols] = stock_df[numeric_cols].ffill()

    # Prepare Prophet format: use 'Close' as the target when present,
    # otherwise fall back to the first remaining column.
    # NOTE(review): the fallback column may be non-numeric (e.g. 'Stocks')
    # depending on the CSV's column order -- confirm against the data schema.
    if 'Close' in stock_df.columns:
        target = stock_df['Close']
    else:
        target = stock_df.iloc[:, 0]

    prophet_df = pd.DataFrame({
        'ds': stock_df.index,
        'y': target,
    })

    # Save to CSV
    prophet_df.to_csv(os.path.join(output_dir, f"{stock}_prophet.csv"), index=False)

print("✅ Prophet-ready data saved in  Prophet_Preprocessed/")

✅ Prophet-ready data saved in  Prophet_Preprocessed/
