In [8]:
import pandas as pd
import os

# Read the cleaned Coca-Cola price history from the sibling "data" folder.
# The path is relative, so it resolves against the notebook's working directory.
data_path = os.path.join("..", "data", "coca_cola_stock_cleaned.csv")
data = pd.read_csv(filepath_or_buffer=data_path)

In [9]:
# Fix date parsing: values that cannot be parsed become NaT, and rows without a
# usable date are removed.
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
data.dropna(subset=['Date'], inplace=True)

# ✅ Convert only the numeric columns that exist
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
present_numeric_cols = [col for col in numeric_cols if col in data.columns]
for col in present_numeric_cols:
    # Non-numeric entries are coerced to NaN so they can be dropped below.
    data[col] = pd.to_numeric(data[col], errors='coerce')

# Drop rows with NaNs in critical columns. Only the columns that are actually
# present are passed: dropna(subset=...) raises KeyError for a missing label,
# which would defeat the existence check above.
if present_numeric_cols:
    data.dropna(subset=present_numeric_cols, inplace=True)

  data['Date'] = pd.to_datetime(data['Date'], errors='coerce')


In [10]:
# 📊 Feature Engineering
# Every feature below is computed over consecutive rows, so the rows must be in
# chronological order. Sorting is a no-op when the file is already ordered;
# mergesort is stable, so rows sharing a date keep their existing relative order.
data.sort_values('Date', kind='mergesort', inplace=True)

# Simple moving averages of the closing price over the last 20 and 50 rows.
# The first window-1 rows of each are NaN because a full window is required.
data['MA_20'] = data['Close'].rolling(window=20).mean()
data['MA_50'] = data['Close'].rolling(window=50).mean()

# Row-over-row fractional change in the closing price (first row is NaN).
data['Daily_Return'] = data['Close'].pct_change()

# Standard deviation of the daily return over the last 20 rows.
data['Volatility'] = data['Daily_Return'].rolling(window=20).std()

In [11]:
# Remove every row that still has a NaN in any column (this includes the
# leading rows where the rolling windows are not yet full), then renumber the
# index from 0 and discard the old index values.
data = data.dropna().reset_index(drop=True)

In [12]:
# Write the feature-engineered frame next to the input file; the index is
# omitted so the CSV contains only the data columns.
engineered_path = os.path.join("..", "data", "coca_cola_stock_engineered.csv")
data.to_csv(path_or_buf=engineered_path, index=False)

In [13]:
# Show the first rows of the date, price and engineered columns as a quick
# sanity check, then report where the CSV was written.
preview_cols = ['Date', 'Close', 'MA_20', 'MA_50', 'Daily_Return', 'Volatility']
print(data.loc[:, preview_cols].head())
print(f"\n✅ Feature-engineered data saved to: {engineered_path}")

        Date      Close      MA_20      MA_50  Daily_Return  Volatility
0 2015-03-16  29.135576  29.976337  30.228816      0.009521    0.011377
1 2015-03-17  29.294676  29.941773  30.210258      0.005461    0.011464
2 2015-03-18  29.359739  29.913332  30.193000      0.002221    0.011486
3 2015-03-19  28.940323  29.850293  30.162765     -0.014285    0.011604
4 2015-03-20  29.395910  29.815053  30.134038      0.015742    0.012260

✅ Feature-engineered data saved to: ..\data\coca_cola_stock_engineered.csv
