<a href="https://colab.research.google.com/github/ASAzimy/SamimProject/blob/main/FirstImplementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**🎓 Thesis Deep Learning Implementation: MSFT + CapsNet + Neural ODEs**

**✅ Step-by-Step Instructions with Code (Google Colab)**

**📁 Step 1: Upload and Load MSFT Dataset**

In [5]:
from google.colab import files
import pandas as pd

# Upload the file (will prompt you to select file)
uploaded = files.upload()

# Get the filename (assuming only one file uploaded)
filename = next(iter(uploaded))

# Read the file
df = pd.read_csv(filename, skiprows=3, header=None,
                 names=['Date', 'Close', 'High', 'Low', 'Open', 'Volume'])

# Convert date and numeric columns
df['Date'] = pd.to_datetime(df['Date'])
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)

# Set Date as index and sort
df = df.set_index('Date').sort_index()

print(df.head())

Saving MSFT_1986_2025-06-30.csv to MSFT_1986_2025-06-30.csv
               Close      High       Low      Open      Volume
Date                                                          
1986-03-13  0.059598  0.062259  0.054277  0.054277  1031788800
1986-03-14  0.061726  0.062791  0.059598  0.059598   308160000
1986-03-17  0.062791  0.063323  0.061726  0.061726   133171200
1986-03-18  0.061194  0.063323  0.060662  0.062791    67766400
1986-03-19  0.060130  0.061726  0.059598  0.061194    47894400


In [7]:
from google.colab import files
import pandas as pd

# Upload the file (will prompt you to select file)
uploaded = files.upload()

# Get the filename (assuming only one file uploaded)
filename = next(iter(uploaded))

# Read the file
df = pd.read_csv(filename)

# Convert date and numeric columns
df['Date'] = pd.to_datetime(df['Date'])
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)

# Set Date as index and sort
df = df.set_index('Date').sort_index()

print(df.head())

Saving MSFT_1986_2025-06-30.csv to MSFT_1986_2025-06-30 (1).csv
               Close      High       Low      Open      Volume
Date                                                          
1986-03-13  0.059598  0.062259  0.054277  0.054277  1031788800
1986-03-14  0.061726  0.062791  0.059598  0.059598   308160000
1986-03-17  0.062791  0.063323  0.061726  0.061726   133171200
1986-03-18  0.061194  0.063323  0.060662  0.062791    67766400
1986-03-19  0.060130  0.061726  0.059598  0.061194    47894400


In [9]:
import pandas as pd
import numpy as np

# --- 1. Load Data (After Uploading to Colab) ---

df = pd.read_csv(filename)

In [10]:
# --- 2. Convert Data Types ---
df['Date'] = pd.to_datetime(df['Date'])  # Ensure 'Date' is datetime
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')  # Convert to float

In [11]:
# --- 3. Data Validation & Cleaning ---
# Check for missing values
print("Missing Values Before Cleaning:")
print(df.isnull().sum())

Missing Values Before Cleaning:
Date      0
Close     0
High      0
Low       0
Open      0
Volume    0
dtype: int64


In [12]:
# Forward-fill missing prices (common in stock data)
df[['Open', 'High', 'Low', 'Close']] = df[['Open', 'High', 'Low', 'Close']].ffill()
df['Volume'] = df['Volume'].fillna(0)  # Fill missing volume with 0

In [13]:
# Validate price consistency (High ≥ Low, Open within range)
df = df[(df['High'] >= df['Low']) & (df['Open'] >= df['Low']) & (df['Open'] <= df['High'])]

In [14]:
# --- 4. Feature Engineering (Stock-Specific Features) ---
# Daily Returns & Cumulative Returns
df['Daily_Return'] = df['Close'].pct_change() * 100  # Percentage return
df['Cumulative_Return'] = (1 + df['Daily_Return'] / 100).cumprod() - 1

# Moving Averages (Trend Indicators)
df['SMA_20'] = df['Close'].rolling(20).mean()  # 20-day Simple Moving Avg
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()  # 12-day Exponential MA

# Volatility (Daily Range & Rolling Std Dev)
df['Daily_Range'] = (df['High'] - df['Low']) / df['Close'] * 100  # % Range
df['Volatility_30D'] = df['Close'].rolling(30).std() * np.sqrt(30)  # Annualized

In [15]:
# --- 5. Final Structure & Export ---
df = df.set_index('Date').sort_index()  # Set 'Date' as index & sort
df.dropna(inplace=True)  # Drop rows with NaN (due to rolling calcs)

print("\n✅ Preprocessing Complete!")
print(f"Final Data Shape: {df.shape}")
print("\nSample Data:")
print(df.head())

# Save cleaned data (optional)
df.to_csv('MSFT_Preprocessed.csv')
print("\n📁 Cleaned data saved as 'MSFT_Preprocessed.csv'")


✅ Preprocessing Complete!
Final Data Shape: (9871, 11)

Sample Data:
               Close      High       Low      Open    Volume  Daily_Return  \
Date                                                                         
1986-04-24  0.067580  0.068644  0.061194  0.061460  62352000      9.957188   
1986-04-25  0.071837  0.074763  0.068112  0.068112  85795200      6.299616   
1986-04-28  0.072369  0.072901  0.071304  0.071837  28886400      0.740713   
1986-04-29  0.070240  0.072369  0.069708  0.072369  30326400     -2.941812   
1986-04-30  0.068644  0.070772  0.067047  0.070240  30902400     -2.272597   

            Cumulative_Return    SMA_20    EMA_12  Daily_Range  Volatility_30D  
Date                                                                            
1986-04-24           0.133932  0.060808  0.062379    11.023828        0.014053  
1986-04-25           0.205365  0.061447  0.063834     9.259483        0.018203  
1986-04-28           0.214293  0.062139  0.065147     2.206