In [1]:
# Install the yfinance package, which the next cell imports as `yf`.
%pip install yfinance

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import yfinance as yf
import pandas as pd

class StockData:
    """Fetch, clean, and enrich the price history of a single ticker.

    The intended call order is ``fetch_data`` -> ``preprocess_data`` ->
    ``add_features`` -> ``add_company_column``. Every step stores its result
    in ``self.data`` and also returns it. Steps that run while ``self.data``
    is still ``None`` do nothing and return ``None``.

    Each step assigns a *new* DataFrame to ``self.data`` instead of mutating
    the previous one, so a frame returned by an earlier step is never
    modified behind the caller's back.
    """

    def __init__(self, ticker, start_date, end_date):
        """Store the query parameters; no data is downloaded here.

        Args:
            ticker: Ticker symbol passed to ``yf.Ticker``.
            start_date: Start of the history window, passed to ``history``.
            end_date: End of the history window, passed to ``history``.
        """
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date
        self.data = None

    def fetch_data(self):
        """Download the price history and store it in ``self.data``.

        Returns:
            The downloaded DataFrame, with a datetime index rendered as
            ``YYYY-MM-DD`` strings.
        """
        stock = yf.Ticker(self.ticker)
        history = stock.history(start=self.start_date, end=self.end_date)
        # Only a DatetimeIndex has ``strftime``; any other index type is left
        # untouched rather than raising AttributeError.
        # NOTE(review): presumably an empty/failed download is the case that
        # yields a non-datetime index -- confirm against yfinance.
        if isinstance(history.index, pd.DatetimeIndex):
            history.index = history.index.strftime('%Y-%m-%d')
        self.data = history
        return self.data

    def preprocess_data(self):
        """Forward-fill gaps, drop leftover NaN rows, and normalize dtypes.

        ``Dividends`` and ``Stock Splits`` are removed when present; a frame
        that lacks them (or lacks one of the price/volume columns) is
        processed without raising ``KeyError``.

        Returns:
            The cleaned DataFrame, or ``None`` if nothing has been loaded.
        """
        if self.data is None:
            return self.data

        # ``DataFrame.ffill`` replaces the deprecated ``fillna(method='ffill')``.
        # Rows that are still NaN afterwards (leading gaps) are dropped.
        cleaned = self.data.ffill().dropna()

        # Ensure correct data types, but only for columns that actually exist.
        wanted_dtypes = {
            'Open': 'float64', 'High': 'float64', 'Low': 'float64',
            'Close': 'float64', 'Volume': 'int64',
        }
        present_dtypes = {
            column: dtype
            for column, dtype in wanted_dtypes.items()
            if column in cleaned.columns
        }
        cleaned = cleaned.astype(present_dtypes)

        # Drop unnecessary columns; ``errors='ignore'`` tolerates their absence.
        cleaned = cleaned.drop(columns=['Dividends', 'Stock Splits'], errors='ignore')

        self.data = cleaned
        return self.data

    def add_features(self):
        """Add the ``50_MA`` and ``Daily_Return`` columns derived from ``Close``.

        ``50_MA`` is the 50-row rolling mean of ``Close`` and ``Daily_Return``
        is its row-over-row percentage change. NaNs anywhere in the frame
        (including the warm-up rows of both new columns) are replaced by 0.

        Returns:
            The enriched DataFrame, or ``None`` if nothing has been loaded.
        """
        if self.data is None:
            return self.data

        close = self.data['Close']
        enriched = self.data.assign(**{
            '50_MA': close.rolling(window=50).mean(),
            'Daily_Return': close.pct_change(),
        })
        # Handling Null values
        self.data = enriched.fillna(0)
        return self.data

    def add_company_column(self):
        """Add a leading ``Company`` column holding the ticker symbol.

        On the first call the index is moved into a regular column by
        ``reset_index`` and ``Company`` is inserted in front of it. The call
        is idempotent: if ``Company`` already exists it is refreshed and kept
        first, and the index is not reset a second time.

        Returns:
            The DataFrame with ``Company`` as its first column, or ``None``
            if nothing has been loaded.
        """
        if self.data is None:
            return self.data

        if 'Company' in self.data.columns:
            # Already tagged: rebuild the column instead of resetting the
            # index again, which would add a stray 'index' column.
            frame = self.data.drop(columns='Company')
        else:
            frame = self.data.reset_index()
        frame.insert(0, 'Company', self.ticker)

        self.data = frame
        return self.data

    def basic_statistics(self):
        """Compute summary statistics of the ``Close`` column.

        Returns:
            A dict with keys ``'mean'``, ``'median'`` and ``'std_dev'``, or
            ``None`` if nothing has been loaded.
        """
        if self.data is None:
            return None

        close = self.data['Close']
        return {
            'mean': close.mean(),
            'median': close.median(),
            'std_dev': close.std(),
        }




In [5]:
def main(tickers=('AAPL', 'MSFT'), start_date='2021-01-01', end_date='2024-01-01',
         output_path='combined_stock_data1.csv'):
    """Run the StockData pipeline per ticker, export one CSV, print statistics.

    For each ticker the pipeline is ``fetch_data`` -> ``preprocess_data`` ->
    ``add_features`` -> ``add_company_column``. The resulting frames are
    concatenated in ticker order and written to ``output_path`` without the
    index. Finally one ``"<TICKER> Stats: {...}"`` line is printed per ticker.

    Called with no arguments this processes AAPL and MSFT for 2021-01-01 to
    2024-01-01 and writes ``combined_stock_data1.csv``.

    Args:
        tickers: Iterable of ticker symbols to process, in output order.
        start_date: Start of the history window handed to ``StockData``.
        end_date: End of the history window handed to ``StockData``.
        output_path: Destination of the combined CSV file.

    Raises:
        ValueError: If ``tickers`` is empty, since there is nothing to combine.
    """
    tickers = list(tickers)
    if not tickers:
        raise ValueError("main() needs at least one ticker symbol")

    # Step-by-step data analysis, once per ticker instead of copy-pasted blocks.
    processed = []  # (ticker, StockData) pairs, kept in input order
    for ticker in tickers:
        stock = StockData(ticker, start_date, end_date)
        stock.fetch_data()
        stock.preprocess_data()
        stock.add_features()
        stock.add_company_column()
        processed.append((ticker, stock))

    # Combine DataFrames
    combined_df = pd.concat([stock.data for _, stock in processed])

    # Export Combined Data to CSV
    combined_df.to_csv(output_path, index=False)

    # Print Basic Statistics for every company
    for ticker, stock in processed:
        print(f"{ticker} Stats:", stock.basic_statistics())



In [6]:
# Entry-point guard: run the pipeline only when this code executes as the
# top-level module (`__name__` is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

AAPL Stats: {'mean': 154.43879510238668, 'median': 151.29434204101562, 'std_dev': 20.226124192489465}
MSFT Stats: {'mean': 281.5549014270068, 'median': 278.026611328125, 'std_dev': 40.873772757813576}


  self.data.fillna(method='ffill', inplace=True)  # Forward fill
  self.data.fillna(method='ffill', inplace=True)  # Forward fill
