In [9]:
# Essential Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import os

# Sentiment and Technical Analysis Tools
from textblob import TextBlob
import pandas_ta as talib
import datetime


In [10]:

# Paths
zip_path = 'C:/Users/dell/Downloads/yfinance_data.zip'
extract_to = 'C:/Users/dell/Downloads/yfinance_data'  # Folder to extract to

# Create directory if it doesn't exist
os.makedirs(extract_to, exist_ok=True)

# Extract the ZIP
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

# Now read the CSV
csv_path = f'{extract_to}/yfinance_data/AAPL_historical_data.csv'
stock_df = pd.read_csv(csv_path)

print(stock_df.head())

         Date      Open      High       Low     Close  Adj Close     Volume  \
0  1980-12-12  0.128348  0.128906  0.128348  0.128348   0.098943  469033600   
1  1980-12-15  0.122210  0.122210  0.121652  0.121652   0.093781  175884800   
2  1980-12-16  0.113281  0.113281  0.112723  0.112723   0.086898  105728000   
3  1980-12-17  0.115513  0.116071  0.115513  0.115513   0.089049   86441600   
4  1980-12-18  0.118862  0.119420  0.118862  0.118862   0.091630   73449600   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  


In [11]:

# Paths
zip_path = 'C:/Users/dell/Downloads/raw_analyst_ratings.csv.zip'
extract_to = 'C:/Users/dell/Downloads/extracted_data'  # Folder to extract to

# Extract the ZIP
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

# Now read the CSV
csv_path = f'{extract_to}/raw_analyst_ratings.csv'  # Adjust if filename is different
ratings_df = pd.read_csv(csv_path)

print(ratings_df.head())

   Unnamed: 0                                           headline  \
0           0            Stocks That Hit 52-Week Highs On Friday   
1           1         Stocks That Hit 52-Week Highs On Wednesday   
2           2                      71 Biggest Movers From Friday   
3           3       46 Stocks Moving In Friday's Mid-Day Session   
4           4  B of A Securities Maintains Neutral on Agilent...   

                                                 url          publisher  \
0  https://www.benzinga.com/news/20/06/16190091/s...  Benzinga Insights   
1  https://www.benzinga.com/news/20/06/16170189/s...  Benzinga Insights   
2  https://www.benzinga.com/news/20/05/16103463/7...         Lisa Levin   
3  https://www.benzinga.com/news/20/05/16095921/4...         Lisa Levin   
4  https://www.benzinga.com/news/20/05/16095304/b...         Vick Meyer   

                        date stock  
0  2020-06-05 10:30:54-04:00     A  
1  2020-06-03 10:45:20-04:00     A  
2  2020-05-26 04:30:07-04:00 

# 3. Basic EDA

In [12]:
# Stock Data Overview
print(stock_df.head())
print(stock_df.info())

# Ratings Dataset Overview
print(ratings_df.head())
print(ratings_df['publisher'].value_counts())
ratings_df['headline_length'] = ratings_df['headline'].apply(len)


         Date      Open      High       Low     Close  Adj Close     Volume  \
0  1980-12-12  0.128348  0.128906  0.128348  0.128348   0.098943  469033600   
1  1980-12-15  0.122210  0.122210  0.121652  0.121652   0.093781  175884800   
2  1980-12-16  0.113281  0.113281  0.112723  0.112723   0.086898  105728000   
3  1980-12-17  0.115513  0.116071  0.115513  0.115513   0.089049   86441600   
4  1980-12-18  0.118862  0.119420  0.118862  0.118862   0.091630   73449600   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10998 entries, 0 to 10997
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          10998 non-null  object 
 1   Open          10998 non-null  float64
 2   High          10998 non-null  float64
 3   Low           10998 non

# 4. Sentiment Analysis (Task 3)

In [None]:
# Sentiment score.
ratings_df['sentiment'] = ratings_df['headline'].apply(lambda x: TextBlob(str(x)).sentiment.polarity)

# Safely convert to datetime.
ratings_df['date'] = pd.to_datetime(ratings_df['date'], errors='coerce')

# Drop any rows where datetime conversion failed.
ratings_df = ratings_df.dropna(subset=['date'])

# Extract just the date.
ratings_df['date'] = ratings_df['date'].dt.date

# Daily average sentiment.
daily_sentiment = ratings_df.groupby('date')['sentiment'].mean().reset_index()


# 5. Time Series Technical Analysis (Task 2)

In [11]:
pip install pandas_ta --upgrade


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
import pandas_ta as ta
stock_df = pd.read_csv(csv_path)

# Convert stock date and compute daily return
stock_df['Date'] = pd.to_datetime(stock_df['Date'])
stock_df['daily_return'] = stock_df['Close'].pct_change()

# Technical Indicators using pandas_ta
stock_df['SMA_10'] = stock_df['Close'].ta.sma(length=10)
stock_df['RSI'] = stock_df['Close'].ta.rsi(length=14)
macd = stock_df['Close'].ta.macd()
stock_df['MACD'] = macd['MACD_12_26_9']
stock_df['MACD_signal'] = macd['MACDs_12_26_9']

# Visualize
plt.figure(figsize=(12, 6))
plt.plot(stock_df['Date'], stock_df['Close'], label='Close Price')
plt.plot(stock_df['Date'], stock_df['SMA_10'], label='SMA_10')
plt.legend()
plt.title('Stock Price with SMA')
plt.show()


NameError: name 'csv_path' is not defined

In [4]:
!pip install --upgrade pandas_ta


Defaulting to user installation because normal site-packages is not writeable
