# In [None]:
# Import necessary libraries and modules
import pandas as pd
from scripts.data_loading import load_csv
from scripts.eda import calculate_text_length, count_by_column
from scripts.text_analysis import perform_sentiment_analysis, extract_keywords
from scripts.time_series_analysis import analyze_publication_trends
from scripts.visualization import plot_counts

# End-to-end exploratory pass over the analyst-ratings dataset: load the CSV,
# then run headline-length stats, publisher counts, sentiment, keyword
# extraction and a publication-trend plot, in that order. Every step reads
# from (and some steps add columns to) the same `data` object, so the order
# of the sections below matters.

# Load data
# The path is relative, so it resolves against the current working directory.
# NOTE(review): `load_csv` is a project helper; it presumably returns a pandas
# DataFrame (the code below indexes it by column name) — confirm.
file_path = "../data/raw_analyst_ratings/raw_analyst_ratings.csv"
data = load_csv(file_path)
# Quick preview of the 'date' column to eyeball its format after loading.
print(data['date'].head())

# EDA: Headline length analysis
# Stores the helper's result as a new 'headline_length' column on `data`
# (in-place mutation), then prints summary statistics for that column.
data['headline_length'] = calculate_text_length(data, 'headline')
print(data['headline_length'].describe())

# EDA: Publisher activity
# Counts rows per 'publisher' via the project helper and hands the result
# straight to the project plotting helper with chart labels.
publisher_counts = count_by_column(data, 'publisher')
plot_counts(publisher_counts, title="Articles by Publisher", xlabel="Publisher", ylabel="Count")

# Sentiment Analysis
# Headlines are passed as a plain Python list. The result is assigned back as
# a column, so it presumably holds one value per headline in the same order
# — TODO confirm against `perform_sentiment_analysis`.
data['sentiment'] = perform_sentiment_analysis(data['headline'].tolist())
# Print how often each distinct sentiment value occurs.
print(data['sentiment'].value_counts())

# Keyword Extraction
# Requests 10 keywords (n_keywords=10) from the full list of headlines.
keywords = extract_keywords(data['headline'].tolist(), n_keywords=10)
print("Top Keywords:", keywords)

# Time Series Analysis
# At this point `data` also carries the 'headline_length' and 'sentiment'
# columns added above. The returned object must expose `.plot(title=...)`;
# presumably a pandas Series/DataFrame — confirm.
# NOTE(review): no explicit show/save call follows, so the figure relies on
# the notebook's inline display — verify if this is ever run as a script.
publication_trends = analyze_publication_trends(data, 'date')
publication_trends.plot(title="Publication Trends Over Time")
