# Task-1: Exploratory Data Analysis (EDA) 

# In [None]:
# This file demonstrates how the notebook would look.
# It imports functions from eda_functions.py and runs the analysis.

# -------------------------------------------------
# 1. Import Libraries and Functions
# -------------------------------------------------


import os
import sys
import pandas as pd

# Make the local 'src' package importable regardless of where the notebook
# kernel was started. Assuming the working directory is exactly one level
# below the project root breaks with "ModuleNotFoundError: No module named
# 'src'" when the kernel runs from the project root itself or from a deeper
# subfolder, so search upward for the folder that actually contains 'src'.
def find_project_root(start, marker='src'):
    """Return the nearest directory at or above *start* that contains *marker*.

    Args:
        start: Directory where the upward search begins.
        marker: Name of the sub-directory that identifies the project root.

    Returns:
        Absolute path of the first directory (``start`` itself, then each
        ancestor in turn) that has a ``marker`` sub-directory. When no such
        directory exists, the parent of ``start`` is returned, which is the
        location this notebook assumed before the search was added.
    """
    start = os.path.abspath(start)
    candidate = start
    while True:
        if os.path.isdir(os.path.join(candidate, marker)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # dirname() is a fixed point at the filesystem root
            break
        candidate = parent
    # Nothing found: keep the original behaviour (parent of the start folder).
    return os.path.abspath(os.path.join(start, '..'))


project_root = find_project_root(os.getcwd())
if project_root not in sys.path:
    # Front of the list so this project's 'src' wins over any installed
    # package with the same name.
    sys.path.insert(0, project_root)

from src.eda_functions import (load_data, clean_data,
                                 headline_length_stats, 
                                 plot_headline_length,
                                 publisher_counts, 
                                 plot_top_publishers,
                                 save_processed,monthly_trends,
                                 plot_monthly_trends)

# -------------------------------------------------
# 2. File Paths
# -------------------------------------------------
# raw_file is the path handed to load_data(); out_file is the path handed to
# save_processed(). Both are absolute Windows paths, written as raw strings so
# the backslashes are taken literally.
raw_file, out_file = (
    r"D:\Python\Week-1\Data-Week-1\raw_analyst_ratings.csv",
    r"D:\Python\Week-1\Data-Week-1\processed_analyst_ratings.csv",
)

# -------------------------------------------------
# 3. Load Dataset
# -------------------------------------------------
# Load the raw file and report the shape of whatever load_data() returned.
print("Loading dataset...")
df = load_data(raw_file)
raw_shape = df.shape
print("Dataset loaded. Shape:", raw_shape)

# -------------------------------------------------
# 4. Clean Dataset
# -------------------------------------------------
# Rebind df to the cleaned result so every later step works on the cleaned
# data, then report the shape again for comparison with the raw load.
print("Cleaning dataset...")
df = clean_data(df)
cleaned_shape = df.shape
print("Cleaned dataset shape:", cleaned_shape)

# -------------------------------------------------
# 5. Descriptive Statistics
# -------------------------------------------------
# Number of publishers shown in both the printed table and the plot, kept in
# one place so the two stay in agreement.
top_n = 20

# Headline-length summary: print the statistics, then draw the plot.
print("Headline Length Statistics:")
length_stats = headline_length_stats(df)
print(length_stats)

plot_headline_length(df)

# Publisher activity: print the first top_n entries, then draw the plot.
print("Top Publishers:")
top_publishers = publisher_counts(df).head(top_n)
print(top_publishers)

plot_top_publishers(df, top_n=top_n)

# -------------------------------------------------
# 6. Publication Date Trends
# -------------------------------------------------
# Print only the tail of the monthly series (tail() is called without an
# argument; for a pandas object that means the last five rows -- presumably
# what monthly_trends() returns, verify in eda_functions.py), then plot it.
print("Monthly Trends:")
recent_trends = monthly_trends(df).tail()
print(recent_trends)

plot_monthly_trends(df)

# -------------------------------------------------
# 7. Save Processed Dataset
# -------------------------------------------------
# Hand the cleaned frame and the destination path to save_processed(), then
# echo the destination path.
print("Saving processed dataset...")
save_processed(df, out_file)
print("Saved at:", out_file)


# Recorded cell output: ModuleNotFoundError: No module named 'src'