In [None]:
!pip install ydata-profiling

In [None]:
import pandas as pd
from ydata_profiling import ProfileReport
from sqlalchemy import create_engine

# Function to get a database connection
def get_db_connection():
    """Open a connection to the ``june`` PostgreSQL database.

    The SQLAlchemy engine is created on the first call and cached on the
    function object, so later calls reuse that engine instead of building
    (and abandoning) a new one every time.

    Returns:
        The object returned by ``engine.connect()``; the caller is
        responsible for closing it.
    """
    engine = getattr(get_db_connection, "_engine", None)
    if engine is None:
        # NOTE(review): credentials are hard-coded in the URL; consider
        # loading them from configuration or the environment instead.
        engine = create_engine('postgresql://postgres:1234@localhost:5432/june')
        get_db_connection._engine = engine
    return engine.connect()

# Function to load forum data
def load_forum_data():
    """Load every row of the ``forums`` table into a DataFrame.

    Returns:
        pandas.DataFrame: the result of ``SELECT * FROM forums``.
    """
    query = "SELECT * FROM forums"
    # The context manager closes the connection even when the query raises,
    # which the previous explicit close() call did not guarantee.
    with get_db_connection() as conn:
        return pd.read_sql(query, conn)

# Function to get posts data for a specific forum
def get_posts_data(forum_id):
    """Fetch the distinct posts that belong to one forum.

    Posts are joined to ``topics`` on ``topic_id`` and filtered by the
    topic's ``forum_id``.

    Args:
        forum_id: Forum identifier; it is passed through ``int()`` before
            being bound as the query parameter.

    Returns:
        pandas.DataFrame: columns ``topic_id``, ``user_id`` and
        ``dateadded_post``, one row per distinct combination.
    """
    query = """
    SELECT DISTINCT posts.topic_id, posts.user_id, posts.dateadded_post
    FROM posts
    INNER JOIN topics ON posts.topic_id = topics.topic_id
    WHERE topics.forum_id = %s
    """
    # The context manager closes the connection even when the query raises,
    # which the previous explicit close() call did not guarantee.
    with get_db_connection() as conn:
        return pd.read_sql(query, conn, params=(int(forum_id),))

# Function to generate a profiling report for each forum
def generate_profiling_report(forum_id):
    """Build a profiling report for one forum's posts and save it as HTML.

    The posts are fetched with ``get_posts_data``, the ``dateadded_post``
    column is converted to UTC datetimes, and the resulting report is
    written to ``profiling_report_forum_<forum_id>.html``.

    Args:
        forum_id: Identifier of the forum to profile.
    """
    posts = get_posts_data(forum_id)

    # Parse the timestamps as UTC datetimes before profiling.
    posts['dateadded_post'] = pd.to_datetime(posts['dateadded_post'], utc=True)

    report_title = f"Profiling Report for Forum ID {forum_id}"
    report = ProfileReport(posts, title=report_title)

    output_path = f"profiling_report_forum_{forum_id}.html"
    report.to_file(output_path)

# Load forum data
df_forums = load_forum_data()

# Extract list of unique forum IDs
forum_ids = df_forums['forum_id'].unique()

# Generate profiling report for each forum
for forum_id in forum_ids:
    try:
        generate_profiling_report(forum_id)
    except ValueError as exc:
        # Keep the batch going when a single forum cannot be profiled instead
        # of aborting every remaining forum.
        # NOTE(review): ProfileReport is expected to raise ValueError for an
        # empty DataFrame (a forum with no posts) - confirm for the installed
        # ydata-profiling version.
        print(f"Skipping Forum ID {forum_id}: {exc}")
        continue
    print(f"Profiling report generated for Forum ID {forum_id}")