Step 1: Simulate the Dataset

In [13]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Simulate a reproducible log of course enrollments and save it as a CSV file.
# Each row is one enrollment carrying completion, quiz and video metrics.
np.random.seed(42)  # fixed seed so every re-run generates the same dataset

n = 1000
students = [f"STU{i:04}" for i in range(1, 201)]
courses = [f"COURSE{i}" for i in range(1, 6)]  # 5 topics/modules

data = []

for _ in range(n):
    student_id = np.random.choice(students)
    course_id = np.random.choice(courses)
    # np.random.randint excludes its upper bound, so 13 and 29 are needed to
    # cover months 1-12 and days 1-28 (day 28 exists in every month).
    enroll_date = datetime(2024, np.random.randint(1, 13), np.random.randint(1, 29))
    # Percentages are drawn from a normal distribution and clipped to [0, 100].
    completion_pct = np.clip(np.random.normal(75, 25), 0, 100)
    quiz_score = np.clip(np.random.normal(70, 20), 0, 100)
    video_time = np.random.randint(10, 60)
    # Drop-off is centred on 40-100% of the video time, with some noise,
    # then clamped so it never falls outside [0, video_time].
    drop_off = np.random.normal(video_time * np.random.uniform(0.4, 1.0), 5)
    drop_off = max(0, min(video_time, drop_off))

    data.append({
        'Student ID': student_id,
        'Course ID': course_id,
        'Enrollment Date': enroll_date,
        'Completion %': round(completion_pct, 1),
        'Quiz Score': round(quiz_score, 1),
        'Time on Video (min)': video_time,
        'Drop-off Timestamp (min)': round(drop_off, 1)
    })

# Build the frame once from the collected records and persist it for later steps.
df = pd.DataFrame(data)
df.to_csv('student_course_data.csv', index=False)


Step 2: Load the Data

In [14]:
# Reload the simulated dataset from disk, parsing the enrollment column as dates.
df = pd.read_csv(
    'student_course_data.csv',
    parse_dates=['Enrollment Date'],
)


Step 3: Completion Rate by Course

In [15]:
# A row counts as "completed" once its completion percentage reaches 90.
completed_flag = df['Completion %'] >= 90
df['Completed'] = completed_flag

# Per course: number of enrollment rows and number of completed rows.
completion_summary = df.groupby('Course ID').agg(**{
    'Enrolled': ('Student ID', 'count'),
    'Completed': ('Completed', 'sum'),
})

# Share of enrollment rows that were completed, as a percentage.
completed_count = completion_summary['Completed']
enrolled_count = completion_summary['Enrolled']
completion_summary['Completion Rate (%)'] = 100 * (completed_count / enrolled_count)


Plot: Completion Rate by Course

In [16]:
import plotly.express as px

# Bar chart of the completion rate per course; bars are labelled and coloured by the rate.
completion_by_course = completion_summary.reset_index()
fig1 = px.bar(
    completion_by_course,
    title='Course Completion Rate by Course ID',
    x='Course ID',
    y='Completion Rate (%)',
    color='Completion Rate (%)',
    text='Completion Rate (%)',
)
fig1.show()


Step 4: Student Performance Analysis

In [17]:
# Per course: mean quiz score and mean completion percentage.
performance_summary = df.groupby('Course ID').agg(**{
    'Avg_Quiz_Score': ('Quiz Score', 'mean'),
    'Avg_Completion_Percent': ('Completion %', 'mean'),
})


Plot: Avg. Quiz Score per Course

In [18]:
# Bar chart of the mean quiz score per course; bars are labelled and coloured by the score.
performance_by_course = performance_summary.reset_index()
fig2 = px.bar(
    performance_by_course,
    title='Average Quiz Score by Course',
    x='Course ID',
    y='Avg_Quiz_Score',
    color='Avg_Quiz_Score',
    text='Avg_Quiz_Score',
)
fig2.show()


Step 5: Correlation — Time on Video vs Quiz Score

In [19]:
# Plotly Express needs statsmodels to fit the trendline='ols' used in the scatter plot below.
%pip install statsmodels




In [20]:

# Scatter of quiz score against video time, coloured by course, with an OLS trendline.
scatter_options = dict(
    title='Quiz Score vs Time Spent on Video',
    x='Time on Video (min)',
    y='Quiz Score',
    color='Course ID',
    trendline='ols',
)
fig3 = px.scatter(df, **scatter_options)
fig3.show()




Step 6: Drop-Off Point Analysis

In [21]:
# Per course: mean video time and mean drop-off timestamp.
drop_summary = df.groupby('Course ID').agg(**{
    'Avg_Video_Length': ('Time on Video (min)', 'mean'),
    'Avg_Drop_Off': ('Drop-off Timestamp (min)', 'mean'),
})

# Engagement rate = mean drop-off as a percentage of mean video time.
mean_drop_off = drop_summary['Avg_Drop_Off']
mean_video_length = drop_summary['Avg_Video_Length']
drop_summary['Avg_Engagement_Rate (%)'] = 100 * (mean_drop_off / mean_video_length)


Plot: Engagement Rate by Course

In [22]:
# Bar chart of the engagement rate per course; bars are labelled and coloured by the rate.
engagement_by_course = drop_summary.reset_index()
fig4 = px.bar(
    engagement_by_course,
    title='Average Engagement (Drop-off Point) by Course',
    x='Course ID',
    y='Avg_Engagement_Rate (%)',
    color='Avg_Engagement_Rate (%)',
    text='Avg_Engagement_Rate (%)',
)
fig4.show()


Step 7: Drop-off Heatmap (optional advanced plot)

In [23]:
# Bucket the drop-off timestamps into 10 equal-width bins and count rows per course and bin.
heat_df = df[['Course ID', 'Drop-off Timestamp (min)']].copy()
heat_df['Drop Bin'] = pd.cut(heat_df['Drop-off Timestamp (min)'], bins=10)

# 'Drop Bin' is categorical. Passing observed=False explicitly keeps a zero-count
# row for every empty bin (so bins stay complete and in ascending order) no matter
# what the installed pandas uses as its default, and avoids the FutureWarning
# raised when the argument is omitted.
heatmap_data = (
    heat_df.groupby(['Course ID', 'Drop Bin'], observed=False)
    .size()
    .reset_index(name='Count')
)
heatmap_data['Drop Bin'] = heatmap_data['Drop Bin'].astype(str)  # Convert Interval to string

# Heatmap of counts: drop-off bin on the x-axis, course on the y-axis.
fig5 = px.density_heatmap(
    heatmap_data,
    x='Drop Bin',
    y='Course ID',
    z='Count',
    title='Drop-off Timestamp Heatmap by Course',
    color_continuous_scale='Viridis'
)
fig5.show()






Exporting All Plots to a Single HTML File

In [24]:
import plotly.io as pio

# Every figure built in the earlier steps, in the order they should appear on the page.
figures = [fig1, fig2, fig3, fig4, fig5]

# Render each figure as an HTML fragment. Only the first fragment carries the
# plotly.js <script> tag (loaded from the CDN); the later fragments reuse it,
# so the library is not requested once per figure.
html_parts = [
    pio.to_html(fig, full_html=False, include_plotlyjs='cdn' if position == 0 else False)
    for position, fig in enumerate(figures)
]

# Wrap the fragments in a single HTML document. The charset declaration matches
# the UTF-8 encoding used when the file is written below.
full_html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Online Course Platform Student Analysis</title>
</head>
<body>
    {''.join(html_parts)}
</body>
</html>
"""

# Save to file
output_path = "Online Course Platform Student Analysis.html"
with open(output_path, "w", encoding="utf-8") as f:
    f.write(full_html)

print(f"Saved to {output_path}")

Saved to Online Course Platform Student Analysis.html
