In [None]:
from pathlib import Path

import pandas as pd
from evidently import Dataset, DataDefinition, Report
from evidently.future.metrics import MeanError
from evidently.future.presets import (
    DataSummaryPreset, DataDriftPreset, TextEvals
)
from evidently.future.descriptors import Sentiment, TextLength, Contains
from evidently import Regression

# Load the reference and current student datasets from CSV (reference first,
# then current), keeping them as two separate DataFrames.
ref_df, curr_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/students_reference_dataset.csv",
        "../data/students_current_dataset.csv",
    )
)

# Define the schema with DataDefinition: map every column to its role.
# "actual_grade" and "predicted_grade" are listed both as numerical columns
# and as the target/prediction pair of the regression task.
schema = DataDefinition(
    regression=[
        Regression(target="actual_grade", prediction="predicted_grade"),
    ],
    text_columns=["feedback_text"],
    categorical_columns=["course_difficulty", "satisfaction_level"],
    numerical_columns=[
        "age",
        "study_hours_per_week",
        "previous_gpa",
        "actual_grade",
        "predicted_grade",
    ],
)

# Descriptors evaluated on the "feedback_text" column: sentiment, text length,
# and two keyword-presence checks. The Contains descriptors are generated from
# (alias, keywords) pairs, in the order listed.
# NOTE(review): keyword matching is presumably case-sensitive by default, so
# "Excellent" may not match "excellent" — confirm against the Contains docs.
descriptors = [
    Sentiment("feedback_text", alias="Sentiment"),
    TextLength("feedback_text", alias="Text Length"),
    *(
        Contains("feedback_text", items=keywords, alias=label)
        for label, keywords in (
            ("Clarity Issues", ["confusing", "unclear", "repetitive"]),
            ("Positive Highlights", ["excellent", "rewarding", "valuable"]),
        )
    ),
]


# Wrap both DataFrames as Evidently Datasets, applying the same schema and the
# same descriptor list to the reference and the current data.
ref_data, curr_data = (
    Dataset.from_pandas(frame, data_definition=schema, descriptors=descriptors)
    for frame in (ref_df, curr_df)
)

# Build a report that combines the data summary, data drift and text
# evaluation presets with the MeanError metric; include_tests=True requests
# the accompanying tests as well.
report = Report(
    [DataSummaryPreset(), DataDriftPreset(), TextEvals(), MeanError()],
    include_tests=True,
)

# Evaluate the current dataset against the reference dataset.
result = report.run(current_data=curr_data, reference_data=ref_data)

# Save the HTML report. The output directory is created first (no-op when it
# already exists) so the write does not fail on a fresh checkout where
# "../reports" is missing.
# NOTE(review): assumes save_html does not create parent directories itself.
report_path = "../reports/student_full_evaluation_report.html"
Path(report_path).parent.mkdir(parents=True, exist_ok=True)
# The path is passed as a plain string, exactly as before.
result.save_html(report_path)
print("Report saved successfully to 'reports'.")
 