In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive at /content/drive so the CSV paths used below resolve.
drive.mount('/content/drive')

In [None]:
# Path of the committee-meetings CSV export on the mounted Drive.
file_path = '/content/drive/MyDrive/PROJECTS/PMG/Parlimeter/committee-meetings.csv'

# Load the export into a DataFrame and show its first rows.
df = pd.read_csv(file_path)
df.head()

In [None]:
# Swap the export's display headers for short snake_case names; the duration
# cells further down look the time columns up under these names.
df = df.rename(
    {
        'Scheduled Start Time': 'scheduled_start_time',
        'Scheduled End Time': 'scheduled_end_time',
        'Actual Start Time': 'actual_start_time',
        'Actual End Time': 'actual_end_time',
        'Committee → Name': 'committee',
    },
    axis='columns',
)



In [None]:
# Normalise every date column to a 'yyyy-mm-dd' string: parse the raw values
# into datetimes, then immediately serialise them back in that fixed layout.
date_columns = ['Date']
for col in date_columns:
    df[col] = pd.to_datetime(df[col]).dt.strftime('%Y-%m-%d')

In [None]:
# Display the frame to check the renamed columns and the reformatted Date values.
df

In [None]:
# Remove the ID and Title columns (no later cell visible in this notebook
# reads them), then display the trimmed frame.
df = df.drop(['ID', 'Title'], axis='columns')
df

In [None]:
import pandas as pd
from datetime import datetime


# Parse the four clock-time columns from 12-hour "HH:MM AM/PM" text into
# Timestamps. errors="coerce" turns any value that does not match the format
# (missing or malformed) into NaT instead of raising.
df = df.assign(**{
    time_col: pd.to_datetime(df[time_col], errors="coerce", format="%I:%M %p")
    for time_col in (
        "scheduled_start_time",
        "scheduled_end_time",
        "actual_start_time",
        "actual_end_time",
    )
})

def get_minutes(dt):
    """Convert a timestamp to whole minutes elapsed since midnight.

    Args:
        dt: An object exposing ``hour`` and ``minute`` attributes (for example
            a ``pd.Timestamp``), or a missing value (``NaT``, ``None``, ``NaN``).

    Returns:
        ``hour * 60 + minute``, or ``None`` when ``dt`` is missing. Only the
        hour and minute are read; seconds and the date part play no role.
    """
    return None if pd.isnull(dt) else 60 * dt.hour + dt.minute

def compute_durations(row):
    """Derive the scheduled and actual meeting lengths (in minutes) for one row.

    Each of the two durations is resolved the same way:

    1. both of its own start/end times are present -> end minus start (flag 0);
    2. exactly one of them is present -> 60-minute fallback (flag 1);
    3. neither is present -> the *other* pair's end minus start if that pair
       is complete (flag 0), otherwise the 60-minute fallback (flag 1).

    The subtraction is done on minutes-since-midnight, so an end time earlier
    than its start time produces a negative duration.

    Args:
        row: Record indexable by "scheduled_start_time", "scheduled_end_time",
            "actual_start_time" and "actual_end_time"; each value is passed to
            ``get_minutes``.

    Returns:
        pd.Series with "scheduled_time", "actual_time", "estimated_scheduled"
        and "estimated_actual", in that order.
    """
    planned = (
        get_minutes(row["scheduled_start_time"]),
        get_minutes(row["scheduled_end_time"]),
    )
    observed = (
        get_minutes(row["actual_start_time"]),
        get_minutes(row["actual_end_time"]),
    )

    def resolve(own, other):
        """Return (minutes, estimated_flag) for `own`, borrowing from `other` if needed."""
        begin, finish = own
        if begin is not None and finish is not None:
            return finish - begin, 0
        if begin is None and finish is None:
            # Nothing recorded for this pair: reuse the other pair's span,
            # but only when that one is complete.
            other_begin, other_finish = other
            if other_begin is not None and other_finish is not None:
                return other_finish - other_begin, 0
        # Half-recorded pair, or nothing usable on either side.
        return 60, 1

    scheduled_time, estimated_scheduled = resolve(planned, observed)
    actual_time, estimated_actual = resolve(observed, planned)

    return pd.Series({
        "scheduled_time": scheduled_time,
        "actual_time": actual_time,
        "estimated_scheduled": estimated_scheduled,
        "estimated_actual": estimated_actual,
    })

# 1) Calculate durations & flags for each row
df[["scheduled_time", "actual_time", "estimated_scheduled", "estimated_actual"]] = (
    df.apply(compute_durations, axis=1)
)

# 2) Group by committee and sum the minutes and the estimate flags
meetings_by_committee = df.groupby("committee")
grouped = meetings_by_committee.agg({
    "scheduled_time": "sum",
    "actual_time": "sum",
    "estimated_scheduled": "sum",
    "estimated_actual": "sum"
}).reset_index()

# Meetings per committee, counted once from the same grouping instead of
# re-scanning the whole frame for every committee while building the records.
meeting_counts = meetings_by_committee.size()

# 3) Build final data structure: one dict per committee
final_data = [
    {
        "committee": record["committee"],
        "meetings": int(meeting_counts.loc[record["committee"]]),
        "total_scheduled_time": int(record["scheduled_time"]),  # in minutes
        "total_time": int(record["actual_time"]),               # in minutes
        # Sum of both flag totals: a meeting whose scheduled AND actual
        # durations were both estimated contributes 2 here.
        "estimated_count": int(record["estimated_scheduled"] + record["estimated_actual"]),
        "trend": [2, 1, 4, 20],  # placeholder
    }
    for record in grouped.to_dict("records")
]

# 4) Print as a JavaScript-like list of objects
print(final_data)


In [None]:
# Save the frame (including the duration and estimate-flag columns added
# above) to Drive; index=False leaves the row index out of the file.
# NOTE(review): the four *_time columns are still the Timestamps parsed earlier
# in this notebook, so they are written as full datetimes (pandas fills in a
# 1900-01-01 placeholder date for time-only formats) rather than clock text.
# Confirm this is the intended export format.
output_file_path = '/content/drive/MyDrive/PROJECTS/PMG/Parlimeter/modified_parlimeter__committee_meetings.csv'
df.to_csv(output_file_path, index=False)

# Process the meetings-with-attendance CSV

In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive at /content/drive (same mount point as at the top of the
# notebook — presumably repeated so this section can be run on its own; confirm).
drive.mount('/content/drive')

In [None]:
# Path of the meetings-with-attendance CSV on the mounted Drive.
# Note: this rebinds `file_path` and `df`, replacing the frame from the first section.
file_path = '/content/drive/MyDrive/PROJECTS/PMG/Parlimeter/committee-meeting-with-attendance-and-time.csv'
df = pd.read_csv(file_path)

In [None]:
# Display the freshly loaded frame.
df

In [None]:
# Parse the four clock-time columns from 24-hour "HH:MM" text into Timestamps.
# errors="coerce" turns any value that does not match the format (missing or
# malformed) into NaT instead of raising.
df = df.assign(**{
    time_col: pd.to_datetime(df[time_col], errors="coerce", format="%H:%M")
    for time_col in (
        "scheduled_start_time",
        "scheduled_end_time",
        "actual_start_time",
        "actual_end_time",
    )
})

def get_minutes(dt):
    """Convert a timestamp to whole minutes elapsed since midnight.

    Args:
        dt: An object exposing ``hour`` and ``minute`` attributes (for example
            a ``pd.Timestamp``), or a missing value (``NaT``, ``None``, ``NaN``).

    Returns:
        ``hour * 60 + minute``, or ``None`` when ``dt`` is missing.
    """
    if pd.isnull(dt):
        return None
    whole_hours, extra_minutes = dt.hour, dt.minute
    return whole_hours * 60 + extra_minutes

def compute_durations(row):
    """Compute the scheduled and actual meeting durations (in minutes) for one row.

    Each duration is resolved the same way:

    1. both of its own start/end times are present -> end minus start;
    2. exactly one of them is present -> 60-minute fallback;
    3. neither is present -> the *other* pair's end minus start if that pair
       is complete, otherwise the 60-minute fallback.

    Unlike the four-column variant earlier in this notebook, this version
    returns only the two durations and does not report which values were
    estimated. The subtraction is done on minutes-since-midnight, so an end
    time earlier than its start time produces a negative duration.

    Args:
        row: Record indexable by "scheduled_start_time", "scheduled_end_time",
            "actual_start_time" and "actual_end_time"; each value is passed to
            ``get_minutes``.

    Returns:
        pd.Series with "scheduled_time" and "actual_time".
    """
    fallback_minutes = 60  # used whenever no complete start/end pair is usable

    sched_start = get_minutes(row["scheduled_start_time"])
    sched_end = get_minutes(row["scheduled_end_time"])
    actual_start = get_minutes(row["actual_start_time"])
    actual_end = get_minutes(row["actual_end_time"])

    def resolve(own_start, own_end, other_start, other_end):
        """Return the duration for one pair, borrowing the other pair's span if allowed."""
        if own_start is not None and own_end is not None:
            return own_end - own_start
        own_missing_entirely = own_start is None and own_end is None
        other_complete = other_start is not None and other_end is not None
        # Borrow only when this pair has no data at all; a half-recorded pair
        # goes straight to the fallback, exactly as before.
        if own_missing_entirely and other_complete:
            return other_end - other_start
        return fallback_minutes

    return pd.Series({
        "scheduled_time": resolve(sched_start, sched_end, actual_start, actual_end),
        "actual_time": resolve(actual_start, actual_end, sched_start, sched_end),
    })

# Run compute_durations on every row and store its two outputs as new columns.
df[["scheduled_time", "actual_time"]] = df.apply(compute_durations, axis="columns")

df

In [None]:
# Turn the parsed Timestamps back into plain 24-hour "HH:MM" text before the
# frame is saved. This needs datetime columns: running the cell a second time
# fails because the columns are strings by then and have no .dt accessor.
df = df.assign(**{
    time_col: df[time_col].dt.strftime("%H:%M")
    for time_col in (
        "scheduled_start_time",
        "scheduled_end_time",
        "actual_start_time",
        "actual_end_time",
    )
})

In [None]:
# Display the frame to check the time columns are back to "HH:MM" text.
df

In [None]:
# Write the result to the same Drive folder as the input, under a "-final"
# name; index=False leaves the row index out of the file.
output_file_path = '/content/drive/MyDrive/PROJECTS/PMG/Parlimeter/committee-meeting-with-attendance-and-time-final.csv'
df.to_csv(output_file_path, index=False)
