# Tulsa Government Access Television Meetings

This notebook demonstrates how to fetch meeting data from the Tulsa Government Access Television (TGOV) website and convert it to a pandas DataFrame for analysis.

In [None]:
import sys
import os
from pathlib import Path
import asyncio
import pandas as pd
from IPython.display import display

# Put the parent directory on the import path so the `src` package used by
# later cells can be imported. The path is relative, so it is resolved against
# the kernel's current working directory.
sys.path.append("../")


## Fetch Meeting Data

First, let's fetch the meeting data from the TGOV website using our `get_meetings()` function.

In [None]:
from src.meetings import get_meetings

# get_meetings() is awaited directly at cell level. This relies on the
# notebook kernel's support for top-level `await`; the same line is a
# syntax error in a plain Python script.
meetings = await get_meetings()

# Report how many meetings were returned, then leave the first five as the
# cell's displayed output.
print(f"Found {len(meetings)} meetings")
meetings[:5]

## Convert to DataFrame

Now, let's convert the list of Meeting objects to a pandas DataFrame for easier analysis.

In [None]:
# Turn every Meeting into a plain dict via its model_dump() method
meeting_dicts = [m.model_dump() for m in meetings]

# One row per meeting, one column per dumped field
df = pd.DataFrame(meeting_dicts)

# Preview the first rows and report the overall size
display(df.head())
print(f"DataFrame shape: {df.shape}")

## Basic Analysis

Let's perform some basic analysis on the meeting data.

In [None]:
import matplotlib.pyplot as plt

# Tally how many rows carry each value of the 'meeting' column
meeting_counts = df['meeting'].value_counts()
display(meeting_counts)

# Bar chart of the tallies, styled through the Axes object pandas returns
ax = meeting_counts.plot(kind='bar', figsize=(12, 6))
ax.set_title('Number of Meetings by Type')
ax.set_xlabel('Meeting Type')
ax.set_ylabel('Count')
# Slant the category labels so long meeting names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.figure.tight_layout()
plt.show()

## Duration Analysis

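Let's analyze the duration of meetings. Durations are stored as strings such as "00h 39m", so we first convert them to a number of minutes.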

In [None]:
# Convert duration strings to minutes
def duration_to_minutes(duration):
    """Convert a duration string such as "00h 39m" into total minutes.

    Accepted shapes are "<H>h <M>m", "<H>h" and "<M>m"; whitespace around
    the numbers is ignored.

    Args:
        duration: The raw duration value. Anything that is not a non-blank
            string (None, NaN, pd.NA, numbers, ...) is treated as missing.

    Returns:
        The duration in minutes as an int, or None when the value is missing
        or cannot be parsed. A string containing neither an "h" nor an "m"
        unit is reported as None rather than 0, so unparseable values do not
        show up as zero-length meetings in later statistics.
    """
    # Checking the type first avoids truth-testing missing-value markers
    # (bool(pd.NA) raises) and sends every non-string straight to None.
    if not isinstance(duration, str):
        return None

    text = duration.strip()
    if not text:
        return None

    # No recognisable unit at all: this is not a duration we understand.
    if 'h' not in text and 'm' not in text:
        return None

    try:
        hours = 0
        minutes = 0
        remainder = text

        if 'h' in text:
            # Everything before the first 'h' is the hour count; the minute
            # part, if any, is looked for in what follows it.
            hours_part, remainder = text.split('h', 1)
            hours = int(hours_part.strip())

        if 'm' in remainder:
            minutes = int(remainder.split('m', 1)[0].strip())

        return hours * 60 + minutes
    except ValueError:
        # int() rejected a non-numeric hour or minute part.
        return None

# Derive a numeric minutes column from the raw 'duration' strings
df['duration_minutes'] = df['duration'].apply(duration_to_minutes)

# Summary statistics for the derived column
duration_stats = df['duration_minutes'].describe()
display(duration_stats)

# Histogram of the durations, drawn on an explicitly created Axes
fig, ax = plt.subplots(figsize=(10, 6))
df['duration_minutes'].hist(bins=20, ax=ax)
ax.set_title('Distribution of Meeting Durations')
ax.set_xlabel('Duration (minutes)')
ax.set_ylabel('Count')
# pandas switches the grid on for histograms; turn it back off
ax.grid(False)
plt.show()

## Average Duration by Meeting Type

Let's calculate the average duration for each type of meeting.

In [None]:
# Mean duration per 'meeting' value, longest first
avg_duration_by_type = (
    df.groupby('meeting')['duration_minutes']
    .mean()
    .sort_values(ascending=False)
)
display(avg_duration_by_type)

# Bar chart of the means, styled through the Axes object pandas returns
ax = avg_duration_by_type.plot(kind='bar', figsize=(12, 6))
ax.set_title('Average Meeting Duration by Type')
ax.set_xlabel('Meeting Type')
ax.set_ylabel('Average Duration (minutes)')
# Slant the category labels so long meeting names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.figure.tight_layout()
plt.show()

## Save the DataFrame

Finally, let's save the DataFrame to a CSV file for future use.

In [None]:
# Show the first rows of df as a last look at the table before it is written
# to disk.
df.head()

In [None]:
# Write the table to CSV, creating the target folder first if it is missing
output_path = '../data/meetings.csv'
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)  # index=False: leave the row index out of the file
print(f"DataFrame saved to {output_path}")