In [80]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
from datetime import datetime, timedelta
import re

In [91]:
# Function to create a DataFrame from a JSON URL
def create_df_from_json_url(url):
  response = requests.get(url)
  data = json.loads(response.text)
  df = pd.json_normalize(data, record_path=['events'])
  return df
def convert_events_date(date_str):
    try:
        return pd.to_datetime(date_str, format='%A, %B %d, %Y', errors='raise')
    except ValueError:
        # format is Monday, April 21 2025, change format
        return pd.to_datetime(date_str, format='%A, %b %d %Y', errors='raise')

def convert_events_time(time_str):
    if time_str == 'All Day':
        return pd.to_datetime('00:00:00', format='%H:%M:%S').time()  # Set to midnight for "All Day"
    elif re.match(r'\d{1,2}:\d{2}[ap]m - \d{1,2}:\d{2}[ap]m', time_str):  # Handle ranges
        start_time_str = time_str.split(' - ')[0]
        return pd.to_datetime(start_time_str, format='%I:%M%p').time()
    else:
        try:
            return pd.to_datetime(time_str, format='%I:%M%p').time()
        except ValueError:
            return pd.NaT  # Handle invalid formats

# Create DataFrame for the second JSON file (nyuEventsRaw.json)
events_url = "https://raw.githubusercontent.com/Md905908324/hofhack25/refs/heads/main/nyuEventsRaw.json"
events_df = create_df_from_json_url(events_url)
events_df['date'] = events_df['date'].apply(convert_events_date)
events_df['time'] = events_df['time'].apply(convert_events_time)
events_df['datetime'] = pd.to_datetime(events_df['date'].astype(str) + ' ' + events_df['time'].astype(str), errors='coerce')
events_df = events_df.drop(columns=['date', 'time'])
events_df

Unnamed: 0,name,location,description,datetime
0,ACF Nationals,University of Maryland,,2025-04-20 00:00:00
1,AMC Virtual Diaper Drive,Benefitting Little Essentials,,2025-04-20 00:00:00
2,Bridge and Spades Meeting & Eboard Election,Kimmel 802 Shoren,,2025-04-20 00:00:00
3,Bridges Mentorship Program,Kimmel Rm 375,,2025-04-20 00:00:00
4,CBL Doodles; Tea and Joe’s Pizza,GLOBAL CTR (Kimmel) - GC 361,,2025-04-20 00:00:00
...,...,...,...,...
149,Ethics Workshop Series,Washington Square Campus,,2025-04-21 16:00:00
150,Papotons en français!,Washington Square Campus,,2025-04-22 12:30:00
151,Ethics Workshop Series,Washington Square Campus,,2025-04-22 16:00:00
152,Papotons en français!,Washington Square Campus,,2025-04-23 12:30:00


In [82]:
engage_url = "https://raw.githubusercontent.com/Md905908324/hofhack25/refs/heads/main/nyuEngageRaw.json"
engage_df = create_df_from_json_url(engage_url)

def convert_date(date_str):
    # Remove the time zone abbreviation (e.g., EDT)
    date_str = re.sub(r" EDT| EST", "", date_str)
    # Add the year 2025 to the date string
    date_str = date_str + " 2025"
    # Parse the date and time
    return pd.to_datetime(date_str, format='%A, %B %d at %I:%M%p %Y')

engage_df['date'] = engage_df['date'].apply(convert_date)
engage_df = engage_df.rename(columns={'date': 'datetime'})


In [83]:
engage_df

Unnamed: 0,name,datetime,location,organization,host
0,Volunteer at Tompkins Distro w/ CAS ISC,2025-04-19 09:00:00,Tompkins Square Park,,
1,E Board Gathering Meeting,2025-04-19 11:00:00,Online,International Students Club,
2,Taiwanese Student Association - All-University...,2025-04-19 11:00:00,Online,,
3,Peer Tutoring,2025-04-19 11:00:00,CycleBar Noho,,
4,Society for Women in Sports - All-University M...,2025-04-19 11:00:00,6 Metrotech Room 304,,
...,...,...,...,...,...
81,English Conversation Group Meeting,2025-04-23 19:15:00,GCASL 361,,Compass Koinonia - All-University
82,Finding Purpose,2025-04-23 18:30:00,Kimmel CTR 908,,Lotus Lounge - All-University
83,Weekly Writers Meeting,2025-04-23 18:30:00,Kimmel Center - Room 904,,Plague - All-University
84,Orevwa Chérie!,2025-04-23 18:30:00,Kimmel 905,,Haitian American Students Association - All-Un...


In [93]:
combined_df = pd.concat([engage_df, events_df], ignore_index=True)
combined_df

Unnamed: 0,name,datetime,location,organization,host,description
0,Volunteer at Tompkins Distro w/ CAS ISC,2025-04-19 09:00:00,Tompkins Square Park,,,
1,E Board Gathering Meeting,2025-04-19 11:00:00,Online,International Students Club,,
2,Taiwanese Student Association - All-University...,2025-04-19 11:00:00,Online,,,
3,Peer Tutoring,2025-04-19 11:00:00,CycleBar Noho,,,
4,Society for Women in Sports - All-University M...,2025-04-19 11:00:00,6 Metrotech Room 304,,,
...,...,...,...,...,...,...
235,Ethics Workshop Series,2025-04-21 16:00:00,Washington Square Campus,,,
236,Papotons en français!,2025-04-22 12:30:00,Washington Square Campus,,,
237,Ethics Workshop Series,2025-04-22 16:00:00,Washington Square Campus,,,
238,Papotons en français!,2025-04-23 12:30:00,Washington Square Campus,,,


In [94]:
combined_df['All Day'] = combined_df['datetime'].dt.time == pd.to_datetime('00:00:00').time()
combined_df = combined_df.drop(columns=['organization', 'host', 'description'])
combined_df = combined_df.dropna(subset=['datetime'])
combined_df = combined_df.sort_values(by='datetime')
combined_df = combined_df.reset_index(drop=True)
combined_df

Unnamed: 0,name,datetime,location,All Day
0,APDA Tournament Hosted by University of Virginia,2025-04-18 15:00:00,University of Virginia,False
1,HOF Hacks - 24 hour hackathon with Vercel & Figma,2025-04-18 16:30:00,HOF Capital NYC HQ,False
2,Volunteer at Tompkins Distro w/ CAS ISC,2025-04-19 09:00:00,Tompkins Square Park,False
3,Peer Tutoring,2025-04-19 11:00:00,CycleBar Noho,False
4,E Board Gathering Meeting,2025-04-19 11:00:00,Online,False
...,...,...,...,...
220,Muslim Voices for Social Justice,2025-04-23 19:00:00,Kimmel Center,False
221,English Conversation Group Meeting,2025-04-23 19:15:00,GCASL 361,False
222,Spring Jazz Festival: Vocal Jazz Ensemble,2025-04-23 19:30:00,Frederick Loewe Theatre,False
223,Cross-Examination Debate Association - General...,2025-04-23 19:30:00,Global Center for Academic and Spiritual Life,False


In [95]:
combined_df.to_csv('combined_data.csv', index=False)