In [272]:
import os
import kagglehub
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Fetch the SXSW 2019 schedule dataset and show where it landed and what it contains.
SXSW_DATASET_HANDLE = "thedevastator/sxsw-2019-schedule-dataset"
path = kagglehub.dataset_download(SXSW_DATASET_HANDLE)

print("Path to dataset files:", path)
print("Files in the dataset:", os.listdir(path))

Path to dataset files: /kaggle/input/sxsw-2019-schedule-dataset
Files in the dataset: ['speaker_schedule_2019.csv', 'sxsw_schedule_collector.py', 'music_schedule_2019.csv']


In [273]:
def keep_first_n(df: pd.DataFrame, n: int = 50) -> pd.DataFrame:
    """Return the leading ``n`` rows of ``df`` as a new frame.

    The original index labels are discarded; the result is renumbered
    from 0 so callers can rely on a clean positional index.
    """
    leading_rows = df.head(n)
    return leading_rows.reset_index(drop=True)

In [274]:
def generate_future_datetimes(n=50, weeks_ahead_start=1, months_ahead_end=4, now=None, seed=None):
    """
    Generate n synthetic event start times inside a forward-looking window.

    The window opens `weeks_ahead_start` weeks after the reference time and
    closes `months_ahead_end` months after it (a month is approximated as
    30 days). Each value gets a random day in that window, a random hour
    between 10 and 23 inclusive, and a minute on a quarter-hour boundary.

    The calendar year is NOT forced to a fixed value: pinning the year made
    a window that crosses New Year wrap backwards (end before start, which
    crashed `random.randint`), failed on Feb 29, and produced dates in the
    past whenever the notebook ran in a different year.

    Parameters
    ----------
    n : int
        Number of datetimes to generate.
    weeks_ahead_start : int
        Weeks after the reference time at which the window opens.
    months_ahead_end : int
        30-day months after the reference time at which the window closes.
    now : datetime, optional
        Reference time; defaults to `datetime.now()`. Pass a fixed value for
        reproducible output.
    seed : int, optional
        When given, a private `random.Random(seed)` is used so the result is
        deterministic; otherwise the module-level `random` state is used.

    Returns
    -------
    pandas.Series
        Strings in schema format (YYYY-MM-DD HH:MM:SS), indexed 0..n-1.

    Raises
    ------
    ValueError
        If the window closes before it opens.
    """
    anchor = datetime.now() if now is None else now
    start_window = anchor + timedelta(weeks=weeks_ahead_start)
    end_window = anchor + timedelta(days=months_ahead_end * 30)

    # The span is loop-invariant, so compute and validate it once up front.
    delta_days = (end_window - start_window).days
    if delta_days < 0:
        raise ValueError(
            "The datetime window ends before it starts; "
            "increase months_ahead_end or decrease weeks_ahead_start."
        )

    # A dedicated generator keeps seeded runs from disturbing global random state.
    rng = random if seed is None else random.Random(seed)

    datetimes = []
    for _ in range(n):
        # picks a random day in range
        d = start_window + timedelta(days=rng.randint(0, delta_days))
        # picks a random hour/minute for event start
        hour = rng.randint(10, 23)  # 10am–11pm
        minute = rng.choice([0, 15, 30, 45])
        dt = d.replace(hour=hour, minute=minute, second=0, microsecond=0)
        datetimes.append(dt.strftime("%Y-%m-%d %H:%M:%S"))

    return pd.Series(datetimes)

In [275]:
# Build the CSV location from the directory kagglehub returned instead of a
# hard-coded /kaggle/input path, so the cell also works where the download
# lands somewhere else. `path` must still refer to the SXSW download at this
# point (it is reassigned further down); re-run the SXSW download cell first
# if it has been.
music_csv = os.path.join(path, 'music_schedule_2019.csv')
music_events = pd.read_csv(music_csv)
music_events.head()

Unnamed: 0,index,name,summary,genre,subgenre,home,audio,title_and_location,date,event_time,venue
0,0,A5iv3,"They Call me A5iv3(A5) I'm From Saint Paul,MN ...",Hip-Hop / Rap,,"St. Paul, MN",http://audio.sxsw.com/2019/mp3_by_artist_id/e5...,A5iv3 at Buffalo Billiards,"Mar 14, 2019",,Buffalo Billiards
1,1,AAerial,A fictional story of a lone space traveler tol...,Metal,Progressive,"Austin, TX",http://audio.sxsw.com/2019/mp3_by_artist_id/a4...,AAerial at Dirty Dog Bar,"Mar 16, 2019",7:30pm-8:00pm,Dirty Dog Bar
2,2,Aaron Cohen,"Based in Queens, NY, Aaron Cohen was quickly e...",Hip-Hop / Rap,Hip-Hop,"New York, NY",http://audio.sxsw.com/2019/mp3_by_artist_id/5e...,Aaron Cohen at Mohawk Indoor,"Mar 16, 2019",1:35am-1:50am,Mohawk Indoor
3,3,Abhi The Nomad,"Abhi The Nomad is a producer, rapper and singe...",Hip-Hop / Rap,Hip-Hop / Rap,"Austin, TX",http://audio.sxsw.com/2019/mp3_by_artist_id/90...,Abhi The Nomad at Nuevo Leon,"Mar 15, 2019",3:00pm-3:40pm,Nuevo Leon
4,4,Abjects,Abjects are composed of singer/guitarist Noemi...,Punk,Garage,"London, UK-ENG",,Abjects at Cheer Up Charlie's Inside,"Mar 16, 2019",12:10am-12:45am,Cheer Up Charlie's Inside


In [276]:
# Per-column count of missing values in the raw music schedule.
music_events.isnull().sum()

Unnamed: 0,0
index,0
name,0
summary,0
genre,11
subgenre,97
home,11
audio,773
title_and_location,12
date,12
event_time,78


In [277]:
# Discard every row that has a missing value in any column, then re-count
# the nulls to confirm none are left.
music_events = music_events.dropna(axis=0)
music_events.isna().sum()

Unnamed: 0,0
index,0
name,0
summary,0
genre,0
subgenre,0
home,0
audio,0
title_and_location,0
date,0
event_time,0


In [278]:
# Trim the music schedule to its first 50 rows (renumbered from 0).
music_events = keep_first_n(music_events, n=50)

In [279]:
# Print column dtypes and non-null counts for the trimmed music frame.
music_events.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   index               50 non-null     int64 
 1   name                50 non-null     object
 2   summary             50 non-null     object
 3   genre               50 non-null     object
 4   subgenre            50 non-null     object
 5   home                50 non-null     object
 6   audio               50 non-null     object
 7   title_and_location  50 non-null     object
 8   date                50 non-null     object
 9   event_time          50 non-null     object
 10  venue               50 non-null     object
dtypes: int64(1), object(10)
memory usage: 4.4+ KB


In [280]:
# Columns that are not carried into the unified event schema.
unused_music_columns = [
    'index', 'name', 'subgenre', 'audio',
    'title_and_location', 'date', 'event_time',
]
music_events = music_events.drop(columns=unused_music_columns)
music_events.head()

Unnamed: 0,summary,genre,home,venue
0,A fictional story of a lone space traveler tol...,Metal,"Austin, TX",Dirty Dog Bar
1,"Based in Queens, NY, Aaron Cohen was quickly e...",Hip-Hop / Rap,"New York, NY",Mohawk Indoor
2,"Abhi The Nomad is a producer, rapper and singe...",Hip-Hop / Rap,"Austin, TX",Nuevo Leon
3,Abraham Alexander is one of the most exciting ...,Soul,"Fort Worth, TX",St David's Historic Sanctuary
4,No description provided for artist.,Singer-Songwriter,"Los Angeles, CA",Empire Garage


In [281]:
# Reshape the music frame into the shared event schema
# (description, location, name, category, datetime) in a single chain,
# so the frame is rebound rather than mutated in place.
music_events = (
    music_events
    .rename(columns={"summary": "description"})
    .assign(
        location=lambda frame: frame["venue"] + ", " + frame["home"],
        name=lambda frame: frame["genre"] + " Concert",
        category="Concert",
        # Size the synthetic datetimes from the frame itself: relying on the
        # generator's default n would leave NaN in any row beyond that count,
        # because column assignment aligns on the index.
        datetime=lambda frame: generate_future_datetimes(n=len(frame)),
    )
    .drop(columns=['home', 'venue', 'genre'])
)

In [282]:
# Preview the first five rows of the reshaped music events.
music_events.head(5)

Unnamed: 0,description,location,name,category,datetime
0,A fictional story of a lone space traveler tol...,"Dirty Dog Bar, Austin, TX",Metal Concert,Concert,2025-11-10 15:30:00
1,"Based in Queens, NY, Aaron Cohen was quickly e...","Mohawk Indoor, New York, NY",Hip-Hop / Rap Concert,Concert,2025-11-24 10:15:00
2,"Abhi The Nomad is a producer, rapper and singe...","Nuevo Leon, Austin, TX",Hip-Hop / Rap Concert,Concert,2025-11-20 17:00:00
3,Abraham Alexander is one of the most exciting ...,"St David's Historic Sanctuary, Fort Worth, TX",Soul Concert,Concert,2025-09-22 17:15:00
4,No description provided for artist.,"Empire Garage, Los Angeles, CA",Singer-Songwriter Concert,Concert,2025-10-20 23:30:00


In [283]:
# Any description matching the case-insensitive "no description" pattern is
# a placeholder; replace the whole value with NaN.
placeholder_pattern = r"(?i).*no description.*"
music_events['description'] = music_events['description'].replace(
    placeholder_pattern, np.nan, regex=True
)
music_events.head()

Unnamed: 0,description,location,name,category,datetime
0,A fictional story of a lone space traveler tol...,"Dirty Dog Bar, Austin, TX",Metal Concert,Concert,2025-11-10 15:30:00
1,"Based in Queens, NY, Aaron Cohen was quickly e...","Mohawk Indoor, New York, NY",Hip-Hop / Rap Concert,Concert,2025-11-24 10:15:00
2,"Abhi The Nomad is a producer, rapper and singe...","Nuevo Leon, Austin, TX",Hip-Hop / Rap Concert,Concert,2025-11-20 17:00:00
3,Abraham Alexander is one of the most exciting ...,"St David's Historic Sanctuary, Fort Worth, TX",Soul Concert,Concert,2025-09-22 17:15:00
4,,"Empire Garage, Los Angeles, CA",Singer-Songwriter Concert,Concert,2025-10-20 23:30:00


In [284]:
# Fetch the meetup events dataset; note that `path` is rebound to this
# dataset's directory from here on.
MEETUP_DATASET_HANDLE = "prashdash112/meetup-events-data"
path = kagglehub.dataset_download(MEETUP_DATASET_HANDLE)

print("Path to dataset files:", path)
print("Files in the dataset:", os.listdir(path))

Path to dataset files: /kaggle/input/meetup-events-data
Files in the dataset: ['sample.xlsx']


In [285]:
# Read the workbook from the directory kagglehub just returned. The previous
# hard-coded cache path (with a pinned version folder) only exists in one
# runtime and does not match the download location reported above.
meetup_xlsx = os.path.join(path, 'sample.xlsx')
meetup_events = pd.read_excel(meetup_xlsx)
meetup_events.head()

Unnamed: 0,id,date,state,category,time,groupname,name,links,attendee
0,asas,2020-04-20,San-Francisco,"Technology,business",00:00:00,Product School Bangalore,Webinar: PM in a Startup Setting by Alexa Prod...,https://www.meetup.com/PS-Bangalore/events/270...,6 Members going
1,,2020-04-20,San-Francisco,"Technology,business",07:00:00,SouJava,Live SouJava- Microservice Patterns - Implemen...,https://www.meetup.com/SouJava/events/269602686/,119 Javeiros going
2,0,2020-04-20,San-Francisco,"Technology,business",07:30:00,DevOps | CODE Event Series,enterprise:CODE 2020,https://www.meetup.com/DevOps-CODE-Event-Serie...,10 Mitglieder going
3,0,2020-04-20,San-Francisco,"Technology,business",08:00:00,DC Cybersecurity Training,Certified CISO,https://www.meetup.com/washington-dc-cybersecu...,4 Members going
4,0,2020-04-20,San-Francisco,"Technology,business",08:00:00,DC Cybersecurity Training,Certified Ethical Hacker (CEH),https://www.meetup.com/washington-dc-cybersecu...,3 Members going


In [286]:
# Per-column count of missing values in the raw meetup data.
meetup_events.isnull().sum()

Unnamed: 0,0
id,1
date,0
state,0
category,0
time,0
groupname,0
name,0
links,0
attendee,0


In [287]:
# Trim the meetup data to its first 50 rows (renumbered from 0).
meetup_events = keep_first_n(meetup_events, n=50)

In [288]:
# Print column dtypes and non-null counts for the trimmed meetup frame.
meetup_events.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   id         49 non-null     object        
 1   date       50 non-null     datetime64[ns]
 2   state      50 non-null     object        
 3   category   50 non-null     object        
 4   time       50 non-null     object        
 5   groupname  50 non-null     object        
 6   name       50 non-null     object        
 7   links      50 non-null     object        
 8   attendee   50 non-null     object        
dtypes: datetime64[ns](1), object(8)
memory usage: 3.6+ KB


In [289]:
# Drop the columns that are not part of the shared event schema and attach
# synthetic start times. The generator is sized from the frame so that no
# row is left without a datetime if the row count ever differs from the
# generator's default.
meetup_events = (
    meetup_events
    .drop(columns=['id', 'links', 'attendee', 'groupname', 'date', 'time'])
    .assign(datetime=lambda frame: generate_future_datetimes(n=len(frame)))
)

In [290]:
# Preview the first five rows of the slimmed-down meetup events.
meetup_events.head(5)

Unnamed: 0,state,category,name,datetime
0,San-Francisco,"Technology,business",Webinar: PM in a Startup Setting by Alexa Prod...,2025-09-16 18:30:00
1,San-Francisco,"Technology,business",Live SouJava- Microservice Patterns - Implemen...,2025-10-15 12:30:00
2,San-Francisco,"Technology,business",enterprise:CODE 2020,2025-10-04 21:00:00
3,San-Francisco,"Technology,business",Certified CISO,2025-12-08 19:45:00
4,San-Francisco,"Technology,business",Certified Ethical Hacker (CEH),2025-11-04 22:00:00


In [291]:
# Strip any run of non-alphanumeric characters from the start of each event
# name, and expose the `state` column under the schema name `location`.
leading_non_alnum = r"^[^A-Za-z0-9]+"
meetup_events = meetup_events.assign(
    name=meetup_events["name"].str.replace(leading_non_alnum, "", regex=True)
).rename(columns={"state": "location"})
meetup_events.head()

Unnamed: 0,location,category,name,datetime
0,San-Francisco,"Technology,business",Webinar: PM in a Startup Setting by Alexa Prod...,2025-09-16 18:30:00
1,San-Francisco,"Technology,business",Live SouJava- Microservice Patterns - Implemen...,2025-10-15 12:30:00
2,San-Francisco,"Technology,business",enterprise:CODE 2020,2025-10-04 21:00:00
3,San-Francisco,"Technology,business",Certified CISO,2025-12-08 19:45:00
4,San-Francisco,"Technology,business",Certified Ethical Hacker (CEH),2025-11-04 22:00:00


In [292]:
# NOTE(review): absolute path — presumably a file uploaded to the notebook
# runtime rather than part of a downloaded dataset; confirm where it comes from.
social_events_csv = '/content/synthetic_events_next_two_weeks_60.csv'
social_events = pd.read_csv(social_events_csv)
social_events.head()

Unnamed: 0,description,date,time,location,name,category,datetime
0,Funk Festival at Electric Garden in Los Angele...,"Aug 29, 2025",9:06pm-11:06pm,"Electric Garden, Los Angeles, CA",Funk Festival,Concert,2025-08-29 21:06:00
1,EDM Festival at Rooftop 512 in Boston. Live mu...,"Aug 31, 2025",11:52pm-12:52am,"Rooftop 512, Boston, MA",EDM Festival,Concert,2025-08-31 23:52:00
2,DJ Night at Riverfront Park in Austin. Dance f...,"Aug 26, 2025",8:58pm-10:13pm,"Riverfront Park, Austin, TX",DJ Night,Party,2025-08-26 20:58:00
3,Rock Live at Mohawk Indoor in San Francisco. L...,"Aug 31, 2025",9:27pm-10:57pm,"Mohawk Indoor, San Francisco, CA",Rock Live,Concert,2025-08-31 21:27:00
4,Latino Concert Night at Riverfront Park in Chi...,"Aug 27, 2025",6:03pm-7:03pm,"Riverfront Park, Chicago, IL",Latino Concert Night,Concert,2025-08-27 18:03:00


In [293]:
# The combined `datetime` column is kept; the separate date/time columns go.
social_events = social_events.drop(columns=['date', 'time'])

In [294]:
# Stack the three sources into one frame with a fresh 0-based index.
event_sources = [music_events, meetup_events, social_events]
event_dataset = pd.concat(event_sources, ignore_index=True)
event_dataset

Unnamed: 0,description,location,name,category,datetime
0,A fictional story of a lone space traveler tol...,"Dirty Dog Bar, Austin, TX",Metal Concert,Concert,2025-11-10 15:30:00
1,"Based in Queens, NY, Aaron Cohen was quickly e...","Mohawk Indoor, New York, NY",Hip-Hop / Rap Concert,Concert,2025-11-24 10:15:00
2,"Abhi The Nomad is a producer, rapper and singe...","Nuevo Leon, Austin, TX",Hip-Hop / Rap Concert,Concert,2025-11-20 17:00:00
3,Abraham Alexander is one of the most exciting ...,"St David's Historic Sanctuary, Fort Worth, TX",Soul Concert,Concert,2025-09-22 17:15:00
4,,"Empire Garage, Los Angeles, CA",Singer-Songwriter Concert,Concert,2025-10-20 23:30:00
...,...,...,...,...,...
155,Funk Live at Downtown Pavilion in Las Vegas. L...,"Downtown Pavilion, Las Vegas, NV",Funk Live,Concert,2025-08-25 21:19:00
156,Indie Club Night at Buffalo Billiards in Las V...,"Buffalo Billiards, Las Vegas, NV",Indie Club Night,Party,2025-08-27 11:20:00
157,Ladies Night at Electric Garden in Chicago. Da...,"Electric Garden, Chicago, IL",Ladies Night,Party,2025-08-24 20:39:00
158,Acoustic Picnic at Cheer Up Charlie's Inside i...,"Cheer Up Charlie's Inside, Nashville, TN",Acoustic Picnic,Picnic,2025-08-30 19:10:00


In [295]:
# Print column dtypes and non-null counts for the combined event dataset.
event_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   description  109 non-null    object
 1   location     160 non-null    object
 2   name         160 non-null    object
 3   category     160 non-null    object
 4   datetime     160 non-null    object
dtypes: object(5)
memory usage: 6.4+ KB
