In [1]:
import pandas as pd
from datetime import datetime, timedelta
import os

In [2]:
def create_folder(folder_name):
    """Ensure that the directory ``folder_name`` exists.

    Missing parent directories are created as well. Calling this on a
    directory that already exists is a no-op.

    Args:
        folder_name: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder_name`` exists but is not a directory
            (surfaced here rather than later, when a write into it fails).
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and calling makedirs() afterwards.
    os.makedirs(folder_name, exist_ok=True)

In [3]:
def process_tweets(df, output_folder, max_tweets_per_interval=500,
                   interval_minutes=5, random_state=42):
    """Bucket tweets into fixed-width time windows and write one text file per window.

    Windows start at the earliest ``created_at`` value and advance by
    ``interval_minutes`` until the latest value is covered. Each window is
    half-open: ``start <= created_at < end``. For every window (including
    windows that contain no tweets) a summary line is printed and a file named
    ``<start>_<end>.txt`` (timestamps formatted as ``%Y-%m-%d_%H-%M``) is
    written to ``output_folder`` containing the ``text`` column, one record per
    row, without header or index. The file is produced with ``DataFrame.to_csv``,
    so CSV quoting rules apply to the text.

    The input frame is not modified.

    Args:
        df: DataFrame with at least the columns ``created_at`` (anything
            ``pd.to_datetime`` can parse) and ``text``.
        output_folder: Directory for the output files; created if missing.
        max_tweets_per_interval: Upper bound on rows written per window. Larger
            windows are randomly down-sampled to this size.
        interval_minutes: Window width in minutes. Must be at least 1 so that
            the minute-resolution file names stay unique.
        random_state: Seed used when down-sampling, for reproducible output.

    Raises:
        ValueError: If ``interval_minutes`` is smaller than 1.
    """
    if interval_minutes < 1:
        raise ValueError("interval_minutes must be at least 1")

    os.makedirs(output_folder, exist_ok=True)

    # Parse into a separate Series instead of assigning back into df, so the
    # caller's frame keeps its original column.
    timestamps = pd.to_datetime(df['created_at'])

    # With no usable timestamps min()/max() are NaT and date_range would raise;
    # there is nothing to bucket, so stop after creating the folder.
    if not timestamps.notna().any():
        return

    time_interval = timedelta(minutes=interval_minutes)

    for start_time in pd.date_range(start=timestamps.min(), end=timestamps.max(), freq=time_interval):
        end_time = start_time + time_interval
        in_window = (timestamps >= start_time) & (timestamps < end_time)
        interval_tweets = df.loc[in_window]

        # Cap busy windows with a seeded random sample.
        if len(interval_tweets) > max_tweets_per_interval:
            interval_tweets = interval_tweets.sample(n=max_tweets_per_interval, random_state=random_state)

        print(f"Interval: {start_time} to {end_time}, Number of Tweets: {len(interval_tweets)}")

        filename = f"{start_time.strftime('%Y-%m-%d_%H-%M')}_{end_time.strftime('%Y-%m-%d_%H-%M')}.txt"
        output_path = os.path.join(output_folder, filename)
        interval_tweets.to_csv(output_path, columns=['text'], index=False, header=False)


In [4]:
# Directory that will receive the per-interval text files.
output_folder = 'output_tweets'

# Tweet export to process; the path is relative to the notebook's working directory.
df = pd.read_csv('updated_file.csv')

In [5]:
# Quick look at the first rows to confirm the CSV loaded with the expected columns.
df.head()

Unnamed: 0,id,text,username,hashtags,created_at,user followers count,replycount,retweetcount,likecount,quotecount,language,quotedtweet,inReplyToTweetId,inReplyToUser,mentionedUsers
0,1584152660227526662,King is back #VK18 #ViratKohli #INDvsPAK2022,VltSentinel,"['VK18', 'ViratKohli', 'INDvsPAK2022']",2022-10-23 12:00:00+00:00,19018,6,13,889,0,en,,,,King is back #VK18 #ViratKohli #INDvsPAK2022
1,1584152660063944704,India 🇮🇳🇮🇳🔥❤️🥰🥰🥰\n@imVkohli 🙌🏻👏🏻\n#INDvsPAK2022,Rohitrouth12,['INDvsPAK2022'],2022-10-23 12:00:00+00:00,268,0,0,1,0,pt,,,,India 🇮🇳🇮🇳🔥❤️🥰🥰🥰\n@imVkohli 🙌🏻👏🏻\n#INDvsPAK2022
2,1584152660043001857,There is only one King 👑 @imVkohli #INDvsPAK20...,SuhailSAhmed,"['INDvsPAK2022', 'ViratKohli', 'T20WorldCup2022']",2022-10-23 12:00:00+00:00,823,0,0,0,0,en,,,,There is only one King 👑 @imVkohli #INDvsPAK20...
3,1584152659673509889,WHAT A GAME!!!!👊👏👏😀❤️ #Victory #INDIA #T20Worl...,thesushmitasen,"['Victory', 'INDIA', 'T20WorldCup2022', 'INDvs...",2022-10-23 12:00:00+00:00,6687300,82,1998,21172,17,en,,,,WHAT A GAME!!!!👊👏👏😀❤️ #Victory #INDIA #T20Worl...
4,1584152659669716994,👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑 @imVkohli #INDvsPAK2022 #T...,immka013,"['INDvsPAK2022', 'T20WC2022']",2022-10-23 12:00:00+00:00,226,0,0,0,0,und,,,,👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑👑 @imVkohli #INDvsPAK2022 #T...


In [6]:
# Bucket the loaded tweets and write one text file per interval into output_folder.
process_tweets(df, output_folder=output_folder)

Interval: 2022-10-23 08:00:01+00:00 to 2022-10-23 08:05:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:05:01+00:00 to 2022-10-23 08:10:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:10:01+00:00 to 2022-10-23 08:15:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:15:01+00:00 to 2022-10-23 08:20:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:20:01+00:00 to 2022-10-23 08:25:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:25:01+00:00 to 2022-10-23 08:30:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:30:01+00:00 to 2022-10-23 08:35:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:35:01+00:00 to 2022-10-23 08:40:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:40:01+00:00 to 2022-10-23 08:45:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:45:01+00:00 to 2022-10-23 08:50:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:50:01+00:00 to 2022-10-23 08:55:01+00:00, Number of Tweets: 500
Interval: 2022-10-23 08:55:01+00