In [1]:
import pandas as pd
import numpy as np
import sqlite3
from sqlite3 import Error
import json
from os import listdir
from os.path import isfile, join

In [125]:
def lookup(id, list):
    """Return the first element of ``list`` whose ``'id'`` entry equals ``id``.

    Elements are scanned in order and the scan stops at the first match.
    ``None`` is returned when no element matches.
    """
    for candidate in list:
        if candidate['id'] == id:
            return candidate
    return None

def _join_entity_values(entities, entity_key, value_key, fallback):
    """Comma-join ``item[value_key]`` for every item in ``entities[entity_key]``.

    ``fallback`` is returned unchanged when ``entities`` is not a dict or has
    no ``entity_key`` entry, so callers can keep a previously computed value.
    """
    if not isinstance(entities, dict) or entity_key not in entities:
        return fallback
    return ", ".join(item[value_key] for item in entities[entity_key])


def process_json_file(filename, filepath):
    """Parse one tweet-search JSON file into ``{tweet_id: row}``.

    Parameters
    ----------
    filename : str
        Name of the JSON file.
    filepath : str
        Folder that contains ``filename``.

    Returns
    -------
    dict or None
        ``None`` when the parsed JSON has no ``"results"`` entry. Otherwise a
        dict keyed by tweet id whose values are 18-element lists in this order:
        text, created_at, user_id, user_name, reply_count, favorite_count,
        quote_count, retweet_count, hashtags, mentions, in_reply_to_user_id,
        user_created_at, user_followers_count, user_following_count,
        user_friends_count, user_listed_count, user_favourites_count,
        user_description.

        ``hashtags`` and ``mentions`` are ", "-joined strings, or ``np.nan``
        when the tweet carries no such entity list. ``in_reply_to_user_id`` is
        ``np.nan`` when the key is absent from the tweet.

    Raises
    ------
    KeyError
        If a tweet lacks one of the mandatory tweet/user fields read below.
    """
    full_path = join(filepath, filename)

    # Tweets contain non-ASCII text; read as UTF-8 explicitly instead of
    # relying on the platform's default encoding.
    with open(full_path, encoding="utf-8") as f:
        tweet_data = json.load(f)

    if "results" not in tweet_data:
        return None

    outputdict = {}
    for tweet_obj in tweet_data["results"]:
        user = tweet_obj["user"]

        # Mandatory tweet fields.
        tweet_id = tweet_obj["id"]
        text = tweet_obj["text"]
        created_at = tweet_obj["created_at"]

        # Mandatory user fields.
        user_id = user["id"]
        user_name = user["name"]
        user_followers_count = user["followers_count"]
        # NOTE(review): this reads the "following" field, and the notebook's
        # displayed output shows the resulting column as empty. Confirm this
        # is the intended source for "user_following_count".
        user_following_count = user["following"]
        user_friends_count = user["friends_count"]
        user_listed_count = user["listed_count"]
        user_favourites_count = user["favourites_count"]
        user_created_at = user["created_at"]
        user_description = user["description"]

        # Optional fields default to NaN when absent.
        in_reply_to_user_id = tweet_obj.get("in_reply_to_user_id", np.nan)
        entities = tweet_obj.get("entities")
        hashtags = _join_entity_values(entities, "hashtags", "text", np.nan)
        mentions = _join_entity_values(entities, "user_mentions", "screen_name", np.nan)

        # When an "extended_tweet" payload is present its text and entities
        # replace the top-level ones. Any piece the extended payload lacks
        # keeps the value already collected instead of raising KeyError.
        extended = tweet_obj.get("extended_tweet")
        if isinstance(extended, dict):
            text = extended.get("full_text", text)
            extended_entities = extended.get("entities")
            hashtags = _join_entity_values(extended_entities, "hashtags", "text", hashtags)
            mentions = _join_entity_values(extended_entities, "user_mentions", "screen_name", mentions)

        retweet_count = tweet_obj["retweet_count"]
        reply_count = tweet_obj["reply_count"]
        favorite_count = tweet_obj["favorite_count"]
        quote_count = tweet_obj["quote_count"]

        outputdict[tweet_id] = [text, created_at, user_id, user_name, reply_count, favorite_count, quote_count, retweet_count,
                                hashtags, mentions, in_reply_to_user_id, user_created_at, user_followers_count,
                                user_following_count, user_friends_count, user_listed_count,
                                user_favourites_count, user_description]

    return outputdict


In [91]:
# Collect the names of the regular files directly inside the tweets folder
# whose file name contains "json", then display how many were found.
output_files_path = "tweets"
json_tweet_files = [
    name
    for name in listdir(output_files_path)
    if "json" in name and isfile(join(output_files_path, name))
]
len(json_tweet_files)


13

In [92]:
# Parse every JSON file listed in `json_tweet_files` and merge the per-file
# {tweet_id: row} dictionaries into one; a tweet id seen again overwrites the
# earlier row.
all_tweets_dict = {}

for jsonfile in json_tweet_files:
    # process_json_file requires the containing folder as its second argument;
    # calling it with the file name alone raises TypeError.
    tweets_bunch = process_json_file(jsonfile, output_files_path)
    if tweets_bunch is None:
        # File had no "results" entry: nothing to merge.
        continue
    all_tweets_dict.update(tweets_bunch)

# Column names, in the same order as the values in each row list.
dataframe_labels = ["text", "created_at", "user_id", "user_name", "reply_count", "favorite_count", "quote_count", 
                    "retweet_count", "hashtags", "mentions", "in_reply_to_user_id", "user_created_at", "user_followers_count", 
                    "user_following_count", "user_friends_count", "user_listed_count", 
                    "user_favourites_count", "user_description"]

# One row per tweet; the tweet id becomes the DataFrame index.
tweets_df = pd.DataFrame.from_dict(all_tweets_dict, 
                                   orient="index", 
                                   columns=dataframe_labels)
print(tweets_df.shape)
tweets_df.head(3)


(1300, 18)


Unnamed: 0,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
1420408706907394055,⠀@DeniseStefanie is “All In” to support Race f...,Wed Jul 28 15:40:00 +0000 2021,978265108563529728,RaceForRP,0,6,1,0,"allinforRP, RaceforRP, RelapsingPolychondritis...","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
1420096159100571657,@ccfernandez Hi Charlene! I'm from Mexico and ...,Tue Jul 27 18:58:03 +0000 2021,1253109780421509123,Frijol En El Mundo,1,5,0,0,"RareDisease, RelapsingPolychondritis, chronicD...",ccfernandez,9.897224e+17,Wed Apr 22 23:54:07 +0000 2020,43,,280,0,843,Just a human being trying to do my best
1420065257553289224,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 16:55:15 +0000 2021,2438221916,Flewitt Racing,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Apr 11 09:20:00 +0000 2014,1493,,220,21,25972,Pure McLaren GT Series Champion 2018 & 2019 🏆 ...


In [93]:
# Sanity check: list the rows whose tweet text is missing.
tweets_df.loc[tweets_df["text"].isna()]


Unnamed: 0,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description


In [150]:
# Batch 1: load every JSON file in this folder into `tweets_df1`.
output_files_path = "tweets/9.19.2020_11.27.2021"
json_tweet_files = [
    name
    for name in listdir(output_files_path)
    if "json" in name and isfile(join(output_files_path, name))
]
print(len(json_tweet_files))

# Merge the per-file {tweet_id: row} dictionaries; files for which
# process_json_file returns None are skipped.
all_tweets = {}
for jsonfile in json_tweet_files:
    tweets_bunch = process_json_file(jsonfile, output_files_path)
    if tweets_bunch is not None:
        all_tweets.update(tweets_bunch)

# Column names, in the same order as the values in each row list.
dataframe_labels = [
    "text", "created_at", "user_id", "user_name",
    "reply_count", "favorite_count", "quote_count", "retweet_count",
    "hashtags", "mentions", "in_reply_to_user_id",
    "user_created_at", "user_followers_count", "user_following_count",
    "user_friends_count", "user_listed_count", "user_favourites_count",
    "user_description",
]

# The tweet id starts out as the index; reset_index() moves it into an
# "index" column and leaves a positional index behind.
tweets_df1 = pd.DataFrame.from_dict(
    all_tweets, orient="index", columns=dataframe_labels
).reset_index()
print(tweets_df1.shape)
tweets_df1.head(3)


13
(1300, 19)


Unnamed: 0,index,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
0,1420408706907394055,⠀@DeniseStefanie is “All In” to support Race f...,Wed Jul 28 15:40:00 +0000 2021,978265108563529728,RaceForRP,0,6,1,0,"allinforRP, RaceforRP, RelapsingPolychondritis...","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
1,1420096159100571657,@ccfernandez Hi Charlene! I'm from Mexico and ...,Tue Jul 27 18:58:03 +0000 2021,1253109780421509123,Frijol En El Mundo,1,5,0,0,"RareDisease, RelapsingPolychondritis, chronicD...",ccfernandez,9.897224e+17,Wed Apr 22 23:54:07 +0000 2020,43,,280,0,843,Just a human being trying to do my best
2,1420065257553289224,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 16:55:15 +0000 2021,2438221916,Flewitt Racing,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Apr 11 09:20:00 +0000 2014,1493,,220,21,25972,Pure McLaren GT Series Champion 2018 & 2019 🏆 ...


In [151]:
# Batch 2: load every JSON file in this folder into `tweets_df2`.
output_files_path = "tweets/9.17.2019_9.18.2020"
json_tweet_files = [
    name
    for name in listdir(output_files_path)
    if "json" in name and isfile(join(output_files_path, name))
]
print(len(json_tweet_files))

# Merge the per-file {tweet_id: row} dictionaries; files for which
# process_json_file returns None are skipped.
all_tweets = {}
for jsonfile in json_tweet_files:
    tweets_bunch = process_json_file(jsonfile, output_files_path)
    if tweets_bunch is not None:
        all_tweets.update(tweets_bunch)

# Column names, in the same order as the values in each row list.
dataframe_labels = [
    "text", "created_at", "user_id", "user_name",
    "reply_count", "favorite_count", "quote_count", "retweet_count",
    "hashtags", "mentions", "in_reply_to_user_id",
    "user_created_at", "user_followers_count", "user_following_count",
    "user_friends_count", "user_listed_count", "user_favourites_count",
    "user_description",
]

# The tweet id starts out as the index; reset_index() moves it into an
# "index" column and leaves a positional index behind.
tweets_df2 = pd.DataFrame.from_dict(
    all_tweets, orient="index", columns=dataframe_labels
).reset_index()
print(tweets_df2.shape)
tweets_df2.head(3)


8
(737, 19)


Unnamed: 0,index,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
0,1227524254259789824,RT @ClinRheumatol: 63 patients (72.4%) with #R...,Wed Feb 12 09:26:03 +0000 2020,1204699284643274753,Jeet Patel,0,0,0,0,RelapsingPolychondritis,ClinRheumatol,,Wed Dec 11 09:47:58 +0000 2019,88,,139,0,830,
1,1227488477673148416,RT @ClinRheumatol: 63 patients (72.4%) with #R...,Wed Feb 12 07:03:53 +0000 2020,1132161473688539144,Dana Doskaliuk,0,0,0,0,RelapsingPolychondritis,ClinRheumatol,,Sat May 25 05:48:24 +0000 2019,331,,626,1,880,"#MD, #PhD_student with interests in the field ..."
2,1227454093477711874,RT @RaceForRP: An engaging recap of @Roadshagg...,Wed Feb 12 04:47:15 +0000 2020,103586287,James Bogue,0,0,0,0,,"RaceForRP, roadshagger",,Sun Jan 10 14:53:09 +0000 2010,1248,,1863,58,25752,Sportscar racing worldwide for fun


In [137]:
# Stack the first two batches, then keep only the first row seen for each
# tweet id (held in the "index" column).
tweets_so_far = pd.concat([tweets_df1, tweets_df2]).drop_duplicates(subset=["index"])


Unnamed: 0,index,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
0,1420408706907394055,⠀@DeniseStefanie is “All In” to support Race f...,Wed Jul 28 15:40:00 +0000 2021,978265108563529728,RaceForRP,0,6,1,0,"allinforRP, RaceforRP, RelapsingPolychondritis...","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
1,1420096159100571657,@ccfernandez Hi Charlene! I'm from Mexico and ...,Tue Jul 27 18:58:03 +0000 2021,1253109780421509123,Frijol En El Mundo,1,5,0,0,"RareDisease, RelapsingPolychondritis, chronicD...",ccfernandez,9.897224e+17,Wed Apr 22 23:54:07 +0000 2020,43,,280,0,843,Just a human being trying to do my best
2,1420065257553289224,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 16:55:15 +0000 2021,2438221916,Flewitt Racing,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Apr 11 09:20:00 +0000 2014,1493,,220,21,25972,Pure McLaren GT Series Champion 2018 & 2019 🏆 ...
3,1420051255502721027,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 15:59:37 +0000 2021,475918341,The Trans Am Series,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Jan 27 15:41:50 +0000 2012,6987,,415,93,5774,The Official Trans Am Series Presented By Pire...
4,1420036578840354825,We’re excited that @DeniseStefanie will attend...,Tue Jul 27 15:01:18 +0000 2021,978265108563529728,RaceForRP,0,9,1,3,"allinforRP, RaceforRP, RelapsingPolychondritis","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
732,1228666862227132417,The relapsing polychondritis (RP) community is...,Sat Feb 15 13:06:22 +0000 2020,978265108563529728,RaceForRP,0,9,0,1,"RaceforRP, RelapsingPolychondritis, Autoimmune...","KohR_MS, AMR_Official, NateStacyRacing, KyleMa...",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4766,The Race for RP drives awareness and accelerat...
733,1228124331063955456,RT @RaceForRP: Hold on tight as you go ice dri...,Fri Feb 14 01:10:32 +0000 2020,389438982,R. Bartholomew,0,0,0,0,,RaceForRP,,Wed Oct 12 12:33:47 +0000 2011,228,,1227,1,18338,"IT Guy 🖱️. Lover of fast cars🏎️, scenic hikes🏔..."
734,1228070650104995844,Hold on tight as you go ice driving in a McLar...,Thu Feb 13 21:37:14 +0000 2020,978265108563529728,RaceForRP,0,3,0,1,"PureMcLaren, IceDriving, Drifting, McLaren, Ra...","McLarenAuto, customerracing",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4766,The Race for RP drives awareness and accelerat...
735,1227923522858688512,RT @ClinRheumatol: 63 patients (72.4%) with #R...,Thu Feb 13 11:52:36 +0000 2020,1157400803361075200,Nataliya Bek,0,0,0,0,RelapsingPolychondritis,ClinRheumatol,,Fri Aug 02 21:20:29 +0000 2019,60,,62,0,2209,"#cardiologist MD,PhD Danylo Halytsky Lviv Nati..."


In [152]:
# Batch 3: load every JSON file in this folder into `tweets_df3`.
output_files_path = "tweets/9.15.2018_9.16.2019"
json_tweet_files = [
    name
    for name in listdir(output_files_path)
    if "json" in name and isfile(join(output_files_path, name))
]
print(len(json_tweet_files))

# Merge the per-file {tweet_id: row} dictionaries; files for which
# process_json_file returns None are skipped.
all_tweets = {}
for jsonfile in json_tweet_files:
    tweets_bunch = process_json_file(jsonfile, output_files_path)
    if tweets_bunch is not None:
        all_tweets.update(tweets_bunch)

# Column names, in the same order as the values in each row list.
dataframe_labels = [
    "text", "created_at", "user_id", "user_name",
    "reply_count", "favorite_count", "quote_count", "retweet_count",
    "hashtags", "mentions", "in_reply_to_user_id",
    "user_created_at", "user_followers_count", "user_following_count",
    "user_friends_count", "user_listed_count", "user_favourites_count",
    "user_description",
]

# The tweet id starts out as the index; reset_index() moves it into an
# "index" column and leaves a positional index behind.
tweets_df3 = pd.DataFrame.from_dict(
    all_tweets, orient="index", columns=dataframe_labels
).reset_index()
print(tweets_df3.shape)
tweets_df3.head(3)


6
(566, 19)


Unnamed: 0,index,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
0,1046537685140037632,RT @RaceForRP: #RaceforRP at the @pvconcours. ...,Sun Sep 30 23:10:02 +0000 2018,620250601,ScuderiaCorsa,0,0,0,0,"RaceforRP, Ferraris, RelapsingPolychondritis, ...","RaceForRP, PVConcours",,Wed Jun 27 18:52:07 +0000 2012,7469,,471,248,12248,A multi-championship winning race team running...
1,1046532770866978816,RT @RaceForRP: #RaceforRP at the @pvconcours. ...,Sun Sep 30 22:50:30 +0000 2018,339374014,PV Concours d’Elegance,0,0,0,0,"RaceforRP, Ferraris, RelapsingPolychondritis, ...","RaceForRP, PVConcours",,Thu Jul 21 00:46:40 +0000 2011,233,,417,6,1741,The Palos Verdes Concours d’Elegance presents ...
2,1046531543844487168,#RaceforRP at the @pvconcours. Two #Ferraris b...,Sun Sep 30 22:45:38 +0000 2018,978265108563529728,RaceForRP,0,18,0,10,"RaceforRP, Ferraris, RelapsingPolychondritis, ...","PVConcours, Scuderia_Corsa, FerrariSouthBay",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4766,The Race for RP drives awareness and accelerat...


In [153]:
# Stack the first three batches, then keep only the first row seen for each
# tweet id (held in the "index" column), and display the result.
tweets_so_far = pd.concat(
    [tweets_df1, tweets_df2, tweets_df3]
).drop_duplicates(subset=["index"])
tweets_so_far


Unnamed: 0,index,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
0,1420408706907394055,⠀@DeniseStefanie is “All In” to support Race f...,Wed Jul 28 15:40:00 +0000 2021,978265108563529728,RaceForRP,0,6,1,0,"allinforRP, RaceforRP, RelapsingPolychondritis...","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
1,1420096159100571657,@ccfernandez Hi Charlene! I'm from Mexico and ...,Tue Jul 27 18:58:03 +0000 2021,1253109780421509123,Frijol En El Mundo,1,5,0,0,"RareDisease, RelapsingPolychondritis, chronicD...",ccfernandez,9.897224e+17,Wed Apr 22 23:54:07 +0000 2020,43,,280,0,843,Just a human being trying to do my best
2,1420065257553289224,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 16:55:15 +0000 2021,2438221916,Flewitt Racing,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Apr 11 09:20:00 +0000 2014,1493,,220,21,25972,Pure McLaren GT Series Champion 2018 & 2019 🏆 ...
3,1420051255502721027,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 15:59:37 +0000 2021,475918341,The Trans Am Series,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Jan 27 15:41:50 +0000 2012,6987,,415,93,5774,The Official Trans Am Series Presented By Pire...
4,1420036578840354825,We’re excited that @DeniseStefanie will attend...,Tue Jul 27 15:01:18 +0000 2021,978265108563529728,RaceForRP,0,9,1,3,"allinforRP, RaceforRP, RelapsingPolychondritis","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
561,1046641550279802881,RT @RaceForRP: #RaceforRP at the @pvconcours. ...,Mon Oct 01 06:02:45 +0000 2018,3566281755,toa ferrari,0,0,0,0,"RaceforRP, Ferraris, RelapsingPolychondritis, ...","RaceForRP, PVConcours",,Sun Sep 06 12:58:27 +0000 2015,123,,192,2,42242,
562,1046584444612743178,RT @Polychondritis: I love this. It's so true....,Mon Oct 01 02:15:50 +0000 2018,1544619404,Nadia,0,0,0,0,chronicillness,Polychondritis,,Tue Jun 25 02:38:28 +0000 2013,141,,799,1,5947,
563,1046563651526774785,RT @RaceForRP: #RaceforRP at the @pvconcours. ...,Mon Oct 01 00:53:13 +0000 2018,1020689741908455428,Relapsing Polychondritis Foundation,0,0,0,0,"RaceforRP, Ferraris, RelapsingPolychondritis, ...","RaceForRP, PVConcours",,Sat Jul 21 15:19:32 +0000 2018,1263,,2597,3,515,The RP Foundation’s purpose is to facilitate a...
564,1046547854507085825,RT @RaceForRP: #RaceforRP at the @pvconcours. ...,Sun Sep 30 23:50:27 +0000 2018,2550764408,SFC Philadelphia,0,0,0,0,"RaceforRP, Ferraris, RelapsingPolychondritis, ...","RaceForRP, PVConcours",,Fri Jun 06 19:03:43 +0000 2014,577,,979,64,20060,Official Ferrari Club Member of @SFerrariClub ...


In [155]:
# Batch 4: load every JSON file in this folder into `tweets_df4`.
output_files_path = "tweets/9.13.2017_9.14.2018"
json_tweet_files = [
    name
    for name in listdir(output_files_path)
    if "json" in name and isfile(join(output_files_path, name))
]
print(len(json_tweet_files))

# Merge the per-file {tweet_id: row} dictionaries; files for which
# process_json_file returns None are skipped.
all_tweets = {}
for jsonfile in json_tweet_files:
    tweets_bunch = process_json_file(jsonfile, output_files_path)
    if tweets_bunch is not None:
        all_tweets.update(tweets_bunch)

# Column names, in the same order as the values in each row list.
dataframe_labels = [
    "text", "created_at", "user_id", "user_name",
    "reply_count", "favorite_count", "quote_count", "retweet_count",
    "hashtags", "mentions", "in_reply_to_user_id",
    "user_created_at", "user_followers_count", "user_following_count",
    "user_friends_count", "user_listed_count", "user_favourites_count",
    "user_description",
]

# The tweet id starts out as the index; reset_index() moves it into an
# "index" column and leaves a positional index behind.
tweets_df4 = pd.DataFrame.from_dict(
    all_tweets, orient="index", columns=dataframe_labels
).reset_index()
print(tweets_df4.shape)
tweets_df4.head(3)

3
(262, 19)


Unnamed: 0,index,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
0,1022910178809786368,Here at @WGI for the #FerrariChallenge with @S...,Fri Jul 27 18:22:46 +0000 2018,978265108563529728,RaceForRP,0,10,0,2,"FerrariChallenge, RaceforRP, RelapsingPolychon...","WGI, Scuderia_Corsa, aarda",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4766,The Race for RP drives awareness and accelerat...
1,1022814173221330945,RT @RPASF_Official: This is the new official T...,Fri Jul 27 12:01:16 +0000 2018,133672653,Victoria D.,0,0,0,0,RelapsingPolychondritis,RPASF_Official,,Fri Apr 16 10:32:00 +0000 2010,475,,2633,149,11203,"Professional Nurse,Impassioned Humanitarian & ..."
2,1022813959689265153,RT @RaceForRP: Exciting changes in leadership ...,Fri Jul 27 12:00:25 +0000 2018,133672653,Victoria D.,0,0,0,0,,RaceForRP,,Fri Apr 16 10:32:00 +0000 2010,475,,2633,149,11203,"Professional Nurse,Impassioned Humanitarian & ..."


In [157]:
# Stack all four batches, then keep only the first row seen for each tweet id
# (held in the "index" column), and display the result.
tweets_so_far = pd.concat(
    [tweets_df1, tweets_df2, tweets_df3, tweets_df4]
).drop_duplicates(subset=["index"])
tweets_so_far


Unnamed: 0,index,text,created_at,user_id,user_name,reply_count,favorite_count,quote_count,retweet_count,hashtags,mentions,in_reply_to_user_id,user_created_at,user_followers_count,user_following_count,user_friends_count,user_listed_count,user_favourites_count,user_description
0,1420408706907394055,⠀@DeniseStefanie is “All In” to support Race f...,Wed Jul 28 15:40:00 +0000 2021,978265108563529728,RaceForRP,0,6,1,0,"allinforRP, RaceforRP, RelapsingPolychondritis...","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
1,1420096159100571657,@ccfernandez Hi Charlene! I'm from Mexico and ...,Tue Jul 27 18:58:03 +0000 2021,1253109780421509123,Frijol En El Mundo,1,5,0,0,"RareDisease, RelapsingPolychondritis, chronicD...",ccfernandez,9.897224e+17,Wed Apr 22 23:54:07 +0000 2020,43,,280,0,843,Just a human being trying to do my best
2,1420065257553289224,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 16:55:15 +0000 2021,2438221916,Flewitt Racing,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Apr 11 09:20:00 +0000 2014,1493,,220,21,25972,Pure McLaren GT Series Champion 2018 & 2019 🏆 ...
3,1420051255502721027,RT @RaceForRP: We’re excited that @DeniseStefa...,Tue Jul 27 15:59:37 +0000 2021,475918341,The Trans Am Series,0,0,0,0,,"RaceForRP, DeniseStefanie, MusicCityGP",,Fri Jan 27 15:41:50 +0000 2012,6987,,415,93,5774,The Official Trans Am Series Presented By Pire...
4,1420036578840354825,We’re excited that @DeniseStefanie will attend...,Tue Jul 27 15:01:18 +0000 2021,978265108563529728,RaceForRP,0,9,1,3,"allinforRP, RaceforRP, RelapsingPolychondritis","DeniseStefanie, MusicCityGP, GoTransAm",,Mon Mar 26 13:39:11 +0000 2018,2871,,1450,8,4765,The Race for RP drives awareness and accelerat...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,920187089043447808,5 Symptoms Of Relapsing Polychondritis\nhttps:...,Tue Oct 17 07:17:52 +0000 2017,2744154061,arthritistreatment,0,0,0,0,"symptoms, relapsingpolychondritis, polychondritis",,,Tue Aug 19 06:10:18 +0000 2014,84,,40,24,0,Arthritis Treatment and Natural Cure
258,914455062537277440,RT @Women_Bicycling: #RP #RaceforRP #LagunaSec...,Sun Oct 01 11:40:51 +0000 2017,297987127,R Polychondritis,0,0,0,0,"RP, RaceforRP, LagunaSeca, RelapsingPolychondr...",Women_Bicycling,,Fri May 13 13:29:34 +0000 2011,828,,634,46,6425,Creating awareness of the rare disease Relapsi...
259,912744907810390016,#RP #RaceforRP #LagunaSeca #RelapsingPolychond...,Tue Sep 26 18:25:18 +0000 2017,804106520774352896,Women Bicyling,0,1,0,1,"RP, RaceforRP, LagunaSeca, RelapsingPolychondr...",,,Wed Nov 30 23:35:08 +0000 2016,13,,8,0,4,"Women Bicycling, advocacy for health, wellness..."
260,912406076758228993,RT @JenniferNaida: #RareDiseaseDay #Vasculiti...,Mon Sep 25 19:58:55 +0000 2017,228229296,H,0,0,0,0,"RareDiseaseDay, Vasculitis, RelapsingPolychond...",JenniferNaida,,Sun Dec 19 03:25:40 +0000 2010,317,,206,8,6257,"Im Sarcastic,Im http://Unique.So are U;-)If yo..."


In [160]:
# Write the combined, de-duplicated tweets to disk. index=True also writes
# the DataFrame's row labels as the first CSV column.
tweets_so_far.to_csv(path_or_buf="tweets.csv", index=True)
