# Required Libraries

In [1]:
!pip install tweepy



In [2]:
import tweepy
from pathlib import Path
import pandas as pd
import json
import os
from tqdm import tqdm

# Connect to the API

In [3]:
# Twitter API credentials are read from environment variables of the same
# name. Indexing os.environ (rather than .get) raises KeyError right here if
# one of them is missing.
(
    CONSUMER_KEY,
    CONSUMER_SECRET,
    ACCESS_TOKEN,
    ACCESS_TOKEN_SECRET,
    BEARER_TOKEN,
) = (
    os.environ[variable]
    for variable in (
        'CONSUMER_KEY',
        'CONSUMER_SECRET',
        'ACCESS_TOKEN',
        'ACCESS_TOKEN_SECRET',
        'BEARER_TOKEN',
    )
)

In [4]:
# Build the OAuth handler from the consumer key pair, then attach the
# user-level access token pair to it.
auth = tweepy.OAuthHandler(
    consumer_key=CONSUMER_KEY,
    consumer_secret=CONSUMER_SECRET,
)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

# API client used by the search cell; wait_on_rate_limit=True asks tweepy to
# wait when the rate limit is hit instead of raising.
api = tweepy.API(auth=auth, wait_on_rate_limit=True)

# Call the API

In [5]:
# NOTE(review): `tweets` is never filled in this cell; it is kept only in case
# a cell outside this view reads it.
tweets = []

# Directory that receives one JSON file per tweet.
dump_path = Path('./data/twitter_data/')
# parents=True: './data' may not exist yet on a fresh checkout, and a plain
# mkdir() would raise FileNotFoundError. exist_ok=True keeps the cell re-runnable.
dump_path.mkdir(parents=True, exist_ok=True)

# Search for the hashtag, requesting 10 tweets per page and stopping after
# 2 pages. tweet_mode='extended' makes the payload carry 'full_text'.
search_pages = tweepy.Cursor(
    api.search_tweets,
    tweet_mode='extended',
    q="#machine_learning",
    count=10,
    # lang="en",  # uncomment to restrict the search to English tweets
).pages(2)

for page in tqdm(search_pages):
    for tweet in page:
        # Raw API payload as a plain dict; dumped verbatim so that the
        # parsing cells can be re-run without calling the API again.
        json_data = tweet._json
        # File name is the tweet id, so re-fetching a tweet overwrites its file.
        with open(dump_path / f'{json_data["id"]}.json', 'w', encoding='utf-8') as f:
            json.dump(json_data, f)

# Read Dumped Data

In [6]:
# Directory holding the per-tweet JSON dumps (same location as `dump_path`
# in the fetch cell above).
DATA_DIR = Path('./data/twitter_data')

In [7]:
def read_json(file_path):
    """Load and return the JSON document stored at ``file_path``.

    Args:
        file_path: Location of the JSON file (``str`` or path-like object).

    Returns:
        The decoded JSON value (a ``dict`` for the tweet dumps in this
        notebook).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which is not UTF-8 everywhere (e.g. some Windows locales).
    with open(file_path, encoding='utf-8') as f:
        return json.load(f)

In [8]:
# Flatten every dumped tweet into one record (dict) per tweet.
rows = []

# Only read the '*.json' dumps (directories and stray files such as .DS_Store
# are skipped), in sorted order so the resulting row order is reproducible;
# iterdir() yields entries in arbitrary order.
json_paths = sorted(p for p in DATA_DIR.glob('*.json') if p.is_file())

for file_path in tqdm(json_paths):
    d = read_json(file_path)
    user = d['user']

    rows.append(dict(
        name = user['name'],
        followers = user['followers_count'],
        following = user['friends_count'],
        # +1 in the denominator avoids division by zero for accounts that
        # follow nobody, so this is followers / (following + 1).
        follower_following_ratio = user['followers_count'] / (user['friends_count'] + 1),
        # Use 'full_text' when present and non-empty, else fall back to 'text'.
        # NOTE(review): retweet rows in the output end with '…', so the API
        # appears to truncate their full_text; the untruncated text may be
        # under d['retweeted_status'] — confirm before relying on this column.
        text = d.get('full_text') or d.get('text'),
        hashtags = [item['text'] for item in d['entities']['hashtags']],
        likes = d['favorite_count'],
        retweets = d['retweet_count'],
    ))

20it [00:00, 2630.98it/s]


In [9]:
# One DataFrame row per record collected in `rows`; the dict keys become
# the column names.
df = pd.DataFrame(data=rows)

In [10]:
# pandas display settings applied for the rest of the notebook.
DISPLAY_OPTIONS = {
    'display.min_rows': 20,
    'display.max_colwidth': 200,
}
for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

In [11]:
# Last expression of the cell: renders the assembled tweet table.
df

Unnamed: 0,name,followers,following,follower_following_ratio,text,hashtags,likes,retweets
0,Lucifer AI,37,194,0.189744,#30DaysOfCodechallenge\nDay22\nNot done much today. Just started eda for new project\n#30Daysofcode challenge to train my model with ML algorithms intuitions. \nSo much to data to train. .\n#machi...,"[30DaysOfCodechallenge, 30Daysofcode, machine_learning, DataScience, bot_training, Python]",0,5
1,Coding Buddy,9,2,3.0,RT @lucifer_twtt: #30DaysOfCodechallenge\nDay22\nNot done much today. Just started eda for new project\n#30Daysofcode challenge to train my mo…,"[30DaysOfCodechallenge, 30Daysofcode]",0,5
2,AI Bot by uCloudify.com,991,0,991.0,RT @lucifer_twtt: #30DaysOfCodechallenge\nDay22\nNot done much today. Just started eda for new project\n#30Daysofcode challenge to train my mo…,"[30DaysOfCodechallenge, 30Daysofcode]",0,5
3,#30DaysOfCode,2318,1,1159.0,RT @lucifer_twtt: #30DaysOfCodechallenge\nDay22\nNot done much today. Just started eda for new project\n#30Daysofcode challenge to train my mo…,"[30DaysOfCodechallenge, 30Daysofcode]",0,5
4,PyBot,902,1,451.0,RT @lucifer_twtt: #30DaysOfCodechallenge\nDay22\nNot done much today. Just started eda for new project\n#30Daysofcode challenge to train my mo…,"[30DaysOfCodechallenge, 30Daysofcode]",0,5
5,Mr Data Scientist,10965,270,40.461255,RT @lucifer_twtt: #30DaysOfCodechallenge\nDay22\nNot done much today. Just started eda for new project\n#30Daysofcode challenge to train my mo…,"[30DaysOfCodechallenge, 30Daysofcode]",0,5
6,SUPER WRITERS,178,441,0.402715,We can complete your;\n#Homework \n#Machine_Learning \n#Data_Science\n#Assignments\n#Stats \n#Fall_classes\n#Finals\n#Pearson\n#Python\n#R_programming_Language\n#Stata\n#Spss\n#JavaScript\nGet Qui...,"[Homework, Machine_Learning, Data_Science, Assignments, Stats, Fall_classes, Finals, Pearson, Python, R_programming_Language, Stata, Spss, JavaScript, We_deliver]",0,2
7,PyBot,902,1,451.0,RT @superwriterz: We can complete your;\n#Homework \n#Machine_Learning \n#Data_Science\n#Assignments\n#Stats \n#Fall_classes\n#Finals\n#Pearson\n#Py…,"[Homework, Machine_Learning, Data_Science, Assignments, Stats, Fall_classes, Finals, Pearson]",0,2
8,Xeron Bot,2309,1,1154.5,RT @superwriterz: We can complete your;\n#Homework \n#Machine_Learning \n#Data_Science\n#Assignments\n#Stats \n#Fall_classes\n#Finals\n#Pearson\n#Py…,"[Homework, Machine_Learning, Data_Science, Assignments, Stats, Fall_classes, Finals, Pearson]",0,2
9,//InsertUsefulComment,45,212,0.211268,RT @SmitterHane: Just use it\n#DataScience #CodeNewbie #code #100DaysOfCode #100Devs #python #machine_learning #ArtificialIntelligence #joke…,"[DataScience, CodeNewbie, code, 100DaysOfCode, 100Devs, python, machine_learning, ArtificialIntelligence]",0,24
