In [10]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import secrets # password generation
from base64 import b64encode
import json # api formatting
import http.client # api communication
import os

for dirname, _, filenames in os.walk('.'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

./ingest_tweets.ipynb
./tweets.csv


In [11]:
# Load tweets into python
tweets = pd.read_csv('tweets.csv')
tweets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17410 entries, 0 to 17409
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   name            17410 non-null  object
 1   username        17410 non-null  object
 2   description     14728 non-null  object
 3   location        11432 non-null  object
 4   followers       17410 non-null  int64 
 5   numberstatuses  17410 non-null  int64 
 6   time            17410 non-null  object
 7   tweets          17410 non-null  object
dtypes: int64(2), object(6)
memory usage: 1.1+ MB


In [12]:
# Preview
tweets.head()

Unnamed: 0,name,username,description,location,followers,numberstatuses,time,tweets
0,GunsandCoffee,GunsandCoffee70,ENGLISH TRANSLATIONS: http://t.co/QLdJ0ftews,,640,49,1/6/2015 21:07,ENGLISH TRANSLATION: 'A MESSAGE TO THE TRUTHFU...
1,GunsandCoffee,GunsandCoffee70,ENGLISH TRANSLATIONS: http://t.co/QLdJ0ftews,,640,49,1/6/2015 21:27,ENGLISH TRANSLATION: SHEIKH FATIH AL JAWLANI '...
2,GunsandCoffee,GunsandCoffee70,ENGLISH TRANSLATIONS: http://t.co/QLdJ0ftews,,640,49,1/6/2015 21:29,ENGLISH TRANSLATION: FIRST AUDIO MEETING WITH ...
3,GunsandCoffee,GunsandCoffee70,ENGLISH TRANSLATIONS: http://t.co/QLdJ0ftews,,640,49,1/6/2015 21:37,ENGLISH TRANSLATION: SHEIKH NASIR AL WUHAYSHI ...
4,GunsandCoffee,GunsandCoffee70,ENGLISH TRANSLATIONS: http://t.co/QLdJ0ftews,,640,49,1/6/2015 21:45,ENGLISH TRANSLATION: AQAP: 'RESPONSE TO SHEIKH...


In [13]:

# Create user table
user_cols = ['name', 'username', 'description']
users = tweets[user_cols].drop_duplicates()

# Create emails
users['email'] = users['username'] + '@gmail.com'

# Create passwords
passwords = []
for i in range(0, len(users)):
    passwords.append(secrets.token_urlsafe(16))
    
assert len(np.unique(passwords)) == users.shape[0] 
users['password'] = passwords

# Rename descripton to about_me
users.rename(columns={'description': 'about_me'}, inplace=True)

# Drop null usernames
users.dropna(how='all')

# Preview users
users

Unnamed: 0,name,username,about_me,email,password
0,GunsandCoffee,GunsandCoffee70,ENGLISH TRANSLATIONS: http://t.co/QLdJ0ftews,GunsandCoffee70@gmail.com,bPlFSwp-XGyBY61IV2fTVw
29,Abu Layth Al Hindi,AbuLaythAlHindi,Kik: abulayth2014. Ex South African. Currently...,AbuLaythAlHindi@gmail.com,zktx6zhdWEfSGSJIbEfkaw
31,ابو الدرداء #خلافة,YazeedDhardaa25,Observing a JIHAD NEWS mainly about Islamic St...,YazeedDhardaa25@gmail.com,XmL55lyMTNmrVBt_WcK5mw
33,abu baker aldimashqi,abubakerdimshqi,,abubakerdimshqi@gmail.com,aUiIDNSvJmUhQseDpf9S7A
45,IS_BAQIYA,BaqiyaIs,,BaqiyaIs@gmail.com,XY2CNRJJJYPLa_FukhxZhw
...,...,...,...,...,...
16157,The Caravan,freelance_112,Monitoring terrorism threats globally. we brin...,freelance_112@gmail.com,oMyb2u_uc3DTDuPrXQg7VQ
16629,Anaksabil97,nvor85j,,nvor85j@gmail.com,ziPTSZzjBwUE_VkwwPFPiQ
16634,Mountain Man,Mountainjjoool,,Mountainjjoool@gmail.com,oTt4w9NZ-Lng3X0VG4bysw
16793,Sabil Ghoraba,04_8_1437,,04_8_1437@gmail.com,jcIQ0uz4-EqFRQmJWovzlw


In [14]:
# Create posts table
post_cols = ['username', 'time', 'tweets']
posts = tweets[post_cols]

# Convert \n to <br>
posts['tweets'].replace('\n', '<br>', regex=True, inplace=True)

# Configure time variables
posts['modified_at'] = posts['time']
posts.rename(columns={'time': 'created_at'}, inplace=True)

# Rename tweets to body
posts.rename(columns={'tweets': 'body'}, inplace=True)

# Preview
posts

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,username,created_at,body,modified_at
0,GunsandCoffee70,1/6/2015 21:07,ENGLISH TRANSLATION: 'A MESSAGE TO THE TRUTHFU...,1/6/2015 21:07
1,GunsandCoffee70,1/6/2015 21:27,ENGLISH TRANSLATION: SHEIKH FATIH AL JAWLANI '...,1/6/2015 21:27
2,GunsandCoffee70,1/6/2015 21:29,ENGLISH TRANSLATION: FIRST AUDIO MEETING WITH ...,1/6/2015 21:29
3,GunsandCoffee70,1/6/2015 21:37,ENGLISH TRANSLATION: SHEIKH NASIR AL WUHAYSHI ...,1/6/2015 21:37
4,GunsandCoffee70,1/6/2015 21:45,ENGLISH TRANSLATION: AQAP: 'RESPONSE TO SHEIKH...,1/6/2015 21:45
...,...,...,...,...
17405,nvor85j,5/13/2016 15:46,#Breaking<br>#IslamicState <br>#AmaqAgency <br...,5/13/2016 15:46
17406,nvor85j,5/13/2016 15:50,#Breaking<br><br> Iraqi forces bulldozer destr...,5/13/2016 15:50
17407,nvor85j,5/13/2016 15:51,#Breaking <br>#AmaqAgency <br>#IslamicState <b...,5/13/2016 15:51
17408,nvor85j,5/13/2016 16:07,#PhotoReport <br>#WilayatDimasiqh <br><br>📷The...,5/13/2016 16:07


In [21]:
# Send API requests
connection = http.client.HTTPConnection('localhost:5000')

# API communication functions
def create_user(user, conn=connection):
    try:
        headers = {'Content-type': 'application/json'}
        conn.request('POST', '/api/users', user.to_json(), headers)
        response = connection.getresponse()
        print(response.read().decode())
        return {'status': response.code}
    except:
        print('Creation of user: {} failed.'.format(user['username']))
        return {'status': 500}


def get_token(user, conn=connection):
    try:
        username = user['username']
        password = user['password']
        user_header = {
            "Authorization": "Basic {}".format(
                b64encode(bytes(f"{username}:{password}", "utf-8")).decode("ascii")
            )
        }
        connection.request('POST', '/api/tokens', headers=user_header)
        response = connection.getresponse()
        token = json.loads(response.read())['token']
        return {
            'status': response.code,
            'token': token
            }
    except Exception as e:
        print(f'Token retrieval failed.')
        return {'status': 500, 'token': None}

def get_user_posts(user, posts):
    fields = ['created_at', 'modified_at', 'body']
    payload = posts[posts['username'] == user['username']]
    return payload[fields]

def make_post(post, token, conn=connection):
    headers = {
        'Content-type': 'application/json',
        "Authorization": "Bearer %s" % token
        }
    try:
        conn.request('POST', '/api/posts', post.to_json(), headers)
        response = connection.getresponse()
        print(response.read().decode())
        return {'status': response.code}
    except:
        print('Creation of post failed.')
        return {'status': 500}

In [22]:
# Create Users & Posts
for _, user in users.iterrows():
    # Create User
    response = create_user(user=user, conn=connection)
    if response['status'] == 500:
        continue        
    
    # Get token
    response = get_token(user=user, conn=connection)
    if response['status'] == 500:
        continue
    token = response['token']
    
    # Get user's posts
    user_posts = get_user_posts(user=user, posts=posts)
    
    # Create user's posts
    for _, post in user_posts.iterrows():
        make_post(post=post, token=token, conn=connection)
    


{"error":"Bad Request","message":"please use a different username"}

{"_links":{"author":"/api/users/1","comments":"/api/posts/35/comments","likes":"/api/posts/35/likes","reposts":"/api/posts/35/reposts","self":"/api/posts/35"},"body":"ENGLISH TRANSLATION: 'A MESSAGE TO THE TRUTHFUL IN SYRIA - SHEIKH ABU MUHAMMED AL MAQDISI: http://t.co/73xFszsjvr http://t.co/x8BZcscXzq","comments_count":0,"created_at":"Tue, 06 Jan 2015 21:07:00 GMT","id":35,"is_comment":false,"is_repost":false,"language":null,"likes_count":0,"media_class":null,"media_type":null,"media_url":null,"modified_at":"Tue, 06 Jan 2015 21:07:00 GMT","reposts_count":0,"user_id":1}

{"_links":{"author":"/api/users/1","comments":"/api/posts/36/comments","likes":"/api/posts/36/likes","reposts":"/api/posts/36/reposts","self":"/api/posts/36"},"body":"ENGLISH TRANSLATION: SHEIKH FATIH AL JAWLANI 'FOR THE PEOPLE OF INTEGRITY, SACRIFICE IS  EASY' http://t.co/uqqzXGgVTz http://t.co/A7nbjwyHBr","comments_count":0,"created_at":"Tue, 06 Jan