## **Gender Based Violence Data Collection Notebook**

In [1]:
# installing packages that will help me collect data from twitter.com

!pip install tweepy
!pip install unidecode



In [2]:
# importing needed python libraries

import tweepy
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
from unidecode import unidecode
import time
import datetime
from tqdm import tqdm
import pandas as pd
import numpy as np

In [3]:
# twitter authentication

consumer_key = 'consumer_key'
consumer_secret = 'consumer_secret'
access_token = 'access_token'
access_secret = 'access_secret'

# connecting to twitter using secret key and access token

auth = tweepy.OAuthHandler(consumer_key, consumer_secret )
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)

In [4]:
# fuction that will accept our search query plus the limit to the query being generated

# This function will search a query provided from twitter and, retun a list of all tweets that have that query.

def tweetSearch(query, limit):
    tweets = []
    
# Iterate through Twitter using Tweepy to find our query with our defined limit

    for page in tweepy.Cursor(api.search, q=query, count=limit, tweet_mode="extended", wait_on_rate_limit=True).pages(limit):
        for tweet in page:
            tweets.append(tweet)
    return tweets


In [5]:
# create a function to save tweets into the dataframe
# This function will receive pages of tweets, likes and retweets concerning the query and save them into a pandas data frame.
# Basically, this function will return a pandas data frame that contains data from twitter.

def tweets_to_df(tweets):
    
    df = pd.DataFrame(data=[tweet.full_text.encode('utf-8') for tweet in tweets], columns=["Tweets"])
    df["id"] = np.array([tweet.id for tweet in tweets])
    df["lens"] = np.array([len(tweet.full_text) for tweet in tweets])
    df["date"] = np.array([tweet.created_at for tweet in tweets])
    df["place"] = np.array([tweet.place for tweet in tweets])
    df["coordinates"] = np.array([tweet.coordinates for tweet in tweets])
    df["lang"] = np.array([tweet.lang for tweet in tweets])
    df["source"] = np.array([tweet.source for tweet in tweets])
    df["likes"] = np.array([tweet.favorite_count for tweet in tweets])
    df["retweets"] = np.array([tweet.retweet_count for tweet in tweets])
    return df

In [6]:
# list of hastags
hashtags = ['GBV','#sexism', '#rape']

In [7]:
#The following 'for loop' will collect tweets that have the hashtags mentioned in the list and save the tweets in a csv file

total_tweets = 0

for n in tqdm(hashtags):
# first we fetch all tweets that have specific hashtag
    hash_tweets = tweetSearch(query=n,limit=7000)
    total_tweets += int(len(hash_tweets))
    
# second we convert our tweets into datarame
    df = tweets_to_df(hash_tweets)
    
#third we save the dataframe into csv file
df.to_csv("tweets.csv".format(n))

100%|██████████| 3/3 [24:59<00:00, 499.71s/it]


In [8]:
df

Unnamed: 0,Tweets,id,lens,date,place,coordinates,lang,source,likes,retweets
0,"b'RT @maxandrade_: ""Contribuyente X"". #Rap\xc3...",1392126557636857859,65,2021-05-11 14:36:50,,,es,Twitter for Android,0,26
1,b'RT @PreventBullying: #Rape WARNING. I receiv...,1392126021755879433,144,2021-05-11 14:34:42,,,en,Twitter for iPad,0,0
2,"b""OAP Toolz blasts actor Yomi Fabiyi, over pro...",1392125958740656130,221,2021-05-11 14:34:27,,,en,Twitter for Android,0,0
3,b'RT @MargaretKaine: Book 1 Ring of Clay #Pott...,1392125926817902592,140,2021-05-11 14:34:20,,,en,Twitter for Android,0,18
4,"b'RT @maxandrade_: ""Contribuyente X"". #Rap\xc3...",1392125558486708227,65,2021-05-11 14:32:52,,,es,Twitter for iPad,0,26
...,...,...,...,...,...,...,...,...,...,...
7108,"b""RT @liberte_ny: \xf0\x9f\x8e\xafNot surprisi...",1389335561958313985,140,2021-05-03 21:46:25,,,en,Twitter for iPhone,0,5
7109,b'RT @ZenebuAbera: #Rape is being used as a we...,1389334511880638467,140,2021-05-03 21:42:14,,,en,Twitter for Android,0,2
7110,b'RT @ZenebuAbera: #Rape is being used as a we...,1389333793455157252,140,2021-05-03 21:39:23,,,en,Twitter for iPhone,0,2
7111,b'RT @JumbuTweeple: @SitaramYechury Respected ...,1389333373856882689,144,2021-05-03 21:37:43,,,en,Twitter Web App,0,7
