# SoMe Topic Modeling Notebook | Release canvas 1 📖


## Data Cleaning 🧹

In [70]:
# Installations
import sys
if 'google.colab' in sys.modules:
    !pip install emoji --upgrade
    !pip install pandas-profiling==2.*
    !pip install plotly==4.*

Requirement already up-to-date: emoji in /usr/local/lib/python3.6/dist-packages (0.5.4)


In [0]:
# Required Libraries
import json
import requests
import pandas as pd
import numpy as np
import emoji
import regex
import re

In [72]:
# Loading the JSON file
# The payload is a JSON object; the recorded output shows tweet-id strings mapped to tweet text.
url = 'https://raw.githubusercontent.com/Lambda-School-Labs/social-media-strategy-ds/master/topic_modeling_notebooks/elonmusk_followers%20(1).json'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
response.raise_for_status()
df = response.json()
df

{'1260381214369775616': 'This kid will forever be a legend 😂 https://t.co/dwsL4fBv9g',
 '1259666377461444609': 'If you truly believe Lebrons mindset, competitive fire, intensity, drive to win, or love for the game match Jordan’s I can no longer value your opinion on anything in real life.',
 '1259206653637013507': 'BUTTLICKER! OUR PRICES HAVE NEVER BEEN LOWER!!!',
 '1261377495388917763': '@Bhuvan_Bam ❤️❤️',
 '1252099928857378816': "I'm not crying you're crying.\nhttps://t.co/BcFz6f0Em2",
 '1259558827751616513': 'Vintage images reimagined 🎨 By @k_koi https://t.co/JukcqAE61T',
 '1259362784204197889': 'How close my life is to falling apart https://t.co/MtgxoUO1bL',
 '1261448143494922240': '4 million subs customs tonight? 🤠\n\nyay or nay',
 '1261060136346497025': 'What would Country Club Foxy know about gang life? https://t.co/6buhkxVVxO',
 '1261047478830456833': "@stephen_kcco @ThePodPMI @ShahkGang @evanfoxy What's your xbox gt?",
 '1261047742245281792': '@stephen_kcco @ZitosArmy @reaper6

In [73]:
# Build a one-column DataFrame from the loaded dict's values and label that column 'tweets'
tweet_texts = df.values()
df = pd.DataFrame(tweet_texts).rename(columns={0: 'tweets'})
df.head()

Unnamed: 0,tweets
0,This kid will forever be a legend 😂 https://t....
1,"If you truly believe Lebrons mindset, competit..."
2,BUTTLICKER! OUR PRICES HAVE NEVER BEEN LOWER!!!
3,@Bhuvan_Bam ❤️❤️
4,I'm not crying you're crying.\nhttps://t.co/Bc...


In [76]:
# Removing emojis from the text and putting them in a new column
# Reference 1: https://stackoverflow.com/questions/43146528/how-to-extract-all-the-emojis-from-text
# Reference 2: https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python
# Reference 3: https://stackoverflow.com/questions/11331982/how-to-remove-any-url-within-a-string-in-python

### Question: How to combine all functions into one?
def extract_emoji(text):
    '''
    Returns the emojis found in text as a list, in order of appearance.

    The text is split into extended grapheme clusters and a cluster is kept
    when at least one of its characters is a key of emoji.UNICODE_EMOJI.
    '''
    graphemes = regex.findall(r'\X', text)
    return [
        cluster
        for cluster in graphemes
        if any(char in emoji.UNICODE_EMOJI for char in cluster)
    ]

def emoji_free_text(text):
    '''
    Cleans text from emojis.

    Splits text on whitespace, strips every emoji grapheme cluster from each
    token, drops tokens that end up empty and re-joins the rest with single
    spaces (so newlines and repeated spaces are collapsed to one space).

    Text glued to an emoji, e.g. "great😂", is kept as "great" instead of the
    whole token being discarded together with the emoji.
    '''
    cleaned_tokens = []
    for token in text.split():
        # Work on grapheme clusters, the same way extract_emoji detects emojis,
        # so a multi-codepoint emoji is removed as one unit.
        kept = ''.join(
            cluster
            for cluster in regex.findall(r'\X', token)
            if not any(char in emoji.UNICODE_EMOJI for char in cluster)
        )
        # A token that consisted only of emojis leaves nothing behind: skip it.
        if kept:
            cleaned_tokens.append(kept)
    return ' '.join(cleaned_tokens)

def url_free_text(text):
    '''
    Returns text with every run that starts at "http" and extends up to the
    next whitespace character removed. Surrounding whitespace is left as is.
    '''
    url_pattern = re.compile(r'http\S+')
    return url_pattern.sub('', text)

# New column: the list of emojis found in each tweet
df['tweet_emojies'] = df['tweets'].apply(extract_emoji)

# New column: the tweet text with emojis removed
df['emoji_free_tweets'] = df['tweets'].apply(emoji_free_text)

# New column: the emoji-free tweet text with URLs removed as well
df['url_free_tweets'] = df['emoji_free_tweets'].apply(url_free_text)

df.head()

Unnamed: 0,tweets,tweet_emojies,emoji_free_tweets,url_free_tweets
0,This kid will forever be a legend 😂 https://t....,[😂],This kid will forever be a legend https://t.co...,This kid will forever be a legend
1,"If you truly believe Lebrons mindset, competit...",[],"If you truly believe Lebrons mindset, competit...","If you truly believe Lebrons mindset, competit..."
2,BUTTLICKER! OUR PRICES HAVE NEVER BEEN LOWER!!!,[],BUTTLICKER! OUR PRICES HAVE NEVER BEEN LOWER!!!,BUTTLICKER! OUR PRICES HAVE NEVER BEEN LOWER!!!
3,@Bhuvan_Bam ❤️❤️,"[❤️, ❤️]",@Bhuvan_Bam,@Bhuvan_Bam
4,I'm not crying you're crying.\nhttps://t.co/Bc...,[],I'm not crying you're crying. https://t.co/BcF...,I'm not crying you're crying.


## Topic Modeling 🤓