In [1]:
#!pip install plotly
#!jupyter labextension install jupyterlab-plotly
!pip install pyspark
!pip install nltk -U
import os
import shutil
import io
import re
import warnings
from datetime import datetime
import pytz
import string

# Third-party
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely import wkt
from shapely.geometry import Point

# Spark and GCP
import pyspark
from pyspark.sql import functions as f
from pyspark.sql.types import *
from pyspark import SparkContext
from pyspark.sql import SQLContext, SparkSession, Row
from pyspark.conf import SparkConf
from google.cloud import storage

# NLTK
import nltk
from nltk.util import ngrams
nltk.download('punkt')

[0m

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
pd.set_option('display.max_colwidth', None)
spark.conf.set("spark.sql.repl.eagerEval.enabled",True)
path = 'gs://msca-bdp-students-bucket/shared_data/pranavr569/tweets_classified'

### Helpers

In [3]:
@f.pandas_udf('string')
def spatial_join_udf(lat: pd.Series, lon: pd.Series) -> pd.Series:
    point_var = [Point(xy) for xy in zip(lon, lat)]
    gdf_points = gpd.GeoDataFrame(pd.DataFrame({'lat': lat, 'lon': lon}), crs='epsg:4326', geometry=point_var)
    gdf_joined = gpd.sjoin(gdf_points, gdf_countries, how='left')
    return gdf_joined['country']

### Read Data

In [4]:
cord_df = spark.read.parquet(path).select(['user_name','user_description','tweet_text','tweet_coordinates','Category','user_location','tweet_id'])

23/03/10 17:45:52 WARN org.apache.spark.sql.catalyst.util.package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


In [5]:
cord_df.dtypes

[('user_name', 'string'),
 ('user_description', 'string'),
 ('tweet_text', 'string'),
 ('tweet_coordinates', 'array<double>'),
 ('Category', 'string'),
 ('user_location', 'string'),
 ('tweet_id', 'string')]

In [6]:
# Unique users
cord_df.select('tweet_id').distinct().count()

                                                                                

12875304

In [7]:
cord_df = cord_df.filter(f.col('tweet_coordinates').isNotNull())
cord_df

                                                                                

user_name,user_description,tweet_text,tweet_coordinates,Category,user_location,tweet_id
Ohio Education,follow this accou...,youre looking wor...,"[-80.6495194, 41....",Other,Ohio,1514693472841703427
TMJ-HOU Intern Jobs,follow this accou...,baylor college me...,"[-95.3698028, 29....",Other,"Houston, TX",1514693686537306112
TMJ-KAL Transport.,follow this accou...,first student hir...,"[-86.338905, 41.9...",Other,"Kalamazoo, MI",1585271389245743104
TMJ- NCT Educ. Jobs,follow this accou...,education job mig...,"[-79.7919754, 36....",Other,"Triad Area, NC",1515002369645244418
TMJ-INL Transport.,follow this accou...,recommend anyone ...,"[-87.6300207, 40....",Other,"Lafayette, IN",1519376630824591360
Kentucky Educ.,follow this accou...,education amp tra...,"[-83.1932284, 37....",Other,Kentucky,1567166212475584512
TMJ- MIN Educ. Jobs,follow this accou...,join national her...,"[-83.6129939, 42....",Other,"Ann Arbor, MI",1518317359735345152
Leander ISD HR,leanderisd job i...,hiring read lates...,"[-97.8204548, 30....",Other,"Leander, TX",1543381471381008384
TMJ- CIN Educ. Jobs,follow this accou...,job open national...,"[-84.1916069, 39....",Other,"Cincinnati, OH",1521560121603551233
TMJ-RIP Educ. Jobs,follow this accou...,stop staring cloc...,"[-71.4128343, 41....",Other,"Providence, RI",1591140120089853952


In [8]:
topic = "(online learning|online education|study from home|from home|virtual learning|remote learning|remote education|remotely|e-learning|elearning|online courses|distance learning|self paced|blended learning|moocs|video lectures|online classes|personalized learning)"

In [9]:
topic_cord = cord_df.filter(f.col("tweet_text").rlike(topic))

### Explode Array To Two Columns

In [44]:
pd_df = cord_df.toPandas() 
pd_df

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,tweet_coordinates,Category,user_location,tweet_id
0,Ohio Education,follow this account for geo targeted education teaching job tweets in ohio non metro need help tweet us at careerarc,youre looking work youngstown oh check education job via link bio 4th grade teacher stambaugh charter academy 2022 23 school year national heritage academies teacher,"[-80.6495194, 41.0997803]",Other,Ohio,1514693472841703427
1,TMJ-HOU Intern Jobs,follow this account for geo targeted internships job tweets in houston tx need help tweet us at careerarc,baylor college medicine hiring houston tx read latest internship job opening via link bio student helper,"[-95.3698028, 29.7604267]",Other,"Houston, TX",1514693686537306112
2,TMJ-KAL Transport.,follow this account for geo targeted transportation job tweets in kalamazoo mi need help tweet us at careerarc,first student hiring berrien springs mi read latest drivers job opening via link bio school bus driver busdriver driver,"[-86.338905, 41.9464342]",Other,"Kalamazoo, MI",1585271389245743104
3,TMJ- NCT Educ. Jobs,follow this account for geo targeted education teaching job tweets in triad area nc need help tweet us at careerarc,education job might great fit click link bio see k 8 teachers gate city charter academy 2022 23 school year national heritage academies teacher greensboro nc,"[-79.7919754, 36.0726354]",Other,"Triad Area, NC",1515002369645244418
4,TMJ-INL Transport.,follow this account for geo targeted transportation job tweets in lafayette in need help tweet us at careerarc,recommend anyone drivers job danville il click link bio see part time school bus driver first student driver parttime,"[-87.6300207, 40.124481]",Other,"Lafayette, IN",1519376630824591360
...,...,...,...,...,...,...,...
11550,TMJ-HAR Transp. Jobs,follow this account for geo targeted transportation job tweets in hartford ct need help tweet us at careerarc,hiring somers ct click link bio apply job school bus driver first student busdriver driver,"[-72.4460798, 41.9851929]",Other,"Greater Hartford, CT",1614408633047224320
11551,Leander ISD HR,leanderisd job info job hunting tips and influential hr leader re tweets click the link in bio to apply now for more information,recommend anyone job 2022 23 high school science teacher lisdteachwithme leander tx,"[-97.8835763, 30.5983229]",Other,"Leander, TX",1529940893855207424
11552,TMJ- TOL Educ. Jobs,follow this account for geo targeted education teaching job tweets in toledo oh need help tweet us at careerarc,career tip landing jobs like 3rd grade teacher bennett 2022 23 school year toledo oh go informational interviews best way get meeting hiring manager ask informational meeting education teacher,"[-83.555212, 41.6639383]",Other,"Toledo, OH",1529539851397304326
11553,TMJ- CIN Educ. Jobs,follow this account for geo targeted education teaching job tweets in cincinnati oh need help tweet us at careerarc,interested job dayton oh could great fit click link bio apply substitute teacher pathway school discovery national heritage academies education teacher,"[-84.1916069, 39.7589478]",Other,"Cincinnati, OH",1529943708904304640


In [45]:
cols = ['long', 'lat']
pd_df[cols] = pd.DataFrame(pd_df.tweet_coordinates.tolist(), index=pd_df.index, columns=cols)
pd_df.drop('tweet_coordinates', axis=1, inplace=True)
pd_df

Unnamed: 0,user_name,user_description,tweet_text,Category,user_location,tweet_id,long,lat
0,Ohio Education,follow this account for geo targeted education teaching job tweets in ohio non metro need help tweet us at careerarc,youre looking work youngstown oh check education job via link bio 4th grade teacher stambaugh charter academy 2022 23 school year national heritage academies teacher,Other,Ohio,1514693472841703427,-80.649519,41.099780
1,TMJ-HOU Intern Jobs,follow this account for geo targeted internships job tweets in houston tx need help tweet us at careerarc,baylor college medicine hiring houston tx read latest internship job opening via link bio student helper,Other,"Houston, TX",1514693686537306112,-95.369803,29.760427
2,TMJ-KAL Transport.,follow this account for geo targeted transportation job tweets in kalamazoo mi need help tweet us at careerarc,first student hiring berrien springs mi read latest drivers job opening via link bio school bus driver busdriver driver,Other,"Kalamazoo, MI",1585271389245743104,-86.338905,41.946434
3,TMJ- NCT Educ. Jobs,follow this account for geo targeted education teaching job tweets in triad area nc need help tweet us at careerarc,education job might great fit click link bio see k 8 teachers gate city charter academy 2022 23 school year national heritage academies teacher greensboro nc,Other,"Triad Area, NC",1515002369645244418,-79.791975,36.072635
4,TMJ-INL Transport.,follow this account for geo targeted transportation job tweets in lafayette in need help tweet us at careerarc,recommend anyone drivers job danville il click link bio see part time school bus driver first student driver parttime,Other,"Lafayette, IN",1519376630824591360,-87.630021,40.124481
...,...,...,...,...,...,...,...,...
11550,TMJ-HAR Transp. Jobs,follow this account for geo targeted transportation job tweets in hartford ct need help tweet us at careerarc,hiring somers ct click link bio apply job school bus driver first student busdriver driver,Other,"Greater Hartford, CT",1614408633047224320,-72.446080,41.985193
11551,Leander ISD HR,leanderisd job info job hunting tips and influential hr leader re tweets click the link in bio to apply now for more information,recommend anyone job 2022 23 high school science teacher lisdteachwithme leander tx,Other,"Leander, TX",1529940893855207424,-97.883576,30.598323
11552,TMJ- TOL Educ. Jobs,follow this account for geo targeted education teaching job tweets in toledo oh need help tweet us at careerarc,career tip landing jobs like 3rd grade teacher bennett 2022 23 school year toledo oh go informational interviews best way get meeting hiring manager ask informational meeting education teacher,Other,"Toledo, OH",1529539851397304326,-83.555212,41.663938
11553,TMJ- CIN Educ. Jobs,follow this account for geo targeted education teaching job tweets in cincinnati oh need help tweet us at careerarc,interested job dayton oh could great fit click link bio apply substitute teacher pathway school discovery national heritage academies education teacher,Other,"Cincinnati, OH",1529943708904304640,-84.191607,39.758948


In [22]:
pd_df.is_retweet.sum()

0

In [46]:
map_df = spark.createDataFrame(pd_df)

In [47]:
map_df.columns

['user_name',
 'user_description',
 'tweet_text',
 'Category',
 'user_location',
 'tweet_id',
 'long',
 'lat']

In [48]:
map_df = map_df.groupby("lat", "long").\
    agg(f.count("tweet_id").alias("Count")).toPandas()

                                                                                

## Where are these Twitterers located?

### Plotting the Geogrphical Distribution using the tweet coordinates

In [49]:
import folium
from folium import plugins

In [50]:
map_data = map_df.values.tolist()

In [51]:
Heatmap = folium.Map(zoom_start = 4,
    tiles = "Stamen Terrain", control_scale = True, width = '100%', height = '70%')
plugins.HeatMap(map_data, min_opacity = 0.5, radius = 15, blur = 18).add_to(Heatmap)
html_title = '<h3 align="center" style="font-size:20px"><b>My Heatmap</b></h3>'
folium.Html(html_title, script=True).add_to(Heatmap)

# Create a custom HTML legend using the folium.features module
legend_html = '''
     <div style="position: fixed; bottom: 50px; left: 50px; width: 150px; height: 130px; 
     background-color: rgba(255, 255, 255, 0.8); z-index:9999; font-size:14px; font-weight:bold; 
     padding: 10px;">
     <p>Tweet Volume</p>
     <p><span style="background-color:blue">&nbsp;&nbsp;&nbsp;&nbsp;</span>Low </p>
     <p><span style="background-color:purple">&nbsp;&nbsp;&nbsp;&nbsp;</span>Medium</p>
     <p><span style="background-color:orange">&nbsp;&nbsp;&nbsp;&nbsp;</span>High</p>
     <p><span style="background-color:red">&nbsp;&nbsp;&nbsp;&nbsp;</span>Very High</p>
     </div>
     '''
Heatmap.get_root().html.add_child(folium.Element(legend_html))
display(Heatmap)

### Do you see any relationship between the emergence of new issues in education and progression and locations of these Twitterers?

In [11]:
loc_df = spark.read.parquet(path)

In [12]:
loc_df.columns

['user_description',
 'user_favourites_count',
 'user_followers_count',
 'user_friends_count',
 'user_id',
 'user_name',
 'user_protected',
 'user_screen_name',
 'user_statuses_count',
 'user_verified',
 'user_withheld_in_countries',
 'user_location',
 'tweet_date',
 'retweeted_status',
 'is_retweet',
 'is_reply',
 'tweet_id',
 'retweeted_from',
 'tweet_text',
 'quoted_status',
 'hashtags',
 'tweet_coordinates',
 'retweet_count',
 'retweeted_from_name',
 'retweeted_from_description',
 'Category']

In [13]:
loc_df = loc_df.select('user_name','user_description','tweet_text','Category','user_location','tweet_id')

In [50]:
loc_df.count()

                                                                                

12876408

In [14]:
topic = "(online learning|online education|study from home|from home|virtual learning|remote learning|remote education|remotely|e-learning|elearning|online courses|distance learning|self paced|blended learning|moocs|video lectures|online classes|personalized learning)"

In [15]:
topic_loc = loc_df.filter(f.col("tweet_text").rlike(topic))

In [16]:
topic_loc = topic_loc.filter(f.col('user_location')!= 'null')

In [17]:
topic_loc.columns

['user_name',
 'user_description',
 'tweet_text',
 'Category',
 'user_location',
 'tweet_id']

In [18]:
topic_loc = topic_loc.withColumn("location", f.substring_index(f.col("user_location"), ",", 1))

In [19]:
topic_loc.columns

['user_name',
 'user_description',
 'tweet_text',
 'Category',
 'user_location',
 'tweet_id',
 'location']

In [20]:
us_ctry = [ 'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
           'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
           'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
           'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
           'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY','Texas','USA','US','United States',
                                          "Alaska", "Alabama",
             "Arkansas", "American Samoa", "Arizona", "California", "Colorado",
             "Connecticut", "District ", "of Columbia", "Delaware", "Florida", 
             "Georgia", "Guam", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana",
             "Kansas", "Kentucky", "Louisiana", "Massachusetts", "Maryland", "Maine", 
             "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", "North Carolina",
             "North Dakota", "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", 
             "New York", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Puerto Rico", "Rhode Island", 
             "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "Virginia", 
             "Virgin Islands", "Vermont", "Washington", "Wisconsin", "West Virginia", "Wyoming",
             "Aberdeen", "Abilene", "Akron", "Albany", "Albuquerque", "Alexandria", "Allentown",
             "Amarillo", "Anaheim", "Anchorage", "Ann Arbor", "Antioch", "Apple Valley", "Appleton",
             "Arlington", "Arvada", "Asheville", "Athens", "Atlanta", "Atlantic City", "Augusta", 
             "Aurora", "Austin", "Bakersfield", "Baltimore", "Barnstable", "Baton Rouge", "Beaumont",
             "Bel Air", "Bellevue", "Berkeley", "Bethlehem", "Billings", "Birmingham", "Bloomington", 
             "Boise", "Boise City", "Bonita Springs", "Boston", "Boulder", "Bradenton", "Bremerton",
             "Bridgeport", "Brighton", "Brownsville", "Bryan", "Buffalo", "Burbank", "Burlington",
             "Cambridge", "Canton", "Cape Coral", "Carrollton", "Cary", "Cathedral City", "Cedar Rapids", 
             "Champaign", "Chandler", "Charleston", "Charlotte", "Chattanooga", "Chesapeake", "Chicago",
             "Chula Vista", "Cincinnati", "Clarke County", "Clarksville", "Clearwater", "Cleveland", 
             "College Station", "Colorado Springs", "Columbia", "Columbus", "Concord", "Coral Springs", 
             "Corona", "Corpus Christi", "Costa Mesa", "Dallas", "Daly City", "Danbury", "Davenport", 
             "Davidson County", "Dayton", "Daytona Beach", "Deltona", "Denton", "Denver", "Des Moines",
             "Detroit", "Downey", "Duluth", "Durham", "El Monte", "El Paso", "Elizabeth", "Elk Grove",
             "Elkhart", "Erie", "Escondido", "Eugene", "Evansville", "Fairfield", "Fargo", "Fayetteville",
             "Fitchburg", "Flint", "Fontana", "Fort Collins", "Fort Lauderdale", "Fort Smith",
             "Fort Walton Beach", "Fort Wayne", "Fort Worth", "Frederick", "Fremont", "Fresno", 
             "Fullerton", "Gainesville", "Garden Grove", "Garland", "Gastonia", "Gilbert", "Glendale",
             "Grand Prairie", "Grand Rapids", "Grayslake", "Green Bay", "GreenBay", "Greensboro",
             "Greenville", "Gulfport-Biloxi", "Hagerstown", "Hampton", "Harlingen", "Harrisburg", 
             "Hartford", "Havre de Grace", "Hayward", "Hemet", "Henderson", "Hesperia", "Hialeah",
             "Hickory", "High Point", "Hollywood", "Honolulu", "Houma", "Houston", "Howell", "Huntington", 
             "Huntington Beach", "Huntsville", "Independence", "Indianapolis", "Inglewood", "Irvine", 
             "Irving", "Jackson", "Jacksonville", "Jefferson", "Jersey City", "Johnson City", "Joliet", 
             "Kailua", "Kalamazoo", "Kaneohe", "Kansas City", "Kennewick", "Kenosha", "Killeen", "Kissimmee", 
             "Knoxville", "Lacey", "Lafayette", "Lake Charles", "Lakeland", "Lakewood", "Lancaster", "Lansing",
             "Laredo", "Las Cruces", "Las Vegas", "Layton", "Leominster", "Lewisville", "Lexington", "Lincoln",
             "Little Rock", "Long Beach", "Lorain", "Los Angeles", "Louisville", "Lowell", "Lubbock", "Macon",
             "Madison", "Manchester", "Marina", "Marysville", "McAllen", "McHenry", "Medford", "Melbourne", 
             "Memphis", "Merced", "Mesa", "Mesquite", "Miami", "Milwaukee", "Minneapolis", "Miramar",
             "Mission Viejo", "Mobile", "Modesto", "Monroe", "Monterey", "Montgomery", "Moreno Valley", 
             "Murfreesboro", "Murrieta", "Muskegon", "Myrtle Beach", "Naperville", "Naples", "Nashua", 
             "Nashville", "New Bedford", "New Haven", "New London", "New Orleans", "New York", "New York City",
             "Newark", "Newburgh", "Newport News", "Norfolk", "Normal", "Norman", "North Charleston", 
             "North Las Vegas", "North Port", "Norwalk", "Norwich", "Oakland", "Ocala", "Oceanside",
             "Odessa", "Ogden", "Oklahoma City", "Olathe", "Olympia", "Omaha", "Ontario", "Orange", 
             "Orem", "Orlando", "Overland Park", "Oxnard", "Palm Bay", "Palm Springs", "Palmdale", 
             "Panama City", "Pasadena", "Paterson", "Pembroke Pines", "Pensacola", "Peoria", "Philadelphia", 
             "Phoenix", "Pittsburgh", "Plano", "Pomona", "Pompano Beach", "Port Arthur", "Port Orange", 
             "Port Saint Lucie", "Port St. Lucie", "Portland", "Portsmouth", "Poughkeepsie", "Providence", 
             "Provo", "Pueblo", "Punta Gorda", "Racine", "Raleigh", "Rancho Cucamonga", "Reading", "Redding",
             "Reno", "Richland", "Richmond", "Richmond County", "Riverside", "Roanoke", "Rochester", "Rockford",
             "Roseville", "Round Lake Beach", "Sacramento", "Saginaw", "Saint Louis", "Saint Paul", 
             "Saint Petersburg", "Salem", "Salinas", "Salt Lake City", "San Antonio", "San Bernardino", 
             "San Buenaventura", "San Diego", "San Francisco", "San Jose", "Santa Ana", "Santa Barbara",
             "Santa Clara", "Santa Clarita", "Santa Cruz", "Santa Maria", "Santa Rosa", "Sarasota", 
             "Savannah", "Scottsdale", "Scranton", "Seaside", "Seattle", "Sebastian", "Shreveport", 
             "Simi Valley", "Sioux City", "Sioux Falls", "South Bend", "South Lyon", "Spartanburg", 
             "Spokane", "Springdale", "Springfield", "St. Louis", "St. Paul", "St. Petersburg", "Stamford", 
             "Sterling Heights", "Stockton", "Sunnyvale", "Syracuse", "Tacoma", "Tallahassee", "Tampa",
             "Temecula", "Tempe", "Thornton", "Thousand Oaks", "Toledo", "Topeka", "Torrance", "Trenton",
             "Tucson", "Tulsa", "Tuscaloosa", "Tyler", "Utica", "Vallejo", "Vancouver", "Vero Beach", 
             "Victorville", "Virginia Beach", "Visalia", "Waco", "Warren", "Washington", "Waterbury", 
             "Waterloo", "West Covina", "West Valley City", "Westminster", "Wichita", "Wilmington", 
             "Winston", "Winter Haven", "Worcester", "Yakima", "Yonkers", "York", "Youngstown"]

us_ctry_reg='|'.join(["(" + c +")" for c in us_ctry])

In [21]:
topic_loc = topic_loc\
.withColumn('Location',f.when(topic_loc['location'].rlike(us_ctry_reg), 'United States')\
            .otherwise(topic_loc['location']))

In [22]:
topic_loc.groupby("Location").agg(f.count('tweet_id').alias('count'), f.max('user_name').alias('user_name'),
                                                  f.max('user_description').alias('user_description'),
                                                  f.max('tweet_text').alias('tweet_text'),
                                                  f.max('Category').alias('Category'))\
                            .select('user_name','user_description','tweet_text','Location','Category','count').orderBy('count', ascending=False).filter(f.col('location')!='None').toPandas().head(10)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,Location,Category,count
0,ü©∏ Sangre Grande üáπüáπ,zeta no me diga nada l gbtq el amor es ciego pero los vecinos no,zzzahranxn hi help essays research papers term papers lab reports online classes exams school assignments hmu email topgrade30 gmail com,United States,Other,12276
1,ü¶Ñ,youtuber blogger cryptogirl chef a soon to be cert product designer youtube,zoom class gone wrong mr lecturer said david failed course prosperity wahala wan bone school yahoo oo tag someone funpost abroad funnyvideos zoomclass onlinelearning nigeria africa reels monday lockdown trending,Lagos,Other,687
2,ü•ëü••,youth advocate writer lifes for the living,wordslinger technology transforming globalizing education ways never seen consolidation happen global universities online education reached maturity combined vr ar universities going continue seeing great disruption,Nairobi,Other,573
3,üíïGbemyüíï,zoologist entomologist barcelona lasuite dodo freak culer,yabaleftonline education better work ooo person cant go school stress work nkan sele,Nigeria,Other,460
4,üç∏ShakenMartiniüáÆüá≥,your journey to the technology world starts here your technology education destination,without proper regulatory clarity oversight emerging policy online education threatens turn university shell institution commandeered industry profiteering chaitanyaravi3 juhisidharth perils digital education,India,Other,394
5,üìöBibliophile Vickiüìú,you know who i am a muslim pakistani,year 1 children loved collecting beautiful natural objects last week represent forest school journey forestschool outdoorlearning lovelearning highgate haringey,London,Other,359
6,üëªollie d(efund the police)üëª,youre just a step on the bossmans ladder,ycsb director education visited us fourth day curriculum driven end school activities today 86 130 grade eight learners still participating student teacher led activities super proud resilience tenacity online learning,Toronto,Other,252
7,üá®üá¶üçä‚úåüåà N of 49th Proud radical lefty‚ù§üí™,you know youre getting old when popular culture passes you by and you dont care,wow youth almost first school trip since 2018 get education workers one union nightmare stand kids deserve spread wings one bloody year online learning onted,Canada,Other,213
8,üáøüá¶@Trevor_Müáøüá¶,zihlandlo afr ca projects za projects zihlandlo agribusiness zas,yes matric certificate helps gain access bursary opportunities matriculantmonday know eskilz college offers matric equivalent online courses order get certificate via virtual learning enrol,Johannesburg,Other,210
9,üáÆüá≥ ùóóùóÆùóÆùóøùòÇùóïùóÆùóÆùòá ùó†ùó≤ùóµùòÅùóÆ üáÆüá≥,youtuber cinephile sports enthusiast bookworm potterhead movie buff travel enthusiast,zhou li chn mfa china jirongmfa xijinping chin cgmeifangzhang zlj517 ambliuxiaoming university teachers giving excuses responding giving noc students getting online education last 3 years takeusbacktochina xinjiang shiheziuniversity givenocs,New Delhi,Other,203


In [23]:
topic_loc.filter((f.col("Location")=='United States')).toPandas().head(10)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,Category,user_location,tweet_id,Location
0,#DistanceLearning Bot,follow this account to get tweets with hashtag distancelearning in your timeline share learn support students together managed by mrsjenncarr,video know kuvempu university distance education reviews visit link kuvempuuniversity kuvempuuniversitydistanceeducation distanceeducation onlineeducation distancelearning mba kuvempuuniversitydistancembaprograms,Other,San Diego,1552170306088931328,United States
1,UW Extended Campus,we offer continuing education and online degrees in partnership with all 13 university of wis campuses visit to explore our programs,join live webinar 11 2 12 00 pm cst learn university wisconsin flexible option 100 online associate arts amp sciences 30 minutes register aas onlinelearning,Other,"Madison, WI",1585271061481930752,United States
2,InHomeTutoringHonolulu.com,affordable private in home tutoring honolulu hawaii covering all of oahu and remotely for the entire u s a text or call 808 224 1870,04 17 2022 community college guide transfer ivy league amp tier 1 education top 10 education courtesy rick kirkham affordable private home tutoring honolulu hawaii covering oahu remotely entire u text call,Other,"Honolulu, HI",1516181290445275139,United States
3,TOP ACCURATE AND PERFECT WRITERS,we deal with all kinds of assignment projects essays homework at affordable price,assignment due dm help pay essay online class powerpoint someone help assignment due online classes pay discussion post college courses,Other,"California, USA",1559247138315550720,United States
4,TOP ACCURATE AND PERFECT WRITERS,we deal with all kinds of assignment projects essays homework at affordable price,assignment due dm help pay essay powerpoint homework write essay someone help assignment due online classes pay discussion post college courses,Other,"California, USA",1559247170653638656,United States
5,Dr. Bree Boppre üíú,assistant professor of victim studies at shsu highered pedagogy incarceration gender families trauma phdpets she her bo prey views my own,one students final reflection stated often feels forgotten university online student building community online courses key especially support underserved students resources online pedagogy helped,Other,"Houston, TX",1519376066363768834,United States
6,UAGC - The University of Arizona Global Campus,the university of arizona global campus uagc is an online university offering associate bachelor s master s and doctoral degree programs,first day school jitters check new student resource guide find everything networking opportunities access library onlinelearning onlinedegree backtoschool elearning uagc,Other,"Chandler, Arizona",1567165736384094209,United States
7,Alessandro Rigolon,assistant professor of planning u of utah research on urban green space environmental justice gentrification and health opinions my own he him,excited helped capture messy transitions college students move online education 2020 covid 19 changing college system paper snapshot mutelbrowning alerigolon researchgate,Other,"Salt Lake City, UT",1518318502913863681,United States
8,"Lauren Mullenbach, Ph.D.",assistant professor oudges urban environmental justice climate justice green gentrification she her,excited helped capture messy transitions college students move online education 2020 covid 19 changing college system paper snapshot mutelbrowning alerigolon researchgate,Other,"Norman, OK",1518318567170707465,United States
9,Spy Report üî¶üèÄ,a circle of avid basketball fanatics spotlighting northern ohio s most talented high school stars,final numbers today event 2 4 c 2023 athletes competed 2 4 c 2024 athletes competed 1 6 college programs attended live 2 1 college programs tuned remotely huge thanks ohrocketsbball polarbearhoops lou garcia22 maumeebayturf assistance,Other,Ohio,1553916439811194880,United States


In [55]:
topic_loc.filter((f.col("Location")=='United States'))\
.groupby(f.col('Category'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_name').alias('user_name'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('Location').alias('Location'))\
.select(['user_name','user_description','tweet_text','Category','Location','count']).toPandas()

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,Category,Location,count
0,University of Arizona,whether we are touching an asteroid or determining how we feed 9 billion people arizona wildcats ask bigger questions to get better answers beardown,war ukraine marred start new school year millions children unicef says fewer 60 schools country deemed safe amp eligible reopen forcing start school year remote learning,Government,United States,30
1,United Nations,official account of the united nations for peace dignity equality on a healthy planet,war ukraine marred start new school year millions children unicef says fewer 60 schools country deemed safe amp eligible reopen forcing start school year remote learning,Influencer,United States,6
2,Tom,tomadwar808 gmail com ex pro founder adwar foundation,stanford university offering free online courses application fee required 5 free courses dont want miss,Nonprofit_organization,United States,1
3,ü©∏ Sangre Grande üáπüáπ,zeta no me diga nada l gbtq el amor es ciego pero los vecinos no,zzzahranxn hi help essays research papers term papers lab reports online classes exams school assignments hmu email topgrade30 gmail com,Other,United States,12578
4,Stephanie Kelton,writer of some books curator ohioan,upset see outlined school board website taxpayer money may soon going pay scab substitute teacher workforce come provide virtual learning columbus community kids deserve better need fair contract,Celebrity,United States,9
5,Zinnia Jones,where education meets exploration explorermindset,wondering student agency join ohio personalized learning network learn student agency much much excited see many schools ready move learning forward personalizeoh learnerdriven knowledgeworks oheducation escnortheastoh,Educational_institute,United States,83
6,toronto.com,your station for news sports and weather across central pennsylvania chime in your images and videos at,wife special needs teacher bitten multiple times already school year shes never come remotely close putting hands student aggressively man arrested permenantly removed interaction children,News,United States,145


In [24]:
topic_loc.filter(f.col("Location")=='United States')\
.filter(f.col("Category")=='Other')\
.groupby(f.col('user_name'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('user_location').alias('user_location'))\
.select(['user_name','user_description','tweet_text','user_location','count']).orderBy('count',ascending=False).toPandas()

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,user_location,count
0,Brianna Essays,we offer 24 7 online writing services guaranteed with good grades and zero plagiarism get in touch with us for amazing grades email us at topgrade30 gmail com,zymuvaa hi help essays research papers term papers lab reports online classes exams school assignments hmu email topgrade30 gmail com,"Massachusetts, USA",544
1,Top Grade Writers,we offer 24 7 online writing services guaranteed with good grades and zero plagiarism get in touch with us for amazing grades email us at topgrade30 gmail com,zoexmaduike hi help essays research papers term papers lab reports online classes exams school assignments hmu email topgrade30 gmail com,United States,379
2,mariabel,professional tutor timely work privacy and good grade guaranteed god first email mariawriter732 gmail com 802 210 6607,zoevanv hey help handle essays research papers term papers lab reports online classes exams school assignments hmu mariawriter7 gmail com,"Dallas, TX",370
3,InHomeTutoringHonolulu.com,affordable private in home tutoring honolulu hawaii covering all of oahu and remotely for the entire u s a text or call 808 224 1870,06 03 2022 community college guide transfer ivy league amp tier 1 education top 10 education courtesy rick kirkham affordable private home tutoring honolulu hawaii covering oahu remotely entire u text call,"Honolulu, HI",244
4,Sharon Services,i offer legit and professional assignment help at affordable prices kindly send a dm for assistance,zzzahranxn hi help essays research papers term papers lab reports online classes exams school assignments hmu email topgrade30 gmail com,United States,143
...,...,...,...,...,...
6882,üî®‚è∞,former l e first account was twittered rootin4putin kag 2a btb ifbap pref pronouns nope must be my swagger,16 yr old pulling public school online learning handle school focused pronouns amp sexual orientation academics wants engineer amp asks sex calculus 11 support,"Ohio, USA",1
6883,üñ§«ùu·¥â…Øs…ê…æüñ§ 1312,she her blm i make stuff ig stuffnsuchnthings,harvard university offering free online courses application fee required 10 free courses dont want miss,"Ohio, USA",1
6884,üñ§üíéLeeüá∞üá™üá®üá¶,outgoing freelancer car lover big machine traveller manchester united diehard fan,important websites keep tabs thread 1 online side hustles see people houses 2 access free online courses world top universities colleges like harvard london kings college et,"Ontario, Canada",1
6885,ü§êü§êHoverdozey üòéüò´üòèüôÑ,researcher data analyst academic instructor gunners 4 life,university adelaide free online courses,United States,1


In [25]:
topic_loc.filter(f.col("Location")=='United States')\
.filter(f.col("Category")=='News')\
.groupby(f.col('user_name'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('user_location').alias('user_location'))\
.select(['user_name','user_description','tweet_text','user_location','count']).orderBy('count',ascending=False).toPandas().head(5)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,user_location,count
0,5NEWS,covering news where you live in northwest arkansas and the river valley metro,future school fort smith free public charter school enrolling 9th 12th graders spring offer personalized learning plans college courses schedule tour see offer sponsored future school fort smith,"Fort Smith - Fayetteville, AR",3
1,amna,co anchor pbs newshour contributor nbcnews msnbc paul s wife karam lili s mom penn lsenews such a,president auaf shares university closed kabul august 15 2021 august 29th launched online classes nearly every single student logged keep learning today enrolled 300 students semester 200 women auafgala,"Washington, DC",3
2,St. Louis Post-Dispatch,st louis post dispatch news sports business features blogs photos and video weatherbird is the twitter ambassador for the p d,st louis area teacher shortage means online classes school blythebernhard photos d76,"St. Louis, Mo.",2
3,Tulsa World,please consider supporting local journalism by subscribing today,teacher shortages problem covid 19 pandemic worsened problem teachers left profession worn low wages remote learning political fights teach schools,"Tulsa, Okla.",2
4,Simon Hall,course leader communication skills university of cambridge director creative warehouse author journalist ex bbc lover of words thoughts feelings,team always team blessed work great group learning designers copy editors graphic artists video producers creating new university cambridge communication course great night online education celebration,"Cambridge, England",2


In [26]:
topic_loc.filter(f.col("Location")=='United States')\
.filter(f.col("Category")=='Educational_institute')\
.groupby(f.col('user_name'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('user_location').alias('user_location'))\
.select(['user_name','user_description','tweet_text','user_location','count']).orderBy('count',ascending=False).toPandas().head(5)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,user_location,count
0,Lightspeed Systems,lightspeed systems is trusted in 28 000 schools across 38 countries to advance educational effectiveness and safety for every student,proud partner washingtons colville school district advance onlinelearning schoolsafety initiatives thank partnership learn lightspeed solutions,"Based in Austin, TX",20
1,Remake Learning,reporting the happenings of the remakelearning network 500 orgs in southwest pa and wv working to accelerate engaging relevant and equitable learning,welcome northgateproud new member remakelearning council currently serves superintendent northgate school district ample experience education administration read remake learning profile,"Pittsburgh, PA",7
2,EdSurge,edsurge covers and connects the people ideas and tools shaping the future of learning,school districts deploying remote learning alternative school closures extreme weather severe illness remotelearning virtuallearning,"Portland, OR",4
3,ShareMyLesson,aftunions share my lesson is where prek 16 educators school staff parents collaborate take for credit webinars share their best materials edupd smlpd,register popular demand free webinar learn strategies select teach vocabulary support word recognition text comprehension edchat k12 learning distancelearning pdedu aftunion aftteach,"Washington, DC",4
4,Elizabethtown College,telling the elizabethtown college story our vibrant campus is located in southcentral pas historic lancaster county etowncollege bluejaysalways,total 37 etowncollege students worked closely faculty staff mentors campus remotely summer perform independent student research etownscarp program learn interactive recap student research,"Elizabethtown, PA",2


In [27]:
topic_loc.filter(f.col("Location")=='United States')\
.filter(f.col("Category")=='Government')\
.groupby(f.col('user_name'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('user_location').alias('user_location'))\
.select(['user_name','user_description','tweet_text','user_location','count']).orderBy('count',ascending=False).toPandas().head(5)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,user_location,count
0,Angela Williams,government affairs policy former colorado senator representative equitable education leader virtual learning visionary stridelearn,virtual learning environments created w understanding education amp built around student whole person remote learning largely forces students fall line w whatever schools amp administrations offer,Colorado,6
1,Arizona State University,official twitter of arizona state university no 1 in innovation top 10 of all universities worldwide for u s patents awarded,pwn college online educational platform provides training modules aspiring cybersecurity professionals within outside asu 3 5,"Tempe, Arizona",1
2,"Dr. Goddess, üó£ in #TulsaBurning @History Channel",digital strategist blacktwitter scholar blackwallstreet augustwilson artist afroroma traveler atripofftheoldblock filmmaker ybtm dir panafest,harvard university offering free online courses application fee required 10 free courses dont want miss,"Pittsburgh, PA",1
3,University of Arizona,whether we are touching an asteroid or determining how we feed 9 billion people arizona wildcats ask bigger questions to get better answers beardown,play age empires iv earning college credit learning never fun microsoft university arizona online partnered transform age empires iv truly unique online learning experience learn arizonaonline,"Tucson, Arizona",1
4,Conor P. Williams,senior fellow tcfdotorg phd dad flyinglionbeer brewing magnate some good bylines earnest fallible clumsy opinions here solely wholly mine,talk learning loss school closures virtual learning two elementary schoolers missed 6 7 weeks school year covid hitting house another couple weeks outbreaks school,"District of Columbia, USA",1


In [28]:
topic_loc.filter(f.col("Location")=='United States')\
.filter(f.col("Category")=='Celebrity')\
.groupby(f.col('user_name'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('user_location').alias('user_location'))\
.select(['user_name','user_description','tweet_text','user_location','count']).orderBy('count',ascending=False).toPandas().head(5)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,user_location,count
0,"Salome Thomas-EL, Ed.D",award winning principal and author co author of i choose to stay and retention for a change international speaker and dad who appeared on gma and oprah radio,big week three doctors appearing good morning wilmington detvch monday morning discuss teacher retention building thriving school cultures hammond near chicago continue convo 100 educators southshorelearning2022,"Philly and Wilmington, DE",1
1,Hanif Abdurraqib,writer of some books curator ohioan,upset see outlined school board website taxpayer money may soon going pay scab substitute teacher workforce come provide virtual learning columbus community kids deserve better need fair contract,"Columbus, OH",1
2,R.NFT R ‚ÄúRay‚Äù Wang ÁéãÁëûÂÖâ 1A #Metaverse #RuleTheWorld,constellationr ceo disruptvshow 2x bestselling author hcleadership harvardbiz keynoter futurist provocateur edm dj foodie svlife taiwaneseam,ai ml artificialintelligence machinelearning datascience bigdata analytics blockchain tech data kuriharan mvollmer1 rwang0 dunkenkbliths nigewillson university michigan institute social research developing new data platform,Silicon Valley | SF | FL 410+,1
3,Matthew Yglesias,pro choice pro fish writer of slow boring senior fellow at niskanen center bloomberg columnist these tweets are worth what you pay for them,kids better school go school consistent finding keep relearning detriment,"Washington, DC",1
4,Dart_Adams,bostonian past npr mass appeal complex okayplayer now boston magazine author of the book of dart instead we became evil a tale of survival perseverance,harvard university offering free online courses application fee required 10 free courses dont want miss,"Boston, MA",1


In [29]:
topic_loc.filter(f.col("Location")=='United States')\
.filter(f.col("Category")=='Influencer')\
.groupby(f.col('user_name'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('user_location').alias('user_location'))\
.select(['user_name','user_description','tweet_text','user_location','count']).orderBy('count',ascending=False).toPandas().head(5)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,user_location,count
0,Social Architect,author of financially irresponsible cfei rfc businessinsider parentsmagazine thegrio entrepreneur blackenterprise money bet,harvard university offering free online courses application fee required 10 free courses dont want miss,CT,1
1,Lesley Carhart üè≥Ô∏è‚Äçüåà,ics dfir dragosinc martial artist marksman humanist lvl14 neutral good rogue usaf ret tweet very serious things about infosec thoughts mine they them,theres much knowledge shared industry formalized college training self taught online courses certifications path want come also fix pipe bring new folks industry eom 10 10,"Chicago, IL",1
2,United Nations,official account of the united nations for peace dignity equality on a healthy planet,war ukraine marred start new school year millions children unicef says fewer 60 schools country deemed safe amp eligible reopen forcing start school year remote learning,"New York, NY",1
3,Pinder,artist and artist helper director of web3 strategy partnerships at venicemusic mgmt kaanlifemusic itsjvon alumni aftermath ent pandoramusic,harvard university offering free online courses application fee required 10 free courses dont want miss,LA x SEA,1
4,Cari Luna,author of the revolution of every day tin house books winner of the ken kesey award for fiction,much distorts discourse around school amp remote learning really get discussed perverse percentage american parents refuse make slightest effort protect children covid,"Portland, Oregon",1


In [30]:
topic_loc.filter(f.col("Location")=='United States')\
.filter(f.col("Category")=='Nonprofit_organization')\
.groupby(f.col('user_name'))\
.agg(f.count('tweet_id').alias('count'),
    f.max('user_description').alias('user_description'),
    f.max('tweet_text').alias('tweet_text'),
    f.max('user_location').alias('user_location'))\
.select(['user_name','user_description','tweet_text','user_location','count']).orderBy('count',ascending=False).toPandas().head(5)

                                                                                

Unnamed: 0,user_name,user_description,tweet_text,user_location,count
0,Tom,tomadwar808 gmail com ex pro founder adwar foundation,stanford university offering free online courses application fee required 5 free courses dont want miss,"New Orleans, LA",1
