In [1]:
using Plots, Distributions, JLD, CSV, DataFrames, PyCall, MultivariateStats, ProgressMeter, DataStructures

In [2]:
include("maputils.jl")

MapUtils

# Mapping Communities
The idea here is to define a plotting method that extracts the relevant data at each zoom level to represent the communities.
First ideas:
- show only a fraction of the dots at sufficiently distant zoom levels?
- show the biggest users at every level by dot size
- show the most used words in every region

In [3]:
# the 1.2M x 60 classifier matrix (read from the "U_60" entry of the JLD file):
U_60 = JLD.load("/media/henripal/hd1/data/U_60.jld", "U_60")

# the 1.2M names array (read from the "names" entry of the JLD file); as the
# last expression of the cell, this is the value the notebook displays
names = JLD.load("/media/henripal/hd1/data/names.jld","names")


1205559-element Array{String,1}:
 "Deborah87958167"    
 "texasfarmgirl1836"  
 "Squatch"            
 "Lu Who"             
 "SongsOfLaredo"      
 "Diva"               
 "Bishop Talbert Swan"
 "NadelParis"         
 "Buster Brown"       
 "AdolescentIdle"     
 "ㅤㅤㅤ"                
 "DCSlove1129"        
 "キャロット🥕"             
 ⋮                    
 "Sasha Illegems"     
 "sjandreae"          
 "leeanndroid"        
 "chabudai0001"       
 "SisSissaki"         
 "QKout"              
 "roa_isa"            
 "HolyFuzazzle"       
 "SofiaGuapura"       
 "JohnJulia18"        
 "KPniele"            
 "Name Redacted"      

In [4]:
# the 1.2M name to followers table, read from CSV; the two column names
# ("name", "followers") are supplied explicitly through `header`
# NOTE(review): presumably the file itself has no header row — confirm
followers = CSV.read("/media/henripal/hd1/data/name_to_follower.csv", header =["name", "followers"])


Unnamed: 0,name,followers
1,"Nullable{WeakRefString{UInt8}}(""GavaironJ"")",Nullable{Int64}(5)
2,"Nullable{WeakRefString{UInt8}}(""bocchijoto"")",Nullable{Int64}(1834)
3,"Nullable{WeakRefString{UInt8}}(""cannabinolsen"")",Nullable{Int64}(1)
4,"Nullable{WeakRefString{UInt8}}(""angelman61"")",Nullable{Int64}(32)
5,"Nullable{WeakRefString{UInt8}}(""alex_latrice21"")",Nullable{Int64}(199)
6,"Nullable{WeakRefString{UInt8}}(""turnipkween"")",Nullable{Int64}(242)
7,"Nullable{WeakRefString{UInt8}}(""EveMorante"")",Nullable{Int64}(747)
8,"Nullable{WeakRefString{UInt8}}(""mwutley"")",Nullable{Int64}(113)
9,"Nullable{WeakRefString{UInt8}}(""LetsCllnk"")",Nullable{Int64}(59)
10,"Nullable{WeakRefString{UInt8}}(""positivelytaco"")",Nullable{Int64}(173)


In [5]:
# Load the t-SNE results table from CSV; its first two columns are consumed
# below as coordinate pairs. The trailing `;` suppresses the cell output.
tsne_xy = CSV.read("/media/henripal/hd1/data/tsne_results.csv");

In [6]:
# Pair up the first two columns of the t-SNE table as (x, y) tuples, unwrapping
# each Nullable entry with `get`. The typed comprehension builds the vector in a
# single pass: no intermediate `collect` of the zipped columns and no
# uninitialised array filled by index.
tsne_data = Tuple{Float64, Float64}[(get(x), get(y)) for (x, y) in zip(tsne_xy[:, 1], tsne_xy[:, 2])]
typeof(tsne_data)

Array{Tuple{Float64,Float64},1}

In [7]:
# Lookup table from name to follower count. Being a DefaultDict constructed
# with 1, any name that was never stored reads back as 1.
name_to_followers = DefaultDict{String, Int64}(1)
for (screen_name, n_followers) in zip(followers[1], followers[2])
    # both columns hold Nullable values; `get` unwraps them
    name_to_followers[get(screen_name)] = get(n_followers)
end


In [8]:
# Follower count for every entry of `names`, in the same order (names absent
# from the dictionary yield its default value).
follower_list = map(name -> name_to_followers[name], names);

In [9]:
# Normalise the classifier output: work on the transpose and divide every
# column by its own sum, so that each column sums to 1.
# NOTE(review): on Julia >= 0.7 `'` returns a lazy wrapper rather than a copy,
# in which case this loop would also modify U_60 — confirm the target version.
U_60_t = U_60'
@showprogress for col in 1:size(U_60_t, 2)
    U_60_t[:, col] /= sum(U_60_t[:, col])
end

Progress: 100%|█████████████████████████████████████████| Time: 0:00:05


In [10]:
# For every column of the normalised matrix, record the row index holding the
# largest entry; that index is used as the column's community.
community_index = Vector{Int64}(size(U_60_t, 2))
@showprogress for col in eachindex(community_index)
    # findmax returns (maximum, index); only the index is kept
    community_index[col] = findmax(U_60_t[:, col])[2]
end
    
    

Progress: 100%|█████████████████████████████████████████| Time: 0:00:02


In [11]:
# Marker size is the natural log of the follower count, floored at 1 (this also
# covers a count of 0, whose log is -Inf). `max.` broadcasts element-wise; the
# un-dotted `max(1, array)` form is deprecated on Julia 0.6 and a MethodError on
# later versions.
marker_sizes = max.(1, log.(follower_list));

In [12]:
# Select the GR backend for Plots, with PNG output and a 256x256 figure size.
# NOTE(review): 256 presumably matches the map tile size — confirm in maputils.jl
gr(format = "png", size = (256,256))

Plots.GRBackend()

In [13]:
# One (x, y, text) label per point: each t-SNE coordinate pair combined with the
# name at the same position. The trailing `;` keeps the cell output empty.
labels = Tuple{Float64, Float64, String}[(pt[1], pt[2], name) for (pt, name) in zip(tsne_data, names)];

In [14]:
# Build the scatter map from the t-SNE points, the :curl colour scheme, the
# per-point community index, the marker sizes and the labels.
# NOTE(review): presumably :curl is the gradient used to colour points by
# community index — confirm against ScatterMap in maputils.jl
twitter_map = MapUtils.ScatterMap(tsne_data, :curl, community_index, marker_sizes, labels)

MapUtils.ScatterMap((-8.603973508234581,8.755876194051911),(-8.559032283159715,8.724184173062454),Tuple{Float64,Float64}[(-3.67537,0.406384),(-3.38894,-1.50023),(3.35444,2.2671),(-0.414676,-5.59813),(-3.68099,0.236792),(-0.216058,-3.08127),(-0.87525,-4.85737),(-2.05057,-1.20945),(-3.12345,-2.00421),(-6.65075,-3.59875)  …  (2.9136,-2.5362),(4.67752,-0.180636),(-7.93591,-2.32979),(-5.63709,-1.09109),(1.52822,-5.29661),(-5.55409,4.82156),(-6.09647,-3.0425),(-3.71735,-3.90759),(-1.87906,2.91195),(1.1919,-0.384781)],ColorTypes.RGBA{Float64}[RGBA{Float64}(0.390585,0.664069,0.533559,1.0),RGBA{Float64}(0.834731,0.859054,0.790252,1.0),RGBA{Float64}(0.970318,0.92799,0.907715,1.0),RGBA{Float64}(0.109809,0.307394,0.381312,1.0),RGBA{Float64}(0.390585,0.664069,0.533559,1.0),RGBA{Float64}(0.390585,0.664069,0.533559,1.0),RGBA{Float64}(0.247427,0.0602665,0.24501,1.0),RGBA{Float64}(0.571404,0.732618,0.601531,1.0),RGBA{Float64}(0.322047,0.639513,0.520266,1.0),RGBA{Float64}(0.970318,0.92799,0.907715,1.0) 

In [15]:
# Split the scatter map into a TiledMap (the displayed value holds a Dict from
# integer triples to per-tile ScatterMaps).
# NOTE(review): the leading 7 is presumably the deepest zoom level to generate —
# confirm against TiledMap in maputils.jl
twitter_tilemap = MapUtils.TiledMap(7, twitter_map, marker_sizes)

MapUtils.TiledMap(Dict((5,7,4)=>MapUtils.ScatterMap((-2.09403,-1.00904),(4.40338,5.48358),Tuple{Float64,Float64}[(-1.63282,5.56484),(-1.71811,4.95468),(-1.76711,5.41047),(-1.19588,5.51248),(-0.905979,4.82802),(-2.13196,5.48667),(-1.60555,4.36292),(-1.28617,5.48963),(-2.14903,5.42977),(-1.72918,5.49719)  …  (-2.0042,5.07561),(-1.75842,4.92426),(-2.09476,5.45284),(-2.07752,5.41627),(-1.74085,5.41102),(-1.5599,5.57771),(-1.83039,5.40057),(-1.46039,5.14635),(-1.76959,5.31054),(-0.933895,5.29825)],ColorTypes.RGBA{Float64}[RGBA{Float64}(0.84997,0.524823,0.450866,1.0),RGBA{Float64}(0.84997,0.524823,0.450866,1.0),RGBA{Float64}(0.0822556,0.114924,0.26479,1.0),RGBA{Float64}(0.51284,0.709199,0.573664,1.0),RGBA{Float64}(0.784165,0.831258,0.745527,1.0),RGBA{Float64}(0.84997,0.524823,0.450866,1.0),RGBA{Float64}(0.784165,0.831258,0.745527,1.0),RGBA{Float64}(0.51284,0.709199,0.573664,1.0),RGBA{Float64}(0.84997,0.524823,0.450866,1.0),RGBA{Float64}(0.0822556,0.114924,0.26479,1.0)  …  RGBA{Float64}(0.849

In [None]:
# Plot the tiled map with faint (alpha 0.1), stroke-less markers.
# NOTE(review): "./" is presumably the output directory the tiles are written
# to — confirm against plot_tm in maputils.jl
MapUtils.plot_tm(twitter_tilemap, "./", markeralpha = .1, markerstrokewidth = 0)