In [None]:
import os

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import tweepy
from networkx.algorithms.community import greedy_modularity_communities
from scipy import stats

In [None]:
# Read the API credential from the environment so it is never stored in the
# notebook or in version control. Any token that was previously committed
# here should be treated as leaked and revoked.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN")
if not BEARER_TOKEN:
    raise RuntimeError(
        "Set the TWITTER_BEARER_TOKEN environment variable before running this notebook."
    )

# App-only (bearer token) client used for the search requests below.
client = tweepy.Client(bearer_token=BEARER_TOKEN)

In [None]:
# Search query for the tweet collection.
# In the search query language a space (AND) binds tighter than OR, so the
# whole OR expression is wrapped in parentheses; otherwise `lang:id` would
# only restrict the last OR operand instead of every alternative.
# query = '(#racunshopee OR "racun shopee" OR "shopee haul" OR #shopeehaul) lang:id'
query = '((("rekomendasi" OR "referensi" OR "inspirasi") ("outfit" OR "baju" OR "OOTD")) OR ("outfit of the day" OR "OOTD")) lang:id'
# query = 'eiger lang:id'

In [None]:
# Accumulators for the tweet objects and the expanded author (user) objects.
tweets_data = []
tweets_user = []

# Page through the recent-search endpoint: at most `limit` pages with up to
# `max_results` tweets each.
for response in tweepy.Paginator(client.search_recent_tweets,
                                query=query,
                                # start_time=start_time,
                                # end_time=end_time,
                                tweet_fields=["created_at", "text", "author_id", "entities", "in_reply_to_user_id"],
                                user_fields=["name", "username", "location", "verified", "description"],
                                max_results=100,
                                expansions='author_id', limit=100):
    # A page without matches carries data=None and no "users" expansion;
    # fall back to empty lists instead of failing on `+= None` / a missing key.
    tweets_data.extend(response.data or [])
    tweets_user.extend(response.includes.get("users", []))

In [None]:
# Tabulate the collected tweet and user objects (one row per object).
tweets_data_df, tweets_user_df = (
    pd.DataFrame(rows) for rows in (tweets_data, tweets_user)
)

In [None]:
# Quick structural check of the tweet frame: columns, non-null counts, dtypes.
tweets_data_df.info()

In [None]:
# Build a one-row-per-author lookup. The user expansion repeats an author on
# every page they appear in, so de-duplicate on the join key itself rather
# than on all columns: any leftover duplicate key would multiply tweet rows
# in the merge below.
tweets_df = (
    tweets_user_df
    .rename(columns={"id": "author_id"})
    .drop_duplicates(subset="author_id")
)

# Attach the author's profile fields to every tweet; `validate` makes the
# "many tweets -> one author" assumption explicit.
df = tweets_data_df.merge(tweets_df, on="author_id", validate="many_to_one")

# Persist the enriched tweets for later inspection.
df.to_csv("tweets.csv")

In [None]:
# Flatten the mention entities: one record per (tweet, mentioned account).
# Tweets without entities, or without a "mentions" entry, contribute nothing.
mentions = [
    {
        "id": tweet["id"],
        "mention_id": mentioned["id"],
        "mention_username": mentioned["username"],
    }
    for tweet in tweets_data
    for mentioned in (tweet["entities"] or {}).get("mentions", [])
]

# Fix the column schema so the frame still has an "id" column (needed by the
# merge that follows) when no tweet in the sample mentions anyone.
mentions_df = pd.DataFrame(mentions, columns=["id", "mention_id", "mention_username"])

In [None]:
# Quick structural check of the mention table: columns, non-null counts, dtypes.
mentions_df.info()

In [None]:
# Join each mention record with its tweet (and author profile) via the tweet id.
tweets_mention_df = mentions_df.merge(df, on="id")

In [None]:
# Mention edges: tweet author (source) -> mentioned account (target),
# keeping each distinct pair once.
user_mentions_df = (
    tweets_mention_df
    .rename(columns={"username": "source", "mention_username": "target"})
    .loc[:, ["source", "target"]]
    .drop_duplicates(keep='first', ignore_index=True)
)

In [None]:
# Reply edges: author of the replying tweet (source) -> account replied to (target).
# A target can only be resolved to a username if that account also authored
# at least one collected tweet; other replies are dropped by the inner join.
reply_sources = (
    df.loc[df['in_reply_to_user_id'].notna(), ['in_reply_to_user_id', 'username']]
    .rename(columns={"username": "source"})
)

# Resolve targets through a one-row-per-author lookup. Joining against `df`
# itself (one row per tweet) repeated every reply once for each tweet the
# target had posted.
reply_targets = (
    df[['author_id', 'username']]
    .drop_duplicates(subset='author_id')
    .rename(columns={"author_id": "in_reply_to_user_id", "username": "target"})
)

# Keep each distinct source/target pair once, as the mention edges do.
in_reply_to_user_df = (
    reply_sources
    .merge(reply_targets, on='in_reply_to_user_id')
    .loc[:, ["source", "target"]]
    .drop_duplicates(ignore_index=True)
)

In [None]:
# Stack the reply edges and the mention edges into one source/target edge list.
final_df = pd.concat([in_reply_to_user_df, user_mentions_df], ignore_index=True)

In [None]:
# Undirected graph: an edge records that two accounts interacted, not the direction.
G = nx.Graph()

In [None]:
# Node list: the username of every tweet author (repeats are harmless for a graph).
node_names = df["username"].tolist()

# Edge list: one (source, target) record per row of the combined edge table.
records = final_df.to_records(index=False)
edges = [record for record in records]

In [None]:
# Add every tweet author as a node first so authors without any edge still appear,
# then add the reply/mention edges (endpoints not yet in the graph become nodes too).
G.add_nodes_from(node_names)
G.add_edges_from(edges)

In [None]:
# Display the node view (every username currently in the graph).
G.nodes()

In [None]:
# Summarise the graph size. `nx.info` was removed in NetworkX 3.0, so report
# the node and edge counts directly; this works on every NetworkX version.
print(f"Graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

In [None]:
# Density: share of possible edges that are actually present.
density = nx.density(G)
print("Network density:", density)

# Labelled overview of the whole network using the Kamada-Kawai layout.
# G is undirected, so arrow styling does not apply to its edges; the
# arrowstyle/arrowsize arguments were dropped because they had no visual
# effect and only produce warnings on recent NetworkX releases.
plt.figure(figsize=(10,10))
nx.draw_networkx(G, with_labels=True,
        node_color='skyblue',
        # node_size=1200,
        edge_color='red',
        font_size=9,
        pos=nx.kamada_kawai_layout(G))

In [None]:
# Prints False when the graph consists of more than one component.
print(nx.is_connected(G))

# nx.connected_components yields one node set per component;
# max(..., key=len) picks the set with the most nodes.
# Note: `components` is consumed by max() and cannot be iterated again.
components = nx.connected_components(G)
largest_component = max(components, key=len)

# The diameter is computed on the largest component only:
# build the subgraph induced by its nodes and take the longest shortest path in it.
#

subgraph = G.subgraph(largest_component)
diameter = nx.diameter(subgraph)
print("Network diameter of largest component:", diameter)

In [None]:
# Transitivity of the whole graph, reported here as the triadic-closure measure.
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)

In [None]:
# Map every node to its degree and store it on the node as the 'degree' attribute.
degree_dict = dict(G.degree())
nx.set_node_attributes(G, values=degree_dict, name='degree')

In [None]:
from operator import itemgetter

# (node, degree) pairs ordered from highest to lowest degree.
sorted_degree = list(degree_dict.items())
sorted_degree.sort(key=itemgetter(1), reverse=True)

In [None]:
# Show the 20 best-connected accounts as (name, degree) tuples.
print("Top 20 nodes by degree:")
for name, node_degree in sorted_degree[:20]:
    print((name, node_degree))

In [None]:
# Betweenness centrality for every node of G.
betweenness_dict = nx.betweenness_centrality(G) # Run betweenness centrality
# eigenvector_dict = nx.eigenvector_centrality(G) # Run eigenvector centrality (currently disabled)

# Store the scores on the nodes as the 'betweenness' attribute.
nx.set_node_attributes(G, betweenness_dict, 'betweenness')
# nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

In [None]:
# (node, betweenness) pairs ordered from highest to lowest score.
sorted_betweenness = list(betweenness_dict.items())
sorted_betweenness.sort(key=itemgetter(1), reverse=True)

# Show the 20 strongest brokers as (name, score) tuples.
print("Top 20 nodes by betweenness centrality:")
for name, score in sorted_betweenness[:20]:
    print((name, score))

In [None]:
# Community detection is provided by the networkx.algorithms.community subpackage.
from networkx.algorithms import community

# Partition G into communities with the greedy modularity algorithm.
communities = community.greedy_modularity_communities(G)

In [None]:
# Map every node to the index of the community it was assigned to.
modularity_dict = {
    member: class_index
    for class_index, members in enumerate(communities)
    for member in members
}

# Store the community index on each node as the 'modularity' attribute.
nx.set_node_attributes(G, modularity_dict, 'modularity')

In [None]:
# Nodes assigned to modularity class 0.
class0 = [n for n, attrs in G.nodes(data=True) if attrs['modularity'] == 0]

# Degree (as stored in the 'degree' node attribute) of each of those nodes.
class0_degree = {n: G.nodes[n]['degree'] for n in class0}

# Rank the class members by degree, highest first.
class0_sorted_by_degree = sorted(class0_degree.items(), key=itemgetter(1), reverse=True)

# Report the five highest-degree members of the class.
print("Modularity Class 0 Sorted by degree Centrality:")
for name, node_degree in class0_sorted_by_degree[:5]:
    print("Name:", name, "| degree Centrality:", node_degree)

In [None]:
# for i,c in enumerate(communities): # Loop through the list of communities
#     if len(c) > 2: # Filter out modularity classes with 2 or fewer nodes
#         print('Class '+str(i)+':', list(c)) # Print out the classes and their members

In [None]:
# Unlabelled overview of the whole network using the default layout.
fig, ax = plt.subplots(figsize=(10, 10))
nx.draw_networkx(G, ax=ax, with_labels=False)

In [None]:
# Connected components of G as node sets, ordered from largest to smallest.
components = sorted(nx.connected_components(G), key=len, reverse=True)

In [None]:
# Number of nodes in each component, in the same order as `components`.
comp_sizes = [len(members) for members in components]

In [None]:
# Histogram of the component sizes (100 bins).
hist = plt.hist(comp_sizes, bins=100)

In [None]:
# `components` is sorted by size in descending order, so its first entry is
# the largest connected component; LCC is the subgraph of G induced by it.
largest_comp = components[0]
LCC = G.subgraph(largest_comp)

In [None]:
# Draw only the largest connected component, without labels.
nx.draw_networkx(LCC, with_labels=False)

In [None]:
# Number of accounts in the largest connected component.
LCC.number_of_nodes()

In [None]:
# Demo: random geometric graph coloured by hop distance from its most central node.
# The demo uses its own variable names so the Twitter graph `G` built earlier
# is not overwritten, and a fixed seed so the figure is reproducible.
geo_graph = nx.random_geometric_graph(200, 0.125, seed=42)
# position is stored as node attribute data for random_geometric_graph
geo_pos = nx.get_node_attributes(geo_graph, 'pos')


def _sq_dist_to_center(node):
    """Squared distance of `node` from the centre (0.5, 0.5) of the unit square."""
    x, y = geo_pos[node]
    return (x - 0.5)**2 + (y - 0.5)**2


# find node near center (0.5,0.5)
ncenter = min(geo_pos, key=_sq_dist_to_center)

# color by path length from node near center
hops_from_center = dict(nx.single_source_shortest_path_length(geo_graph, ncenter))

plt.figure(figsize=(8, 8))
nx.draw_networkx_edges(geo_graph, geo_pos, nodelist=[ncenter], alpha=0.4)
# Only nodes reachable from the centre node are drawn, coloured by hop count.
nx.draw_networkx_nodes(geo_graph, geo_pos, nodelist=list(hops_from_center.keys()),
                       node_size=80,
                       node_color=list(hops_from_center.values()),
                       cmap=plt.cm.Reds_r)

plt.xlim(-0.05, 1.05)
plt.ylim(-0.05, 1.05)
# plt.axis('off')
plt.show()

In [None]:
import math


# This example needs Graphviz and either PyGraphviz or pydot.
# from networkx.drawing.nx_pydot import graphviz_layout as layout
from networkx.drawing.nx_agraph import graphviz_layout as layout

# If you don't have pygraphviz or pydot, you can do this
# layout = nx.spring_layout

# Demo: binomial random graphs around the giant-component threshold.
# The demo uses its own graph variable so the Twitter graph `G` built earlier
# is not overwritten, and a fixed seed so the figure is reproducible.

n = 150  # 150 nodes
# p value at which giant component (of size log(n) nodes) is expected
p_giant = 1.0 / (n - 1)
# p value at which graph is expected to become completely connected
p_conn = math.log(n) / float(n)

# the following range of p values should be close to the threshold
pvals = [0.003, 0.006, 0.008, 0.015]

plt.subplots_adjust(left=0, right=1, bottom=0, top=0.95, wspace=0.01, hspace=0.01)
# One panel per p value in a 2x2 grid (panels are numbered from 1).
for panel, p in enumerate(pvals, start=1):
    er_graph = nx.binomial_graph(n, p, seed=42)
    er_pos = layout(er_graph)
    plt.subplot(2, 2, panel)
    plt.title(f"p = {p:.3f}")
    nx.draw(er_graph, er_pos, with_labels=False, node_size=10)
    # identify largest connected component and highlight its edges
    er_components = sorted(nx.connected_components(er_graph), key=len, reverse=True)
    largest_er_component = er_graph.subgraph(er_components[0])
    nx.draw_networkx_edges(largest_er_component, er_pos, edge_color="r", width=6.0)
    # show other connected components (isolated nodes have no edges to draw)
    for component_nodes in er_components[1:]:
        if len(component_nodes) > 1:
            nx.draw_networkx_edges(
                er_graph.subgraph(component_nodes), er_pos, edge_color="r", alpha=0.3, width=5.0,
            )
plt.show()