In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd
import re
import networkx as nx
from geopy.geocoders import Nominatim
from pyvis.network import Network
import requests

from networkx.algorithms import community

In [None]:
# Read the whole raw text dump into memory; the regex cells below parse node and edge
# records out of this single string.
with open("friends.txt", encoding="UTF8") as file:
    friends = file.read()

In [None]:
# A record header is the literal text "Node" followed by a blank line; the capture group
# takes the rest of the next line as the node's name.
nodes_list = re.findall(r'Node\n\n(.*)', friends)
print('Count of nodes: ', len(nodes_list))
# Node names are used as dictionary keys further down, so a repeated name would overwrite
# an earlier record instead of adding a new one.
assert len(nodes_list) == len(set(nodes_list)), 'Duplicates detected!'

In [None]:
# re.split() with one capture group returns alternating pieces:
# [preamble, name_1, body_1, name_2, body_2, ...] - node names sit at the odd positions.
splitted_friends = re.split(r'Node\n\n(.*)', friends)
edge_dict = {}
node = None
# enumerate() supplies the true position of every piece. list.index() rescans the list on
# each iteration (quadratic) and reports the position of the *first* equal piece, which
# mis-classifies any text that repeats at a position of different parity.
for position, piece in enumerate(splitted_friends):
    if position % 2 == 1:
        node = piece
    else:
        # The preamble (position 0) is stored under the key None, exactly as before.
        edge_dict[node] = piece
print('Count of elements in dict: ', len(edge_dict))

In [None]:
# Turn the {node name: raw edge text} mapping into a two-column frame: the dictionary keys
# become `source`, the raw text becomes `target`.
edge_frame = pd.DataFrame.from_dict(edge_dict, orient='index', columns=['target']).reset_index().rename(columns={"index": "source"})
edge_frame.head()

In [None]:
# Drop the first row (the text that precedes the first "Node" header, keyed by None) and
# take an explicit copy so the column assignment below writes to an independent frame
# instead of a slice of the previous one (avoids SettingWithCopyWarning).
edge_frame = edge_frame.iloc[1:].copy()
# Each blank-line separated chunk of the raw text becomes one list element.
edge_frame['target'] = edge_frame['target'].str.split('\n\n')
edge_frame.head()

In [None]:
# Flatten the per-source lists: one (source, target) pair for every list element.
rows = [
    (source, target)
    for source, targets in zip(edge_frame['source'], edge_frame['target'])
    for target in targets
]

edge_frame = pd.DataFrame(rows, columns=['source', 'target'])
edge_frame.head()

In [None]:
# Sanity check: the number of distinct sources in the flattened frame must equal the number
# of node names parsed from the raw text.
assert len(nodes_list) == len(edge_frame.source.unique()), 'Records missmatch!'

In [None]:
# Drop targets that are exactly '', 'Edges' or 'NA' (presumably separators/placeholders from
# the dump rather than people - confirm) and renumber the index, because the regex cells
# below match rows back to this frame by index label.
edge_frame = edge_frame[~edge_frame.target.isin(['', 'Edges', 'NA'])].reset_index(drop=True)
edge_frame.head()

In [None]:
# Primary pattern for one edge record. Named groups:
#   relationship - one of the four listed label texts
#   target       - the rest of the line that follows
#   num_friends  - optional next line (digits with an optional comma, or any text)
#   description  - optional remainder of that line
# NOTE(review): in a raw string the trailing backslash and the line break after it are both
# kept, so the regex contains an escaped literal newline right after `\n`, i.e. two
# consecutive newlines are required between relationship and target. The targets were
# produced by splitting on '\n\n' earlier, so confirm this pattern can still match them.
pattern = r'(?P<relationship>Add Friend|Friend\nFriends|Friend Request Sent|Acquaintance\nFriends)\n\
(?P<target>.*)(?P<num_friends>\n\d*,?\d*|\n.*)?(?P<description>.*)?'

In [None]:
# extractall() yields one row per regex match, indexed by (original row label, match number);
# reset_index() exposes those two levels as the `level_0` and `match` columns used below.
regex_frame = edge_frame.target.str.extractall(pattern).reset_index()
regex_frame.tail()

In [None]:
# Rows of edge_frame whose index label never appears in regex_frame.level_0, i.e. rows in
# which the primary pattern found no match.
missed_records = edge_frame[~edge_frame.index.isin(regex_frame.level_0)]
missed_records.head()

In [None]:
# Non-null count per column for the rows the primary pattern did not match.
missed_records.count()

In [None]:
# Fallback pattern for the unmatched rows: it has target / num_friends / description groups
# but no relationship group, so no `relationship` column is produced here.
miss_pattern = r'(?P<target>.*)\n(?P<num_friends>\d*,?\d*|\n.*)?(?P<description>.*)'
# Rebinds missed_records from the raw rows to their extracted groups
# (level_0 = original row label, match = match number).
missed_records = missed_records.target.str.extractall(miss_pattern).reset_index()
missed_records.head()

In [None]:
# Stack primary and fallback matches into one frame with a fresh 0..n-1 index; columns that
# exist in only one of the inputs are filled with NaN for the other's rows.
combined_regex = pd.concat(objs=[regex_frame, missed_records], sort=False).reset_index(drop=True)

In [None]:
# Rows that neither the primary nor the fallback pattern matched.
edge_frame[~edge_frame.index.isin(combined_regex.level_0)]

In [None]:
# Keep only the rows matched by at least one pattern (i.e. drop the rows listed by the
# previous cell).
edge_frame = edge_frame[edge_frame.index.isin(combined_regex.level_0)]
edge_frame.tail()

In [None]:
# Attach the original row (notably `source`) to every extracted match: each level_0 value is
# looked up in edge_frame's index. The raw text column clashes with the extracted `target`
# and is therefore renamed `target2` by the suffix.
edge_frame = combined_regex.join(other=edge_frame, on='level_0', rsuffix='2')
edge_frame.head()

In [None]:
# Drop bookkeeping columns: the extractall() coordinates (`level_0`, `match`) and `target2`,
# the raw pre-regex text brought in by the join.
edge_frame = edge_frame.reset_index(drop=True).drop(columns=['level_0', 'match', 'target2'])
edge_frame.head()

In [None]:
# Reorder the columns for readability; no rows or values change.
edge_frame = edge_frame[['source', 'target', 'relationship', 'description', 'num_friends']]
edge_frame.head()

In [None]:
# Missing-value report: each value_counts() shows how many rows of that column are
# NaN (True) versus present (False).
print('NA values: \n\n', edge_frame.source.isna().value_counts(), '\n\n',
      edge_frame.target.isna().value_counts(), '\n\n',
      edge_frame.description.isna().value_counts(), '\n\n',
      edge_frame.relationship.isna().value_counts())

In [None]:
# Rows without a target cannot form an edge. The explicit copy makes the result an
# independent frame, so the column assignments in the following cell do not write to a
# slice of the previous frame (SettingWithCopyWarning).
edge_frame = edge_frame[edge_frame.target.notna()].copy()

In [None]:
# Replace missing description / relationship with a single space so that later string
# operations on these columns (e.g. concatenating them into an id) do not yield NaN.
edge_frame.description = edge_frame.description.fillna(' ')
edge_frame.relationship = edge_frame.relationship.fillna(' ')

In [None]:
# Same missing-value report as before the cleanup, repeated to show the effect of dropping
# NaN targets and filling description / relationship.
print('NA values: \n\n', edge_frame.source.isna().value_counts(), '\n\n',
      edge_frame.target.isna().value_counts(), '\n\n',
      edge_frame.description.isna().value_counts(), '\n\n',
      edge_frame.relationship.isna().value_counts())

In [None]:
# Common names register as the same person
edge_frame.target.value_counts().head(20)

In [None]:
len(edge_frame)

In [None]:
# NOTE(review): keep=False removes *every* copy of a fully identical row, not just the
# extras; keep='first' would leave one representative per duplicate - confirm the intent.
edge_frame = edge_frame.drop_duplicates(keep=False)

In [None]:
# One edge per (source, target) row; no edge_attr is passed, so the other columns are not
# carried onto the edges.
G = nx.from_pandas_edgelist(df=edge_frame, source='source', target='target')

In [None]:
# NOTE(review): from_pandas_edgelist() builds an undirected nx.Graph when create_using is
# not given, so this conversion looks redundant - confirm before removing.
G = nx.to_undirected(G)

In [None]:
# Write the full graph to full_processed.gexf in the working directory.
nx.write_gexf(G, 'full_processed.gexf')

In [None]:
# Keep only edges whose target name occurs more than once in the whole frame.
edge_frame_trimmed = edge_frame.copy()
target_occurrences = edge_frame_trimmed.groupby('target')['target'].transform('count')
edge_frame_trimmed['target_count'] = target_occurrences
edge_frame_trimmed = edge_frame_trimmed[edge_frame_trimmed['target_count'] > 1]

In [None]:
# Graph restricted to the trimmed frame, written to its own GEXF file.
# `G` is rebound here and no longer refers to the full graph.
G = nx.from_pandas_edgelist(df=edge_frame_trimmed, source='source', target='target')
G = nx.to_undirected(G)
nx.write_gexf(G, 'full_trimmed.gexf')

In [None]:
# Edges whose target is itself one of the parsed nodes, as an independent copy.
is_known_node = edge_frame['target'].isin(nodes_list)
ego_frame = edge_frame[is_known_node].copy()

In [None]:
# Graph built only from edges between parsed nodes, written to ego_graph.gexf.
# `G` is rebound again; the previously built graphs survive only as their GEXF files.
G = nx.from_pandas_edgelist(df=ego_frame, source='source', target='target')
G = nx.to_undirected(G)
nx.write_gexf(G, 'ego_graph.gexf')

In [None]:
# geolocator = Nominatim(user_agent="pavel2", timeout=10)
# geolocator.geocode('Софийски университет').address

In [None]:
# Reduce each description to the place/organisation it mentions.
# The first pattern is a regular expression (everything up to and including the last "at "),
# so regex=True must be explicit: from pandas 2.0 str.replace() treats the pattern as
# literal text by default and this line would silently stop matching.
edge_frame.description = edge_frame.description.str.replace(r'.*at ', ' ', regex=True)
# The remaining two are plain substrings; regex=False keeps them literal on every version.
edge_frame.description = edge_frame.description.str.replace(' mutual friends', ' ', regex=False)
edge_frame.description = edge_frame.description.str.replace(' friends',  ' ', regex=False)
edge_frame.description = edge_frame.description.str.strip()

In [None]:
# The original line did not parse (unbalanced parentheses, and a list "called" on the result
# of split()). NOTE(review): reconstructed as "does the description mention Varna in either
# spelling" - confirm that this is the check that was intended.
edge_frame.description.str.contains('Varna|Варна', regex=True)

In [None]:
# Number of distinct non-null description values.
edge_frame.description.nunique()

In [None]:
edge_frame.groupby('source').description

In [None]:
len(edge_frame.source.unique())

In [None]:
edge_frame.groupby('source').description.describe()

In [None]:
# For the 2nd to 250th most frequent descriptions, show what follows the first ', '.
frequent_descriptions = edge_frame.description.value_counts().iloc[1:250].index
for description in frequent_descriptions:
    parts = description.split(', ')
    if len(parts) > 1:
        print(parts[1:])



In [None]:
# Distinct description values in order of first appearance, skipping the very first one.
# NOTE(review): presumably the first value is the blank placeholder - confirm.
list_of_places = list(edge_frame.description.unique()[1:])

In [None]:
list_of_places

In [None]:
# Hand-picked sample query. split() discards the surrounding whitespace and '+' joins the
# words so the value can be appended to a URL query string.
sample = ' Vibes.bg'
sample = '+'.join(sample.split())

In [None]:
print('Googling...') # display text while downloading the Google page
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of regex-searching an error page.
res = requests.get('https://www.google.com/maps?q=' + sample, timeout=30)
res.raise_for_status()
# Each match is a (url prefix, latitude, longitude) tuple.
coordinates = re.findall(r"(https://maps\.google\.com/maps/api/staticmap\?center=)(?P<latitude>\d{1,3}\.\d*).*\D+(?P<longitude>\d{1,3}\.\d*)", res.text)
if coordinates:
    print(coordinates[0][1:3])
else:
    # The page layout may change or the place may be unknown; report it instead of
    # failing with an IndexError.
    print('No coordinates found for', sample)


In [None]:
coordinates[0][1:3]

In [None]:
res.text

In [None]:
# NOTE(review): `bs4` is not imported in the import cell visible at the top of this
# notebook, so this line raises NameError on a fresh kernel unless it is imported elsewhere.
soup = bs4.BeautifulSoup(res.text)

In [None]:
linkElems = soup.select('.r a')

In [None]:
# Alternative extraction from the embedded APP_INITIALIZATION_STATE script: capture group 2
# is a comma-separated pair of decimals, each with exactly 15 fractional digits.
# NOTE(review): `asd` is a placeholder name; consider something descriptive.
asd = re.findall(r"(;window\.APP_INITIALIZATION_STATE=\[\[\[\d*\.\d*,)(\d{1,3}\.\d{15},\d{1,3}\.\d{15})", res.text)

In [None]:
asd[0][1]

In [None]:
# str.rfind() treats its argument as plain text, not as a pattern, so handing it a regular
# expression only finds pages that contain the regex source itself. Search with `re`
# instead and report the start of the last match, or -1 (rfind's convention) when absent.
state_matches = list(re.finditer(r"(;window\.APP_INITIALIZATION_STATE=\[\[\[\d*\.\d*,)(\d{1,3}\.\d{15},\d{1,3}\.\d{15})", res.text))
state_matches[-1].start() if state_matches else -1

In [None]:
# Rebinds list_of_places to the distinct descriptions of ego_frame (order of first
# appearance, first value skipped), replacing the list built from edge_frame earlier.
list_of_places = list(ego_frame.description.unique()[1:])

In [None]:
# Strings are immutable: strip() and join() return new values, which the original loop
# threw away, leaving list_of_places unchanged. Store the normalised form instead
# (outer whitespace removed, single spaces replaced by '+').
list_of_places = ['+'.join(place.strip().split(' ')) for place in list_of_places]

In [None]:
list_of_places

In [None]:
ego_frame.description.unique()

In [None]:
def eval_results(x):
    """Return ``(latitude, longitude)`` of a geocoding result.

    Parameters
    ----------
    x : object or None
        Any object exposing ``latitude`` and ``longitude`` attributes, or
        ``None`` when the lookup produced no result.

    Returns
    -------
    tuple
        ``(x.latitude, x.longitude)``, or ``(None, None)`` when either
        attribute is missing (which includes ``x`` being ``None``).
    """
    try:
        return (x.latitude, x.longitude)
    except AttributeError:
        # Only "no such attribute" means "no coordinates". A bare except would also hide
        # KeyboardInterrupt and genuine bugs raised while reading the attributes.
        return (None, None)

# The original statement was cut off in the middle of a lambda and did not parse; it is
# completed with the helper defined above, which maps each geocoding result to a
# (latitude, longitude) pair.
# NOTE(review): `df` and `geolocator` are not assigned in any active cell visible above
# (the geolocator setup is commented out) - define them before running this cell.
df['CityCoordinates'] = df['city'].apply(geolocator.geocode, timeout=1000000).apply(eval_results)

In [None]:
# Composite key for telling apart people who share a name: target + relationship +
# description, concatenated without a separator.
# disregard number of friends due to fluctuations during the data mining process
edge_frame['unique_id'] = edge_frame.target + edge_frame.relationship + edge_frame.description

In [None]:
edge_frame.head()

In [None]:
# Trim surrounding whitespace and remove every comma so the text can be parsed as a number.
edge_frame.num_friends = edge_frame.num_friends.str.strip().str.replace(',', '')

In [None]:
edge_frame.num_friends.head()

In [None]:
edge_frame.dtypes

In [None]:
# errors='coerce' turns anything that is not a number into NaN instead of raising.
edge_frame.num_friends = pd.to_numeric(edge_frame.num_friends, errors='coerce')

In [None]:
edge_frame.dtypes

In [None]:
len(edge_frame.unique_id.unique())

In [None]:
# The original expression was left unfinished and did not parse.
# NOTE(review): reconstructed as "number of distinct rows once num_friends is ignored",
# following the remark above that num_friends fluctuates - confirm the intended check.
len(edge_frame.drop(columns='num_friends').drop_duplicates())

In [None]:
# edge_frame[edge_frame.target=='Александър Иванов']

In [None]:
edge_frame[edge_frame.unique_id.isna()]

In [None]:
# Number of rows sharing each row's target name.
edge_frame['target_count'] = edge_frame.groupby('target')['target'].transform('count')

In [None]:
edge_frame.head()

In [None]:
# Rows whose target name occurs more than once are moved to their own frame (an independent
# copy) for inspection.
# separate duplicates in another df
duplicates = edge_frame[edge_frame.target_count > 1].copy()

# The working frame keeps only targets that occur exactly once.
# trim duplicates from working df
edge_frame = edge_frame[edge_frame.target_count == 1]

In [None]:
# First-layer nodes are known to be distinct people and their unique_id differs from every
# other record, so their rows go straight back into the working frame.
is_first_layer = duplicates.relationship.isin(['Acquaintance\nFriends', 'Friend\nFriends'])
edge_frame = pd.concat([edge_frame, duplicates[is_first_layer]])

In [None]:
edge_frame.tail()

In [None]:
# The first-layer rows were returned to the working frame; take them out of `duplicates`.
is_first_layer = duplicates.relationship.isin(['Acquaintance\nFriends', 'Friend\nFriends'])
duplicates = duplicates[~is_first_layer]

In [None]:
duplicates.head()

In [None]:
# Rows whose description is exactly ' mutual friends'.
# NOTE(review): an earlier cell replaces ' mutual friends' in edge_frame.description and then
# strips the column, so when the notebook runs top to bottom no description can start with
# a space and this selection is empty - confirm the intended cell order.
mutual_friends = duplicates[(duplicates.description == ' mutual friends')]

In [None]:
mutual_friends[(mutual_friends.duplicated(keep=False)) & (mutual_friends.num_friends > 5)]

In [None]:
print('Unique names: ', len(mutual_friends.target.unique()))
print('Unique IDs: ', len(mutual_friends.unique_id.unique()))

In [None]:
print('Unique names to remove: ', len(mutual_friends[(mutual_friends.duplicated(keep=False))].unique_id.unique()))

In [None]:
# keep=False flags every member of a group of fully identical rows, so all copies are
# dropped and only rows that were already unique remain.
mutual_friends = mutual_friends[~(mutual_friends.duplicated(keep=False))]

In [None]:
print('Unique names: ', len(mutual_friends.target.unique()))
print('Unique IDs: ', len(mutual_friends.unique_id.unique()))

In [None]:
mutual_friends[(mutual_friends.duplicated(keep=False))]

In [None]:
mutual_friends.relationship.value_counts()

In [None]:
mutual_friends[mutual_friends.target.isin(mutual_friends.unique_id)]

In [None]:
# Append the de-duplicated mutual-friend rows to the working frame (original index labels
# are kept, so labels may repeat after the concat).
# add unique records to working frame
edge_frame = pd.concat([edge_frame, mutual_friends])

In [None]:
# Remove every row with the ' mutual friends' description from `duplicates`, including the
# identical rows that were not copied to the working frame.
# remove records from duplicate frame
duplicates = duplicates[duplicates.description != ' mutual friends']

In [None]:
duplicates.relationship.value_counts().head(30)

In [None]:
duplicates[(duplicates.relationship == ' ')].target.value_counts()

In [None]:
# Remaining duplicate-name rows whose relationship text is 'Add Friend'.
non_friends = duplicates[(duplicates.relationship == 'Add Friend')]

In [None]:
# Ignore num_friends so it does not affect the duplicate checks that follow.
non_friends = non_friends.drop(columns='num_friends')

In [None]:
non_friends[non_friends.duplicated(keep=False)]

In [None]:
non_friends[non_friends.duplicated(keep=False)].description.value_counts()

In [None]:
mutual_friends.target.value_counts()

In [None]:
# The composite key column is named 'unique_id'; no 'unique' column is ever created, so the
# original lookup raised KeyError.
mutual_friends['unique_id'].value_counts()

In [None]:
len(mutual_friends.target.unique())

In [None]:
# Number of distinct composite keys (the column is 'unique_id'; 'unique' does not exist).
len(mutual_friends['unique_id'].unique())

In [None]:
# There must not be more composite keys than names, i.e. no name may map to several keys.
# The key column is 'unique_id'; the original 'unique' lookup raised KeyError before the
# assertion could run.
assert len(mutual_friends.target.unique()) >= len(mutual_friends['unique_id'].unique()), 'Duplicates detected!'

In [None]:
# len(edge_frame[(edge_frame['count'] == 1) & (edge_frame.target.isin(nodes_list))])

In [None]:
edge_frame[edge_frame.target=='Ivan Ivanov']

In [None]:
# Rebinds ego_frame (previously the edges between parsed nodes) to the rows whose
# relationship text is exactly 'Friend\nFriends'.
ego_frame = edge_frame[edge_frame.relationship=='Friend\nFriends']

In [None]:
# Fixed the attribute typo (`descripion`), which raised AttributeError.
edge_frame[(edge_frame.target=='Yon Lazarova') & (edge_frame.description==' friends')]

In [None]:
ego_frame

In [None]:
edge_frame[edge_frame.target=='Polly Ivanova']

In [None]:
# isin() requires a list-like argument; a bare string raises TypeError.
# NOTE(review): if the goal is to find targets that merely *contain* this text, use
# edge_frame.target.str.contains(...) instead - confirm.
edge_frame[edge_frame.target.isin(['Friend\nFriends\n'])]

In [None]:
# with open("edgelist.txt", encoding="UTF8", mode='w') as file:
#     for i in edge_tuples:
#         file.write(f"{i[0]} {i[1]}\n")

In [None]:
# G = nx.Graph()

In [None]:
# G.add_nodes_from(nodes)
# G.add_edges_from(edge_tuples)

In [None]:
# nx.draw(G, with_labels=True)

In [None]:
# nx.draw(G, with_labels=False)

In [None]:
# graph.add_star('Павел Богданов')

In [None]:
# NOTE(review): `edge_tuples` was only mentioned in commented-out cells, so on a fresh
# kernel it was undefined at this point. It is built here from the cleaned frame as
# (source, target) pairs - confirm that this is the edge set the graphs should use.
edge_tuples = list(zip(edge_frame['source'], edge_frame['target']))

# Working graph plus two independent snapshots that are iterated while `graph` is edited.
graph = nx.Graph(edge_tuples)
graph_copy = graph.copy()
graph_copy_copy = graph.copy()
# Filled later with the edges that touch the central node.
empty_graph = nx.Graph()

In [None]:
# nx.write_graphml(graph, 'graph.graphml')

In [None]:
# for i, j in graph_copy.degree:
#     if not i in nodes_list:
#         graph.remove_node(i)





# for i, j in graph_copy_copy.degree:
#     if i not in nodes_list:
#         graph_copy.remove_node(i)
#     if j > 10:
#         graph_copy.add_node(i)
# for i, j in graph_copy_copy.edges:
#     if i not in nodes_list or j not in nodes_list:
#         graph_copy.add_edge(i, j)

In [None]:
# for i, j in graph_copy.degree:
#     if not i in nodes_list:
#         graph.remove_node(i)

# for i, j in graph_copy_copy.degree:
#     if j <= 10:
#         graph_copy.remove_node(i)
# for i in graph_copy_copy.nodes:
#     if i in nodes_list:
#         graph_copy.remove_node(i)

In [None]:
# nx.info() was removed in networkx 3.0; print the headline numbers directly so the cell
# works on both old and new releases.
print('Nodes: %d, edges: %d' % (graph.number_of_nodes(), graph.number_of_edges()))

In [None]:
# Keep only nodes that appear in nodes_list. The snapshot graph_copy is scanned while
# `graph` is modified. remove_nodes_from() ignores nodes that are already gone, so the cell
# can be re-run safely (remove_node() raised on the second run), and the set makes each
# membership test constant-time.
listed_nodes = set(nodes_list)
graph.remove_nodes_from([name for name in graph_copy if name not in listed_nodes])


In [None]:
# Copy every edge of the snapshot that touches the central node into empty_graph.
ego_edges = [pair for pair in graph_copy.edges if "Павел Богданов" in pair]
empty_graph.add_edges_from(ego_edges)


In [None]:
# Node counts of the pruned graph and of the graph built from the central node's edges.
# Any difference is due to inactive accounts that are still in my list of friends
# but no longer exist, and which I did not add to nodes_list.
print(len(graph), len(empty_graph))

In [None]:
# One (central node, friend) pair per parsed node.
my_edges = [('Павел Богданов', friend) for friend in nodes_list]

In [None]:
# hub_ego = nx.ego_graph(graph, "Павел Богданов", radius=1)
# pos = nx.spring_layout(hub_ego, seed=1, iterations=100, dim=3)
# asd = {}
# for i, j in pos.items():
#     asd[i] = j[1:]
# pos = asd

In [None]:
# hub_ego used to be created only in the plotting cell further down, so this cell failed
# with NameError when the notebook ran top to bottom. Build it here with the same call.
hub_ego = nx.ego_graph(graph, "Павел Богданов", radius=1)
# Degree of every node, scaled by 100.
[100 * degree for _, degree in hub_ego.degree]

In [None]:

# Degree of every node of the ego graph, in node order.
[degree for _, degree in hub_ego.degree]


In [None]:
# Draw the radius-1 ego network around the central node and save it as blob18.png.
plt.figure(figsize=(125,85))
hub_ego = nx.ego_graph(graph, "Павел Богданов", radius=1)
# Fixed seed so the spring layout is the same on every run.
pos = nx.spring_layout(hub_ego, seed=1)


nx.draw_networkx_labels(hub_ego, pos, alpha=0.8, font_size=17)
# All edges in cyan first, then the (central node, friend) pairs from my_edges in blue.
nx.draw_networkx_edges(hub_ego, pos, edge_color='c', alpha=0.7, width=1.7)
nx.draw_networkx_edges(hub_ego, pos, edgelist=my_edges, edge_color='b', alpha=0.8, width=2)

# NOTE(review): node_color is a single colour ('r'), so cmap presumably has no effect here -
# confirm before relying on it.
nodes = nx.draw_networkx_nodes(hub_ego, pos, nodelist=nodes_list, node_color='r', cmap=plt.cm.Blues, alpha=1, node_size=700, edgecolors='k')


plt.axis('off')
plt.savefig('blob18.png')
plt.show()

In [None]:
# Fresh graph built from the same edge_tuples pairs as `graph`; being a separate object it
# is unaffected by the node removals applied to `graph` earlier.
temp = nx.Graph(edge_tuples)

In [None]:
# Write the unpruned graph to full_graph.gexf in the working directory.
nx.write_gexf(temp, 'full_graph.gexf')

In [None]:
# plt.figure(figsize=(250,170))
# random.seed(42)
# np.random.seed(42)

# pos = nx.spring_layout(graph, seed=1)


# nx.draw_networkx_labels(graph, pos, alpha=0.8)
# nx.draw_networkx_edges(graph, pos, edge_color='c', alpha=0.7, width=2, node_size=500)
# nx.draw_networkx_edges(graph, pos, edgelist=my_edges, edge_color='b', alpha=0.8, width=2, node_size=500)
# nodes = nx.draw_networkx_nodes(graph, pos, nodelist=nodes_list, node_color='r', alpha=1, node_size=1000, edgecolors='k')


        
# pos = nx.spring_layout(graph_copy, seed=1)
# nodes = nx.draw_networkx_nodes(graph_copy, pos, node_color='g', node_size=42, alpha=0.7)
# nodes.set_edgecolor('k')
# nx.draw_networkx_labels(graph_copy, pos, alpha=0.7)
# nx.draw_networkx_edges(graph_copy, pos, edge_color='c', alpha=0.7, node_size=42)


# plt.axis('off')
# plt.savefig('blob17.png')
# plt.show()

In [None]:
# P = Network()
# P.from_nx(graph)

In [None]:
# Reads a ', '-separated list of integers, then a single integer, from interactive input.
# NOTE(review): input() blocks a non-interactive "Run All", and this cell and the two after
# it look unrelated to the graph analysis - confirm whether they belong in this notebook.
numbers = [int(i) for i in input().split(', ')]
beggars = int(input())

In [None]:
# Entry k is the sum of every `beggars`-th number, starting at position k.
result = [sum(numbers[start::beggars]) for start in range(beggars)]

In [None]:
print(result)

In [None]:
# Shortest-path statistics and distance-based properties of `graph`.
pathlengths = []

print("source vertex {target:length, }")
for v in graph.nodes():
    # The function lives in the networkx namespace; the bare name was never imported and
    # raised NameError. dict() normalises the result whatever mapping type is returned.
    spl = dict(nx.single_source_shortest_path_length(graph, v))
    # Print the source vertex itself, as the header announces (the original printed the
    # whole graph object on every line).
    print('%s %s' % (v, spl))
    pathlengths.extend(spl.values())

print('')
if pathlengths:
    print("average shortest path length %s" % (sum(pathlengths)/len(pathlengths)))
else:
    # An empty graph has no paths; avoid dividing by zero.
    print("average shortest path length n/a (graph has no nodes)")

# histogram of path lengths
dist = {}
for p in pathlengths:
    dist[p] = dist.get(p, 0) + 1

print('')
print("length #paths")
for d in sorted(dist):
    print('%s %d' % (d, dist[d]))

# Radius, diameter, eccentricity, centre and periphery are only defined for a connected
# graph; networkx raises otherwise, which is likely after pruning nodes.
if len(graph) > 0 and nx.is_connected(graph):
    print("radius: %d" % nx.radius(graph))
    print("diameter: %d" % nx.diameter(graph))
    print("eccentricity: %s" % nx.eccentricity(graph))
    print("center: %s" % nx.center(graph))
    print("periphery: %s" % nx.periphery(graph))
else:
    print("graph is empty or disconnected: radius, diameter, eccentricity, center and periphery are undefined")
print("density: %s" % nx.density(graph))