<a href="https://colab.research.google.com/github/AyushSHK/TwoTowerThesisProject/blob/main/TwoTowerModelGraph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Copyright 2020 The TensorFlow Authors.

As required by the Apache License, this notebook is a derivative work of the official TensorFlow tutorial, and as such I have kept the original copyright notice. All additions to the model are my own. I have also kept a copy of the Apache license notice, as the license requires.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [None]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets

In [None]:
# Optional Colab workaround: run this cell only if the imports in the next cell
# cannot find the freshly installed packages. Try the import cell first.
import os
!pip install -q tensorflow-recommenders
# Sends signal 9 to the current process, i.e. the notebook kernel itself, so the
# kernel dies here and must be restarted. Do not include this cell in "Run all".
os.kill(os.getpid(), 9)

In [None]:
import os
import tempfile
%matplotlib inline
import matplotlib.pyplot as plt
os.environ["SM_FRAMEWORK"] = "tf.keras"
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8-*" and
# later releases removed the old names, so use whichever name this install provides.
plt.style.use(
    "seaborn-v0_8-whitegrid"
    if "seaborn-v0_8-whitegrid" in plt.style.available
    else "seaborn-whitegrid"
)

In [None]:
%load_ext tensorboard
import datetime

Preprocessing

In [None]:
# Load the MovieLens 100k movie catalogue and ratings from TensorFlow Datasets.
movies = tfds.load("movielens/100k-movies", split="train")
ratings = tfds.load("movielens/100k-ratings", split="train")
ratedf = ratings  # handle on the unprocessed ratings dataset


def _select_rating_features(x):
  """Keep the features the model uses and reformat a few of them."""
  gender_flag = tf.where(x["user_gender"], 1.0, 0.0)  # boolean -> 1.0 / 0.0
  occupation_text = tf.strings.regex_replace(x["user_occupation_text"], "/", " ")
  return {
      "movie_title": x["movie_title"],
      "user_id": x["user_id"],
      "timestamp": x["timestamp"],
      "occ_label": x["user_occupation_label"],
      "age": x["bucketized_user_age"],
      "gen": gender_flag,
      "occtext": occupation_text,
      "genres": x['movie_genres'],
  }


ratings = ratings.map(_select_rating_features)

# The candidate side only needs the movie titles.
movies = movies.map(lambda x: x["movie_title"])

In [None]:
from enum import unique
import pandas as pd
df2=tfds.as_dataframe(ratings) # materialise the mapped ratings dataset as a pandas DataFrame
# NOTE: `df` is a second name for the same DataFrame object (no copy is made),
# so every change made through `df` is also visible through `df2`.
df=df2
df2.head()

In [None]:
#df2=np.array2string(df["genres"].values)

print(type(df["genres"][0]))
# Render each genre-id array as text. Cells that are already strings are left
# alone, so re-running this cell does not fail on the converted column.
genre_text = df["genres"].apply(
    lambda x: x if isinstance(x, str) else np.array2string(x))
# Strip the array markup. regex=False makes every pattern a literal string, so the
# result does not depend on the pandas version's default for `regex`
# ("[" on its own is not even a valid regular expression).
for unwanted, replacement in (("[", ""), ("]", ""), ("b", ""), ("'", ""), (",", " ")):
    genre_text = genre_text.str.replace(unwanted, replacement, regex=False)
df["genres"] = genre_text
df.head() #convert genres to strings and strip unnecessary characters

In [None]:
# Rebuild a tf.data.Dataset from the amended DataFrame: one element per row,
# each element a dict keyed by column name.
_feature_columns = (
    "age",
    "gen",
    "genres",
    "movie_title",
    "occ_label",
    "occtext",
    "timestamp",
    "user_id",
)
dataset = tf.data.Dataset.from_tensor_slices(
    {column: df[column].values for column in _feature_columns})

In [None]:
dataset.element_spec

In [None]:
def _as_feature_dict(x):
  """Map one element of the custom dataset back to the model's feature dict."""
  passthrough = ("movie_title", "user_id", "timestamp", "occ_label", "age", "gen")
  features = {key: x[key] for key in passthrough}
  # Occupation text: replace "/" with a space.
  features["occtext"] = tf.strings.regex_replace(x["occtext"], "/", " ")
  features["genres"] = x["genres"]
  return features


datas = dataset.map(_as_feature_dict)

Prepare buckets and vocabulary

In [None]:
# Gather every timestamp into one NumPy array (the dataset is read in batches of 100).
timestamps = np.concatenate(list(datas.map(lambda x: x["timestamp"]).batch(100)))

max_timestamp = timestamps.max()
min_timestamp = timestamps.min()

# 1000 evenly spaced bucket boundaries spanning the observed timestamp range.
timestamp_buckets = np.linspace(
    min_timestamp, max_timestamp, num=1000,
)
# Same treatment for the age feature, with 20 boundaries.
ages = np.concatenate(list(datas.map(lambda x: x["age"]).batch(100)))

max_age = ages.max()
min_age = ages.min()
age_buckets = np.linspace(
    min_age, max_age, num=20,
)
# Batched text datasets; UserModel adapts its TextVectorization layers on these.
genress=datas.map(lambda x: x["genres"]).batch(100)
occtexts=datas.map(lambda x: x["occtext"]).batch(100)
# Vocabularies: the distinct values of each categorical feature.
unique_movie_titles = np.unique(np.concatenate(list(movies.batch(1000))))
unique_user_ids = np.unique(np.concatenate(list(datas.batch(1_000).map(
    lambda x: x["user_id"]))))
unique_occ_ids = np.unique(np.concatenate(list(datas.batch(1_000).map(
    lambda x: x["occ_label"]))))
# NOTE(review): "gen" was built upstream as 1.0/0.0, so this vocabulary holds floats
# even though UserModel passes it to an IntegerLookup layer - confirm this is intended.
unique_gen = np.unique(np.concatenate(list(datas.batch(1_000).map(
    lambda x: x["gen"]))))

## Model definition

In [None]:
class UserModel(tf.keras.Model):
  """Query-side feature encoder.

  Encodes every user/context feature (id lookups, bucketised and normalised
  numeric features, and bag-of-token text embeddings) and concatenates the
  pieces along axis 1 into one vector per example. Vocabularies and bucket
  boundaries are read from the module-level arrays prepared earlier in the
  notebook.
  """

  def __init__(self):
    super().__init__()
    # Keep the creation order of the Sequential blocks stable: Keras numbers
    # their auto-generated names, and later cells may look layers up by name.

    # user_id -> 32-d embedding (+1 row for out-of-vocabulary ids).
    self.user_embedding = tf.keras.Sequential([
        tf.keras.layers.StringLookup(
            vocabulary=unique_user_ids, mask_token=None),
        tf.keras.layers.Embedding(len(unique_user_ids) + 1, 32),
    ])
    # Gender flag -> 1-d embedding.
    # NOTE(review): unique_gen comes from the float-encoded "gen" feature, yet it
    # is fed to an IntegerLookup - confirm the installed Keras accepts this.
    self.gen_embedding = tf.keras.Sequential([
        tf.keras.layers.IntegerLookup(
            vocabulary=unique_gen, mask_token=None),
        tf.keras.layers.Embedding(len(unique_gen) + 1, 1),
    ])

    # Occupation label -> 32-d embedding.
    self.occ_embedding = tf.keras.Sequential([
        tf.keras.layers.IntegerLookup(
            vocabulary=unique_occ_ids, mask_token=None),
        tf.keras.layers.Embedding(len(unique_occ_ids) + 1, 32),
    ])
    # Timestamp: bucketised embedding plus a normalised scalar.
    self.timestamp_embedding = tf.keras.Sequential([
        tf.keras.layers.Discretization(timestamp_buckets.tolist()),
        tf.keras.layers.Embedding(len(timestamp_buckets) + 1, 32),
    ])
    self.normalized_timestamp = tf.keras.layers.Normalization(
        axis=None
    )
    # Age: bucketised embedding plus a normalised scalar.
    self.age_embedding = tf.keras.Sequential([
        tf.keras.layers.Discretization(age_buckets.tolist()),
        tf.keras.layers.Embedding(len(age_buckets) + 1, 32),
    ])
    self.normalized_age = tf.keras.layers.Normalization(
        axis=None
    )
    # Occupation text: tokenise, embed each token, average over tokens.
    self.occ_vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=100)

    self.occ_text_embedding = tf.keras.Sequential([
      self.occ_vectorizer,
      tf.keras.layers.Embedding(100, 32, mask_zero=True),
      tf.keras.layers.GlobalAveragePooling1D(),
    ])

    # Genre-id text: same tokenise / embed / average scheme.
    self.genre_vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=100)

    # NOTE(review): the embedding has 22 rows while the vectorizer allows up to
    # 100 tokens; this is only safe while the adapted genre vocabulary has at
    # most 22 entries - confirm.
    self.genre_embedding = tf.keras.Sequential([
      self.genre_vectorizer,
      tf.keras.layers.Embedding(22, 32, mask_zero=True),
      tf.keras.layers.GlobalAveragePooling1D(),
    ])

    # Fit the data-dependent preprocessing layers on the full feature sets.
    self.genre_vectorizer.adapt(genress)
    self.occ_vectorizer.adapt(occtexts)
    self.normalized_timestamp.adapt(timestamps)
    self.normalized_age.adapt(ages)

  def getResult(self,inputs): # returns a copy of the result for intermediate neuron activation
    """Return the concatenated feature vector for `inputs`.

    Delegates to `call` so the two entry points cannot drift apart; used by the
    activation-extraction cells to feed the downstream dense stack.
    """
    return self.call(inputs)

  def call(self, inputs):
    """Encode a feature dict and concatenate the pieces along axis 1.

    Args:
      inputs: dict with keys "user_id", "occ_label", "timestamp", "age", "gen",
        "occtext" and "genres".

    Returns:
      A tensor with one row per example; the normalised scalars are reshaped to
      a single column each before concatenation.
    """
    return tf.concat([
        self.user_embedding(inputs["user_id"]),
        self.occ_embedding(inputs["occ_label"]),
        self.timestamp_embedding(inputs["timestamp"]),
        tf.reshape(self.normalized_timestamp(inputs["timestamp"]), (-1, 1)),
        self.age_embedding(inputs["age"]),
        tf.reshape(self.normalized_age(inputs["age"]), (-1, 1)),
        self.gen_embedding(inputs["gen"]),
        self.occ_text_embedding(inputs["occtext"]),
        self.genre_embedding(inputs["genres"]),
    ], axis=1)

In [None]:
class QueryModel(tf.keras.Model):
  """Query tower: the user feature encoder followed by a dense/conv stack."""

  def __init__(self, layer_sizes):
    """Build the tower.

    Args:
      layer_sizes: widths of the Dense layers. All but the last use ReLU and
        come before the reshape/conv/pool block; the last one is a linear
        output layer placed after it.
    """
    super().__init__()

    self.embedding_model = UserModel()

    hidden_sizes, output_sizes = layer_sizes[:-1], layer_sizes[-1:]

    stack = tf.keras.Sequential()
    for width in hidden_sizes:
      stack.add(tf.keras.layers.Dense(width, activation="relu"))

    # Reshape((2, 16)) needs exactly 32 features from the preceding layer.
    stack.add(tf.keras.layers.Reshape((2,16)))
    stack.add(tf.keras.layers.Conv1D(
        filters=32, kernel_size=1, strides=1, padding="same", activation="relu"))
    stack.add(tf.keras.layers.GlobalMaxPool1D(
        data_format='channels_last', keepdims=False))

    for width in output_sizes:
      stack.add(tf.keras.layers.Dense(width))

    self.dense_layers = stack

  def getUser(self): #needed to access sub model
    """Return the underlying UserModel feature encoder."""
    return self.embedding_model

  def call(self, inputs):
    """Encode the feature dict, then run it through the dense stack."""
    return self.dense_layers(self.embedding_model(inputs))

In [None]:
class MovieModel(tf.keras.Model):
  """Candidate-side feature encoder for movie titles.

  Concatenates a per-title id embedding with an averaged embedding of the
  title's tokens.
  """

  def __init__(self):
    super().__init__()

    max_tokens = 10_000

    # Whole title -> 32-d embedding (+1 row for out-of-vocabulary titles).
    self.title_embedding = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
          vocabulary=unique_movie_titles,mask_token=None),
      tf.keras.layers.Embedding(len(unique_movie_titles) + 1, 32)
    ])

    # Title text: tokenise, embed each token, average over tokens.
    self.title_vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=max_tokens)

    self.title_text_embedding = tf.keras.Sequential([
      self.title_vectorizer,
      tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
      tf.keras.layers.GlobalAveragePooling1D(),
    ])

    # Build the token vocabulary from the movie title dataset.
    self.title_vectorizer.adapt(movies)

  def getResult(self,titles):
    """Return the concatenated title features for `titles`.

    Delegates to `call` so the two entry points cannot drift apart; used by the
    activation-extraction cells to feed the downstream dense stack.
    """
    return self.call(titles)

  def call(self, titles):
    """Concatenate the id embedding and the text embedding along axis 1."""
    return tf.concat([
        self.title_embedding(titles),
        self.title_text_embedding(titles),
    ], axis=1)

In [None]:
class CandidateModel(tf.keras.Model):
  """Candidate tower: the movie feature encoder followed by a dense/conv stack."""

  def __init__(self, layer_sizes):
    """Build the tower.

    Args:
      layer_sizes: widths of the Dense layers. All but the last use ReLU and
        come before the reshape/conv/pool block; the last one is a linear
        output layer placed after it.
    """
    super().__init__()

    self.embedding_model = MovieModel()

    hidden_sizes, output_sizes = layer_sizes[:-1], layer_sizes[-1:]

    stack = tf.keras.Sequential()
    for width in hidden_sizes:
      stack.add(tf.keras.layers.Dense(width, activation="relu"))

    # Same reshape/conv/pool block as the query tower; Reshape((2, 16)) needs
    # exactly 32 features from the preceding layer.
    stack.add(tf.keras.layers.Reshape((2,16)))
    stack.add(tf.keras.layers.Conv1D(
        filters=32, kernel_size=1, strides=1, padding="same", activation="relu"))
    stack.add(tf.keras.layers.GlobalMaxPool1D(
        data_format='channels_last', keepdims=False))

    for width in output_sizes:
      stack.add(tf.keras.layers.Dense(width))

    self.dense_layers = stack

  def getMovie(self):
    """Return the underlying MovieModel feature encoder."""
    return self.embedding_model

  def call(self, inputs):
    """Encode the titles, then run them through the dense stack."""
    return self.dense_layers(self.embedding_model(inputs))

In [None]:
class MovielensModel(tfrs.models.Model):
  """Two-tower retrieval model: a query tower and a candidate tower trained
  with a TFRS retrieval task."""

  def __init__(self, layer_sizes):
    """Create both towers with the same `layer_sizes` and set up the task."""
    super().__init__()
    self.query_model = QueryModel(layer_sizes)
    self.candidate_model = CandidateModel(layer_sizes)
    # Top-K metrics are computed against the embeddings of the whole catalogue.
    candidate_embeddings = movies.batch(128).map(self.candidate_model)
    top_k_metrics = tfrs.metrics.FactorizedTopK(candidates=candidate_embeddings)
    self.task = tfrs.tasks.Retrieval(metrics=top_k_metrics)

  def getMovie(self):
    """Return the candidate tower."""
    return self.candidate_model

  def getUser(self):
    """Return the query tower."""
    return self.query_model

  def compute_loss(self, features, training=False):
    """Embed one batch through both towers and return the retrieval task loss."""
    query_feature_names = (
        "user_id", "timestamp", "occ_label", "age", "gen", "occtext", "genres")
    query_inputs = {name: features[name] for name in query_feature_names}

    query_embeddings = self.query_model(query_inputs)
    movie_embeddings = self.candidate_model(features["movie_title"])

    return self.task(query_embeddings, movie_embeddings)

In [None]:

# Train+validation data
# Shuffle once (reshuffle_each_iteration=False keeps the order fixed across epochs)
# so that take/skip below yield disjoint train and test sets.
# NOTE(review): no seed is passed, so the split differs between kernel sessions.
shuffled = datas.shuffle(100000, reshuffle_each_iteration=False)

# First 80,000 shuffled examples for training, the following 20,000 for evaluation.
train = shuffled.take(80000)
test = shuffled.skip(80000).take(20000)

# Only the test pipeline is cached; the training pipeline is shuffled and batched.
cached_train = train.shuffle(100000).batch(2048)
cached_test = test.batch(4096).cache()

In [None]:
#one_layer_history.history.keys() 

In [None]:
num_epochs = 50 # 30 min run time approximately, only works in non gpu runtime

# Layer widths shared by both towers: two ReLU Dense layers (64, 32) and a 16-d output.
model = MovielensModel([64,32,16])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") #Tensorboard logs
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, write_graph=True)

# First training run: silent, logged to TensorBoard, validated every 5th epoch.
one_layer_history = model.fit(
    cached_train,
    validation_data=cached_test,
    validation_freq=5,
    epochs=num_epochs,
    verbose=0,
    callbacks=[tensorboard_callback])
# NOTE(review): this second call trains the same model for another `num_epochs`
# epochs and discards its history - confirm the extra training is intended.
model.fit(cached_train,epochs=num_epochs)
# NOTE(review): the value below is the last entry recorded by the FIRST fit under
# the un-prefixed metric key, so it does not describe the model after the second fit.
accuracy = one_layer_history.history["factorized_top_k/top_100_categorical_accuracy"][-1]
print(f"Top-100 accuracy: {accuracy:.2f}.")

In [None]:
# Disabled experiment, kept as an unused string literal: manual graph/profiler trace export.
'''writer = tf.summary.create_file_writer('./graphs')
tf.summary.trace_on(graph=True, profiler=True)
with writer.as_default():
  tf.summary.trace_export(
      name="trace",
      step=0,
      profiler_outdir='./graphs')
tf.summary.trace_on(graph=True, profiler=True)'''

# Open TensorBoard on the directory the training callback writes its logs under.
%tensorboard --logdir logs/fit


In [None]:
#Brute force layer to retrieve candidates
# The trained query tower is attached so the layer can be called with raw query features.
brute_force = tfrs.layers.factorized_top_k.BruteForce(model.query_model)
# Index pairs of (title batch, embedding batch) produced by the candidate tower.
brute_force.index_from_dataset(
  tf.data.Dataset.zip((movies.batch(100), movies.batch(100).map(model.candidate_model)))
)
#brute_force.index(movies.batch(128).map(model.candidate_model), movies)
# Ask for the top 20 titles for one hand-written query; the first returned value is discarded.
_, titles = brute_force({
    "user_id": np.array(["996"]),
    "timestamp": np.array([879024327]),
    "occ_label": np.array([17]),
    "age": np.array([27.0]),
    "gen": np.array([0.0]),
    "occtext": np.array(["student"]),
    "genres":np.array(["4 10"])
    },
    k=20
)

print(titles)

In [None]:
#Neuron extraction
from keras import backend as K
Layerout=[]
movie=model.getMovie()
user=model.getUser()
user.summary()
# Reach the query tower's dense stack through its attribute instead of a
# hard-coded auto-generated name ("sequential_7"), which changes whenever the
# model is built more than once in the same kernel.
query_stack=user.dense_layers
Seqname=query_stack.name
query_stack.summary()

# One hand-written query; change the values manually to collect the query-side
# activations for other feature combinations.
query_input={
    "user_id": np.array(["996"]),
    "timestamp": np.array([879024327]),
    "occ_label": np.array([4]),
    "age": np.array([55.0]),
    "gen": np.array([0.0]),
    "occtext": np.array(["doctor"]),
    "genres":np.array(["4 10"])
    }
# Concatenated feature vector that the dense stack receives as its input.
res=user.getUser().getResult(query_input)


def _probe_query_layer(layer_index):
  """Build a backend function from the dense stack's input to one layer's output."""
  return K.function(
    [query_stack.layers[0].input],
    [query_stack.layers[layer_index].output])


# One probe per layer of the stack. With layer sizes [64, 32, 16] the layers are:
# 0 Dense(64), 1 Dense(32), 2 Reshape, 3 Conv1D, 4 GlobalMaxPool1D, 5 Dense(16).
get_1st = _probe_query_layer(0)
get_2nd = _probe_query_layer(1)
get_3rd = _probe_query_layer(2)
get_4th = _probe_query_layer(3)
get_5th = _probe_query_layer(4)
get_6th = _probe_query_layer(5)

output1 = get_1st(res)[0]
output2= get_2nd(res)[0]
output3= get_6th(res)[0]      # final query embedding (last Dense layer)
outputConv=get_5th(res)[0]    # output of the pooling layer that follows the Conv1D

# Activation vector for this query: first Dense, second Dense, pooled conv output.
fin=np.concatenate([output1[0], output2[0],outputConv[0]])
print(len(fin))

In [None]:
# The title must match a catalogue title exactly; anything else is treated as
# out-of-vocabulary by the title lookup. The previous literal carried a stray
# b'...' wrapper and quote characters, so it could never match.
inputMov={
    "movie_title": np.array(["Braindead (1992)"])}
#inputMov="b'Second Jungle Book: Mowgli & Baloo, The (1997)'""
# need to manually change inputs to whatever combination to get all movie side activations
# MovieModel works on the title tensor itself, so pass the array rather than the dict.
resM=movie.getMovie().getResult(inputMov["movie_title"])
# Reach the candidate tower's dense stack through its attribute instead of a
# hard-coded auto-generated name ("sequential_10"), which changes whenever the
# model is built more than once in the same kernel.
movie_stack=movie.dense_layers
SeqM=movie_stack.name
movie.summary()
movie_stack.summary()


def _probe_movie_layer(layer_index):
  """Build a backend function from the dense stack's input to one layer's output."""
  return K.function(
    [movie_stack.layers[0].input],
    [movie_stack.layers[layer_index].output])


# One probe per layer of the stack. With layer sizes [64, 32, 16] the layers are:
# 0 Dense(64), 1 Dense(32), 2 Reshape, 3 Conv1D, 4 GlobalMaxPool1D, 5 Dense(16).
get_1stM = _probe_movie_layer(0)
get_2ndM = _probe_movie_layer(1)
get_3rdM = _probe_movie_layer(2)
get_4thM = _probe_movie_layer(3)
get_5thM = _probe_movie_layer(4)
get_6thM = _probe_movie_layer(5)

output1M = get_1stM(resM)[0]
output2M= get_2ndM(resM)[0]
output3M= get_6thM(resM)[0]     # final movie embedding (last Dense layer)
outputConvM=get_5thM(resM)[0]   # output of the pooling layer that follows the Conv1D
# Activation vector for this movie: first Dense, second Dense, pooled conv output.
finM=np.concatenate([output1M[0], output2M[0],outputConvM[0]])
print(finM)

In [None]:
# Dot product of the two final tower outputs (the statement was duplicated before).
dp=np.dot(np.squeeze(output3),np.squeeze(output3M))
print(dp)
# Helpers used to normalise the raw dot-product score.
def softmax(x):
    """Return the softmax of the scores in ``x``, normalised along axis 0."""
    # Shift by the overall maximum so the largest exponent is exp(0).
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum(axis=0)
    return weights / total
def sigmoid(x):
  """Return the logistic sigmoid 1 / (1 + exp(-x)) of a scalar or array.

  Only exp(-|x|) is ever evaluated, so large-magnitude inputs cannot overflow
  (the direct formula overflows in exp for large negative x).
  """
  x = np.asarray(x)
  decay = np.exp(-np.abs(x))  # always in (0, 1]
  result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
  # Indexing with () turns a 0-d result back into a scalar; arrays pass through.
  return result[()]
#print(softmax(dp))
# Show the sigmoid of the raw dot-product score.
print(sigmoid(dp))

In [None]:
# Activation row for the current (query, movie) pair: query-side activations,
# movie-side activations, the raw dot product and its sigmoid.
# Running this cell replaces `row1`, so use it only for the first pair of a new set.
row1=np.concatenate([fin,finM,[dp],[sigmoid(dp)]]) #creates neuron row, used if first row in a new set of rows

In [None]:
#row1=np.concatenate([fin,finM,[dp],[sigmoid(dp)]])
# Run once per additional (query, movie) pair, after re-running the extraction
# cells with new inputs: the new row is stacked underneath the rows collected so far.
row2=np.concatenate([fin,finM,[dp],[sigmoid(dp)]]) #use if second+ row
print(row1)
# NOTE: every run of this cell appends a row, so it is not idempotent.
row1=np.vstack([row1,row2])

In [None]:
#import scipy

In [None]:
#row1=np.vstack([row1,fin])
# rowvar=False: each column of `row1` (one neuron / score) is a variable and each
# row is an observation, giving a neuron-by-neuron correlation matrix.
coeff=np.corrcoef(row1,rowvar=False) #correlation coefficient matrix creation
# Replace NaN entries with numbers (nan_to_num maps NaN to 0 by default).
coeffFin=np.nan_to_num(coeff)
print(np.shape(coeffFin))
coeffFin=np.round(coeffFin, 3)
# Threshold: the 97th percentile of the rounded coefficients.
q3=np.percentile(coeffFin,97)
#q3=0.5
print(q3)
# Keep only non-negative correlations at or above the threshold; zero everything else.
coeffFin[coeffFin < 0] = 0
coeffFin[coeffFin < q3] = 0
row1=np.round(row1, 3) #round to 3 decimal places
print(coeffFin)
np.savetxt("foo.csv", coeffFin, delimiter=",",fmt='%f') #in case you want to see values
np.savetxt("trial1.csv", row1, delimiter=",",fmt='%f')

In [None]:
!pip install python-igraph==0.8.3 
!apt install libcairo2-dev pkg-config python3-dev
!pip install python-igraph leidenalg cairocffi

In [None]:
import csv
import igraph as ig
import cairocffi

# Read back the saved activation rows and thresholded correlations for inspection.
with open('trial1.csv', newline='') as f:
    reader = csv.reader(f)
    vertex = list(reader)

print(vertex)

with open('foo.csv', newline='') as f:
    reader = csv.reader(f)
    edges = list(reader)


# One vertex per column of the correlation matrix; edge weight = correlation.
G=ig.Graph.Weighted_Adjacency(coeffFin ,mode='undirected',  attr='weight', loops=False) # create graph

# Vertex layout follows the column order of the activation rows: 64 + 32 + 32
# query-side values, then 64 + 32 + 32 movie-side values, then the dot product
# and its sigmoid. Ranges are half-open [start, stop), like Python slices. The
# previous bounds ended one short and used 204 instead of 224, which left
# vertices 63, 95, 127, 191, 223 and 255 unlabelled and mislabelled 204-222.
vertex_groups = [
    (0, 64, "query-Dense 1", "blue"),
    (64, 96, "query-Dense 2", "cyan"),
    (96, 128, "query-Pool", "magenta"),
    (128, 192, "Movie-Dense 1", "red"),
    (192, 224, "Movie-Dense 2", "orange"),
    (224, 256, "Movie-Pool", "yellow"),
    (256, 257, "DotProduct", "purple"),
    (257, 258, "Sigmoid Output", "green"),
]
for start, stop, layer_name, colour in vertex_groups:
    G.vs[start:stop]["layer"] = layer_name
    G.vs[start:stop]["color"] = colour
# Remember the original column index; it survives later vertex deletions.
G.vs["label"] = range(G.vcount())
ig.plot(G,vertex_color = G.vs['color'],vertex_label=G.vs["label"])

In [None]:
filename="ComHorrPos" # set file names

In [None]:
vseq = G.vs #delete unconnected nodes
# Vertices with no incident edge carry no correlation information; drop them.
to_delete_ids = [idx for idx, deg in enumerate(G.degree()) if deg == 0]
G.delete_vertices(to_delete_ids)
ig.plot(
    G,
    filename+"Full.png",
    vertex_color=G.vs['color'],
    vertex_label=G.vs["label"],
    vertex_label_size=10,
)


In [None]:
# Community detection with the multilevel method on the weighted graph.
comms = G.community_multilevel(weights="weight")
print(comms.modularity)
print(comms)
G.vs["group"] = comms.membership

# Collect the original node labels of each community in a single pass.
labels_by_cluster = {}
for label, cluster_id in zip(G.vs["label"], G.vs["group"]):
  labels_by_cluster.setdefault(cluster_id, []).append(label)

# Communities are emitted in id order, stopping at the first id that has no members.
membership=[]
for cluster_id in range(len(comms.membership)):
  if cluster_id not in labels_by_cluster:
    break
  membership.append(labels_by_cluster[cluster_id])
Loudf = pd.DataFrame.from_records(membership)  #Get communities
ig.plot(
    comms,
    filename+"Lou.png",
    mark_groups=True,
    vertex_color=G.vs['color'],
    vertex_label=G.vs["label"],
    vertex_label_size=10,
)



In [None]:
# Leiden community detection, optimising modularity on the weighted graph.
comms = G.community_leiden(objective_function='modularity',weights="weight")
print(comms.modularity)

# Group the original node labels by the Leiden cluster ids in comms.membership.
# This used to read G.vs["group"], which still holds the assignment stored by the
# previous community-detection cell, so the exported table did not describe the
# Leiden result.
labels_by_cluster = {}
for label, cluster_id in zip(G.vs["label"], comms.membership):
  labels_by_cluster.setdefault(cluster_id, []).append(label)
membership = [labels_by_cluster[cluster_id] for cluster_id in sorted(labels_by_cluster)]
Leidf = pd.DataFrame.from_records(membership)#Get communities
ig.plot(comms,filename+"Lei.png" ,mark_groups = True,vertex_color = G.vs['color'],vertex_label=G.vs["label"],vertex_label_size=10)

In [None]:
Leidf.head()

In [None]:
# Clean and save the membership table. fillna returns a new frame, so its result
# has to be used; the bare call that was here before had no effect.
Loudf.fillna('').to_csv(filename+"Lou.csv",index=False)


In [None]:
# Clean and save the membership table. fillna returns a new frame, so its result
# has to be used; the bare call that was here before had no effect.
Leidf.fillna('').to_csv(filename+"Lei.csv",index=False)

In [None]:
# Per-vertex centrality measures on the weighted graph.
pr = G.pagerank(weights='weight')
bet = G.betweenness(weights='weight')
close = G.closeness(weights='weight')
vlab = G.vs["label"]

# Whole-graph measures, wrapped in one-element lists.
vc = [G.vertex_connectivity()]
dens = [G.density()]
print(dens)
print(vc)

# Current vertex ids (after deletions) next to the original labels.
Indexs = [v.index for v in G.vs]

measure_columns = ['Node ID','Original Node ID', 'Page Rank Centrality','Betweeness','Closeness Centrality']
measure_rows = list(zip(Indexs, vlab, pr, bet, close))
Gdf = pd.DataFrame(measure_rows, columns=measure_columns)

Gdf.to_csv(filename+"Measures.csv",index=False) # get all centrality information and save to df
ig.plot(
    G,
    vertex_color=G.vs['color'],
    vertex_label=pr,
    vertex_label_size=10,
)

In [None]:
Gdf.head()

In [None]:
# Print a summary of the final graph, then export it in Pajek format.
ig.summary(G)
#G.write_edgelist("edges.txt")
G.write_pajek(filename+"Paj.txt")	#get paj file for compnet use
  #all similarity calculations are done in compnet